tt-help-cli-ycl 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/auto-core.mjs +174 -0
  3. package/src/cli/auto.js +94 -0
  4. package/src/cli/explore.js +117 -0
  5. package/src/cli/progress.js +111 -0
  6. package/src/cli/scrape.js +47 -0
  7. package/src/cli/utils.js +18 -0
  8. package/src/cli/videos.js +41 -0
  9. package/src/cli/watch.js +28 -0
  10. package/src/data-store.mjs +213 -0
  11. package/src/{explore-core.cjs → explore-core.mjs} +148 -157
  12. package/src/{get-user-videos-core.cjs → get-user-videos-core.mjs} +6 -23
  13. package/src/lib/args.js +19 -38
  14. package/src/lib/auto-browser.mjs +5 -12
  15. package/src/lib/browser/anti-detect.js +23 -0
  16. package/src/lib/browser/cdp.js +142 -0
  17. package/src/lib/browser/launch.js +43 -0
  18. package/src/lib/browser/page.js +62 -0
  19. package/src/lib/constants.js +13 -95
  20. package/src/lib/delay.js +54 -0
  21. package/src/lib/explore.js +16 -123
  22. package/src/lib/fetcher.js +3 -18
  23. package/src/lib/get-user-videos-browser.mjs +1 -6
  24. package/src/lib/io.js +8 -30
  25. package/src/lib/parser.js +1 -1
  26. package/src/lib/retry.js +44 -0
  27. package/src/lib/scrape-browser.mjs +1 -6
  28. package/src/lib/scrape.js +5 -4
  29. package/src/lib/url.js +52 -0
  30. package/src/main.mjs +59 -822
  31. package/src/scraper/{core.cjs → core.mjs} +25 -57
  32. package/src/scraper/modules/{comment-extractor.cjs → comment-extractor.mjs} +23 -15
  33. package/src/scraper/modules/follow-extractor.mjs +121 -0
  34. package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +3 -5
  35. package/src/scraper/modules/page-error-detector.mjs +68 -0
  36. package/src/scraper/modules/page-helpers.mjs +44 -0
  37. package/src/scraper/modules/scroll-collector.mjs +189 -0
  38. package/src/watch/public/index.html +139 -64
  39. package/src/watch/server.mjs +234 -153
  40. package/src/auto-core.cjs +0 -367
  41. package/src/data-store.cjs +0 -69
  42. package/src/get-user-videos.cjs +0 -59
  43. package/src/scraper/index.cjs +0 -97
  44. package/src/scraper/modules/follow-extractor.cjs +0 -112
  45. package/src/scraper/modules/page-helpers.cjs +0 -422
  46. package/src/scraper/modules/scroll-collector.cjs +0 -173
  47. package/src/scraper/modules/video-scanner.cjs +0 -43
package/src/auto-core.cjs DELETED
@@ -1,367 +0,0 @@
1
- const {
2
- delay,
3
- ensureBrowserReady,
4
- ensureTikTokPage,
5
- setDelayConfig,
6
- getDelayConfig,
7
- closeCommentPanel,
8
- retryWithBackoff,
9
- } = require('./scraper/modules/page-helpers.cjs');
10
- const {
11
- getUserInfo,
12
- collectVideos,
13
- isPageRestricted,
14
- } = require('./get-user-videos-core.cjs');
15
- const { runScrape } = require('./scraper/core.cjs');
16
- const { extractFollowAndFollowers } = require('./scraper/modules/follow-extractor.cjs');
17
-
18
- function mergeUserInfo(existing, incoming, source) {
19
- const merged = { ...existing };
20
- for (const [key, value] of Object.entries(incoming)) {
21
- if (key === '_sources') continue;
22
- if (value === undefined || value === null || value === '') continue;
23
- if (typeof value === 'number' && typeof merged[key] === 'number') {
24
- merged[key] = Math.max(merged[key], value);
25
- } else if (merged[key] === undefined || merged[key] === null || merged[key] === '') {
26
- merged[key] = value;
27
- }
28
- }
29
- if (source) {
30
- if (!merged._sources) merged._sources = [];
31
- if (!merged._sources.includes(source)) merged._sources.push(source);
32
- }
33
- return merged;
34
- }
35
-
36
- async function runAuto(options) {
37
- const {
38
- username,
39
- collectMax = 1,
40
- scrapeDepth = 50,
41
- maxComments = 200,
42
- maxGuess = 10,
43
- preset = null,
44
- switchMax = null,
45
- commentMax = null,
46
- enableFollow = false,
47
- maxFollowing = 200,
48
- maxFollowers = 200,
49
- log = console.error,
50
- } = options;
51
-
52
- if (preset) {
53
- setDelayConfig(preset);
54
- } else if (switchMax || commentMax) {
55
- setDelayConfig({
56
- switchMax: switchMax || 5000,
57
- commentMax: commentMax || 3000,
58
- });
59
- }
60
-
61
- const config = getDelayConfig();
62
- const cleanUsername = username.replace('@', '');
63
-
64
- log(`auto 模式: @${cleanUsername}`);
65
- log(`收集视频数: ${collectMax}, 每个滑动: ${scrapeDepth}次, 每视频评论数: ${maxComments}`);
66
-
67
- const browser = await ensureBrowserReady();
68
- let page;
69
- try {
70
- page = await ensureTikTokPage(browser, `https://www.tiktok.com/@${cleanUsername}`);
71
- } catch (e) {
72
- await browser.close().catch(() => {});
73
- throw e;
74
- }
75
-
76
- // [1/3] 获取种子用户信息
77
- const profileUrl = `https://www.tiktok.com/@${cleanUsername}`;
78
- log(`\n[1/3] 获取 @${cleanUsername} 的用户信息和视频列表...`);
79
- await retryWithBackoff(() => page.goto(profileUrl, { waitUntil: 'load', timeout: 30000 }), { log });
80
- await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
81
- await delay(1000, 2000);
82
-
83
- const seedUserInfo = await getUserInfo(page);
84
- if (!seedUserInfo.uniqueId) {
85
- seedUserInfo.uniqueId = cleanUsername;
86
- }
87
- log(`种子用户: ${seedUserInfo.nickname || seedUserInfo.uniqueId} (粉丝: ${seedUserInfo.followerCount || '-'})`);
88
-
89
- if (options.enableFollow) {
90
- try {
91
- log(` 提取关注/粉丝列表...`);
92
- const { following, followers } = await extractFollowAndFollowers(page, {
93
- maxFollowing: options.maxFollowing || 200,
94
- maxFollowers: options.maxFollowers || 200,
95
- log,
96
- });
97
- log(` 关注: ${following.length} | 粉丝: ${followers.length}`);
98
- following.forEach(([handle, name]) => {
99
- const uid = handle.replace(/^@/, '');
100
- users.set(uid, mergeUserInfo(
101
- users.get(uid) || {},
102
- { uniqueId: uid, nickname: name },
103
- 'following'
104
- ));
105
- });
106
- followers.forEach(([handle, name]) => {
107
- const uid = handle.replace(/^@/, '');
108
- users.set(uid, mergeUserInfo(
109
- users.get(uid) || {},
110
- { uniqueId: uid, nickname: name },
111
- 'follower'
112
- ));
113
- });
114
- } catch (e) {
115
- log(` 关注/粉丝提取失败: ${e.message}`);
116
- }
117
- }
118
-
119
- // [2/3] 收集视频列表
120
- const videos = await collectVideos(page, cleanUsername, collectMax, log);
121
- const videoList = Array.from(videos.values()).slice(0, collectMax);
122
- log(`获取到 ${videoList.length} 个视频`);
123
-
124
- if (videoList.length === 0) {
125
- const restricted = await isPageRestricted(page);
126
- if (restricted) {
127
- log('种子用户页面受限(需登录),结束');
128
- } else {
129
- log('没有获取到视频,结束');
130
- }
131
- const output = {
132
- seedUser: { ...seedUserInfo, sources: ['seed'], restricted },
133
- users: [{ ...seedUserInfo, sources: ['seed'], restricted }],
134
- stats: {
135
- totalVideos: 0,
136
- totalUsers: 1,
137
- fromSeed: 1,
138
- fromVideo: 0,
139
- fromComment: 0,
140
- },
141
- };
142
- return { output, browser };
143
- }
144
-
145
- // [3/3] 循环每个视频,执行 runScrape
146
- log(`\n[3/3] 开始循环抓取(${videoList.length} 个视频,每个滑动 ${scrapeDepth} 次)...`);
147
-
148
- const users = new Map();
149
- users.set(seedUserInfo.uniqueId, mergeUserInfo({}, seedUserInfo, 'seed'));
150
-
151
- const restrictedUsers = new Set();
152
- let totalVideosScraped = 0;
153
-
154
- for (let i = 0; i < videoList.length; i++) {
155
- const videoUrl = videoList[i].href.startsWith('http')
156
- ? videoList[i].href
157
- : `https://www.tiktok.com${videoList[i].href}`;
158
-
159
- log(`\n[${i + 1}/${videoList.length}] ${videoUrl}`);
160
-
161
- const { output: scrapeOutput } = await runScrape({
162
- videoUrl,
163
- maxVideos: scrapeDepth,
164
- maxComments,
165
- maxGuess,
166
- preset,
167
- switchMax,
168
- commentMax,
169
- log,
170
- browser,
171
- page,
172
- });
173
-
174
- totalVideosScraped += (scrapeOutput && scrapeOutput.stats) ? scrapeOutput.stats.totalVideos : 0;
175
-
176
- // 合并视频作者信息
177
- for (const vd of scrapeOutput.videoDetails) {
178
- if (restrictedUsers.has(vd.uniqueId)) continue;
179
- const existing = users.get(vd.uniqueId);
180
- users.set(vd.uniqueId, mergeUserInfo(existing || {}, vd, 'video'));
181
- }
182
-
183
- // 添加评论者
184
- for (const cu of scrapeOutput.commentUsers) {
185
- if (restrictedUsers.has(cu)) continue;
186
- if (!users.has(cu)) {
187
- users.set(cu, mergeUserInfo({}, { uniqueId: cu }, 'comment'));
188
- }
189
- }
190
-
191
- // 添加猜你喜欢作者
192
- for (const ga of (scrapeOutput.guessAuthors || [])) {
193
- const gaId = ga.replace(/^@/, '');
194
- if (restrictedUsers.has(gaId)) continue;
195
- if (!users.has(gaId)) {
196
- users.set(gaId, mergeUserInfo({}, { uniqueId: gaId }, 'guess'));
197
- }
198
- }
199
- }
200
-
201
- // 构建输出
202
- const usersList = [...users.values()].map(u => {
203
- const { _sources, ...rest } = u;
204
- return { ...rest, sources: _sources || [] };
205
- });
206
-
207
- usersList.sort((a, b) => {
208
- const aIsSeed = a._sources && a._sources.includes('seed');
209
- const bIsSeed = b._sources && b._sources.includes('seed');
210
- if (aIsSeed && !bIsSeed) return -1;
211
- if (!aIsSeed && bIsSeed) return 1;
212
- const aHasInfo = a.nickname || a.followerCount;
213
- const bHasInfo = b.nickname || b.followerCount;
214
- if (aHasInfo && !bHasInfo) return -1;
215
- if (!aHasInfo && bHasInfo) return 1;
216
- return 0;
217
- });
218
-
219
- const output = usersList;
220
-
221
- log(`\n结果: ${usersList.length} 个用户`);
222
-
223
- return { output, browser };
224
- }
225
-
226
- async function processUser(page, username, options, log) {
227
- const {
228
- collectMax = 1,
229
- scrapeDepth = 50,
230
- maxComments = 200,
231
- maxGuess = 10,
232
- preset = 'fast',
233
- switchMax = null,
234
- commentMax = null,
235
- enableFollow = false,
236
- maxFollowing = 200,
237
- maxFollowers = 200,
238
- browser = null,
239
- } = options;
240
-
241
- const result = {
242
- userInfo: null,
243
- collectedVideos: [],
244
- discoveredVideoAuthors: [],
245
- discoveredCommentAuthors: [],
246
- discoveredGuessAuthors: [],
247
- discoveredFollowing: [],
248
- discoveredFollowers: [],
249
- error: null,
250
- };
251
-
252
- try {
253
- log(`\n[processUser] 访问 @${username}...`);
254
- await retryWithBackoff(() => page.goto(`https://www.tiktok.com/@${username}`, {
255
- waitUntil: 'load',
256
- timeout: 30000,
257
- }), { log });
258
- await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
259
- await delay(1000, 2000);
260
-
261
- const info = await getUserInfo(page);
262
- result.userInfo = info;
263
- if (!info.uniqueId) {
264
- info.uniqueId = username;
265
- }
266
- log(` 昵称: ${info.nickname || '-'} | 粉丝: ${info.followerCount || 0}`);
267
-
268
- if (options.enableFollow) {
269
- try {
270
- log(` 提取关注/粉丝列表...`);
271
- const { following, followers } = await extractFollowAndFollowers(page, {
272
- maxFollowing: options.maxFollowing || 200,
273
- maxFollowers: options.maxFollowers || 200,
274
- log,
275
- });
276
- result.discoveredFollowing = following;
277
- result.discoveredFollowers = followers;
278
- log(` 关注: ${following.length} | 粉丝: ${followers.length}`);
279
- } catch (e) {
280
- log(` 关注/粉丝提取失败: ${e.message}`);
281
- result.discoveredFollowing = [];
282
- result.discoveredFollowers = [];
283
- }
284
- }
285
-
286
- const videos = await collectVideos(page, username, collectMax, log);
287
- const videoList = Array.from(videos.values()).slice(0, collectMax);
288
- result.collectedVideos = videoList.map(v => ({
289
- videoId: v.id,
290
- videoUrl: v.href,
291
- }));
292
-
293
- if (videoList.length > 0) {
294
- const allVideoAuthors = new Map();
295
- const allCommentAuthors = new Set();
296
- const allGuessAuthors = new Set();
297
-
298
- for (let i = 0; i < videoList.length; i++) {
299
- const video = videoList[i];
300
- const videoUrl = video.href.startsWith('http')
301
- ? video.href
302
- : `https://www.tiktok.com${video.href}`;
303
- log(` [${i + 1}/${videoList.length}] 开始 scrape: ${videoUrl} (深度 ${scrapeDepth})`);
304
-
305
- const scrapeResult = await runScrape({
306
- videoUrl,
307
- maxVideos: scrapeDepth,
308
- maxComments,
309
- maxGuess,
310
- preset,
311
- switchMax,
312
- commentMax,
313
- browser,
314
- page,
315
- log,
316
- });
317
-
318
- const scrapeOutput = scrapeResult.output;
319
-
320
- if (scrapeOutput && scrapeOutput.videoDetails) {
321
- for (const vd of scrapeOutput.videoDetails) {
322
- if (!allVideoAuthors.has(vd.uniqueId)) {
323
- allVideoAuthors.set(vd.uniqueId, {
324
- uniqueId: vd.uniqueId,
325
- nickname: vd.nickname,
326
- locationCreated: vd.locationCreated,
327
- });
328
- }
329
- }
330
- }
331
-
332
- if (scrapeOutput && scrapeOutput.commentUsers) {
333
- for (const cu of scrapeOutput.commentUsers) {
334
- allCommentAuthors.add(cu);
335
- }
336
- }
337
-
338
- if (scrapeOutput && scrapeOutput.guessAuthors) {
339
- for (const ga of scrapeOutput.guessAuthors) {
340
- allGuessAuthors.add(ga);
341
- }
342
- }
343
- }
344
-
345
- result.discoveredVideoAuthors = [...allVideoAuthors.values()];
346
- result.discoveredCommentAuthors = [...allCommentAuthors];
347
- result.discoveredGuessAuthors = [...allGuessAuthors];
348
-
349
- log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者, ${result.discoveredGuessAuthors.length} 个猜你喜欢作者`);
350
- } else {
351
- const restricted = await isPageRestricted(page);
352
- result.restricted = restricted;
353
- if (restricted) {
354
- log(` @${username} 页面受限(需登录),标记跳过`);
355
- } else {
356
- log(` @${username} 没有视频,跳过 scrape`);
357
- }
358
- }
359
- } catch (e) {
360
- result.error = e.message;
361
- log(` [错误] ${e.message}`);
362
- }
363
-
364
- return result;
365
- }
366
-
367
- module.exports = { runAuto, processUser };
package/src/data-store.cjs DELETED
@@ -1,69 +0,0 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
-
4
- function createStore(filePath) {
5
- let data = [];
6
-
7
- if (filePath) {
8
- const resolved = path.resolve(filePath);
9
- if (fs.existsSync(resolved)) {
10
- try {
11
- const raw = fs.readFileSync(resolved, 'utf-8');
12
- data = JSON.parse(raw);
13
- if (!Array.isArray(data)) data = [];
14
- } catch (e) {
15
- console.error(`[data-store] 读取文件失败: ${e.message}`);
16
- data = [];
17
- }
18
- }
19
- }
20
-
21
- function save() {
22
- if (!filePath) return;
23
- const resolved = path.resolve(filePath);
24
- const json = JSON.stringify(data, null, 2);
25
- fs.writeFileSync(resolved, json, 'utf-8');
26
- }
27
-
28
- function getUser(uid) {
29
- return data.find(u => u.uniqueId === uid);
30
- }
31
-
32
- function hasUser(uid) {
33
- return getUser(uid) !== undefined;
34
- }
35
-
36
- function addUser(user) {
37
- const existing = getUser(user.uniqueId);
38
- if (existing) {
39
- for (const key of Object.keys(user)) {
40
- if (key === 'uniqueId') continue;
41
- if (key === 'sources') continue;
42
- if (user[key] !== undefined && user[key] !== null && user[key] !== '') {
43
- existing[key] = user[key];
44
- }
45
- }
46
- if (user.sources && Array.isArray(user.sources)) {
47
- existing.sources = [...new Set([...(existing.sources || []), ...user.sources])];
48
- }
49
- } else {
50
- data.push(user);
51
- }
52
- }
53
-
54
- function getPendingUsers() {
55
- return data.filter(u => u.followerCount === undefined);
56
- }
57
-
58
- function getAllUsers() {
59
- return data;
60
- }
61
-
62
- function getProcessedUsers() {
63
- return data.filter(u => u.processed === true || u.followerCount !== undefined);
64
- }
65
-
66
- return { save, getUser, hasUser, addUser, getPendingUsers, getAllUsers, getProcessedUsers, data };
67
- }
68
-
69
- module.exports = { createStore };
package/src/get-user-videos.cjs DELETED
@@ -1,59 +0,0 @@
1
- const { runGetUserVideos } = require('./get-user-videos-core.cjs');
2
- const fs = require('fs');
3
- const path = require('path');
4
-
5
- async function main() {
6
- const rawArgs = process.argv.slice(2);
7
-
8
- let outputPath = null;
9
- const args = [];
10
- for (let i = 0; i < rawArgs.length; i++) {
11
- if (rawArgs[i] === '-o' || rawArgs[i] === '--output') {
12
- outputPath = rawArgs[++i];
13
- } else {
14
- args.push(rawArgs[i]);
15
- }
16
- }
17
-
18
- if (args.length < 1) {
19
- console.error('用法: node get-user-videos.cjs <用户名> [最大视频数] [-o 输出路径]');
20
- console.error('示例: node get-user-videos.cjs bar.lar.lar.moeta 1000');
21
- console.error(' node get-user-videos.cjs username 50 -o videos.json');
22
- console.error('');
23
- console.error('选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout');
24
- process.exit(1);
25
- }
26
-
27
- const username = args[0].replace('@', '');
28
- const maxVideos = parseInt(args[1]) || 5;
29
-
30
- let browser;
31
- try {
32
- const { output, browser: b } = await runGetUserVideos({
33
- username,
34
- maxVideos,
35
- log: console.error,
36
- });
37
- browser = b;
38
-
39
- const json = JSON.stringify(output, null, 2);
40
- if (outputPath) {
41
- const resultFile = path.isAbsolute(outputPath) ? outputPath : path.resolve(outputPath);
42
- fs.mkdirSync(path.dirname(resultFile), { recursive: true });
43
- fs.writeFileSync(resultFile, json, 'utf-8');
44
- console.error(`已保存到 ${resultFile}`);
45
- } else {
46
- process.stdout.write(json + '\n');
47
- }
48
- } catch (err) {
49
- console.error(err.message);
50
- process.exit(1);
51
- } finally {
52
- if (browser) await browser.close().catch(() => {});
53
- }
54
- }
55
-
56
- main().catch((err) => {
57
- console.error(err.message);
58
- process.exit(1);
59
- });
package/src/scraper/index.cjs DELETED
@@ -1,97 +0,0 @@
1
- const { runScrape } = require("./core.cjs");
2
-
3
- async function main() {
4
- const rawArgs = process.argv.slice(2);
5
-
6
- let outputPath = null;
7
- const args = [];
8
- for (let i = 0; i < rawArgs.length; i++) {
9
- if (rawArgs[i] === "-o" || rawArgs[i] === "--output") {
10
- outputPath = rawArgs[++i];
11
- } else {
12
- args.push(rawArgs[i]);
13
- }
14
- }
15
-
16
- const videoUrl = args[0];
17
-
18
- let preset = null;
19
- let maxVideos = 20;
20
- let maxComments = 999;
21
- let maxGuess = 10;
22
- let switchMax = null;
23
- let commentMax = null;
24
-
25
- if (args[1]) {
26
- if (["fast", "normal", "slow", "stealth"].includes(args[1].toLowerCase())) {
27
- preset = args[1].toLowerCase();
28
- maxVideos = parseInt(args[2]) || 20;
29
- maxComments = parseInt(args[3]) || 999;
30
- maxGuess = parseInt(args[4]) || 10;
31
- } else {
32
- maxVideos = parseInt(args[1]) || 20;
33
- maxComments = parseInt(args[2]) || 999;
34
- maxGuess = parseInt(args[3]) || 10;
35
- switchMax = parseInt(args[4]) || null;
36
- commentMax = parseInt(args[5]) || null;
37
- }
38
- }
39
-
40
- if (!videoUrl) {
41
- console.error("用法:");
42
- console.error(
43
- " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [猜你喜欢数] [-o 输出路径]",
44
- );
45
- console.error(
46
- " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [猜你喜欢数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
47
- );
48
- console.error(
49
- "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
50
- );
51
- console.error(
52
- "选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout",
53
- );
54
- process.exit(1);
55
- }
56
-
57
- let browser;
58
- try {
59
- const { output, browser: b, isExternal } = await runScrape({
60
- videoUrl,
61
- maxVideos,
62
- maxComments,
63
- maxGuess,
64
- preset,
65
- switchMax,
66
- commentMax,
67
- log: console.error,
68
- });
69
- if (!isExternal) {
70
- browser = b;
71
- }
72
-
73
- const json = JSON.stringify(output, null, 2);
74
- if (outputPath) {
75
- const fs = require("fs");
76
- const path = require("path");
77
- const resultFile = path.isAbsolute(outputPath)
78
- ? outputPath
79
- : path.resolve(outputPath);
80
- fs.mkdirSync(path.dirname(resultFile), { recursive: true });
81
- fs.writeFileSync(resultFile, json);
82
- console.error(`已保存到 ${resultFile}`);
83
- } else {
84
- process.stdout.write(json + "\n");
85
- }
86
- } catch (err) {
87
- console.error(err.message);
88
- process.exit(1);
89
- } finally {
90
- if (browser) await browser.close().catch(() => {});
91
- }
92
- }
93
-
94
- main().catch((err) => {
95
- console.error(err.message);
96
- process.exit(1);
97
- });
package/src/scraper/modules/follow-extractor.cjs DELETED
@@ -1,112 +0,0 @@
1
- const { delay, getDelayConfig } = require('./page-helpers.cjs');
2
- const { scrollAndCollect } = require('./scroll-collector.cjs');
3
-
4
- const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];
5
-
6
- async function openFollowModal(page) {
7
- const el = await page.$('[data-e2e=following]');
8
- if (!el) {
9
- throw new Error('未找到 [data-e2e=following] 元素,请确认当前页面为用户主页');
10
- }
11
- await el.evaluate(el => el.parentElement.click());
12
- await delay(2000, 3000);
13
-
14
- await page.waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 }).catch(() => {
15
- throw new Error('关注弹窗未出现 DivUserListContainer');
16
- });
17
- }
18
-
19
- async function switchToFollowersTab(page) {
20
- await page.evaluate(() => {
21
- const tabs = document.querySelectorAll('[class*=DivTabItem]');
22
- for (const tab of tabs) {
23
- if (tab.textContent?.includes('粉丝')) {
24
- tab.click();
25
- return;
26
- }
27
- }
28
- throw new Error('未找到粉丝 Tab');
29
- });
30
- await delay(2000, 3000);
31
- await page.waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 }).catch(() => {});
32
- }
33
-
34
- async function closeFollowModal(page) {
35
- await page.evaluate(() => {
36
- const closeBtn = document.querySelector('[data-e2e=follow-popup-close]');
37
- if (closeBtn) closeBtn.click();
38
- });
39
- await delay(1000, 2000);
40
- }
41
-
42
- function createUserCollectFn() {
43
- return (container) => {
44
- const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];
45
- const modal = document.querySelector('[class*=eyhy6180]');
46
- const root = modal || document;
47
- const users = [];
48
- const seen = new Set();
49
- const links = root.querySelectorAll('a[href*="/@"]');
50
- for (const link of links) {
51
- const match = link.href.match(/@([^/?]+)/);
52
- if (!match) continue;
53
- const handle = '@' + decodeURIComponent(match[1]);
54
- const text = (link.textContent || '').trim();
55
- if (text.length <= 2) continue;
56
- if (FILTER_WORDS.includes(text)) continue;
57
- if (seen.has(handle)) continue;
58
- seen.add(handle);
59
- users.push({ handle, displayName: text });
60
- }
61
- return { items: users };
62
- };
63
- }
64
-
65
- async function extractUsersFromModal(page, maxUsers) {
66
- const config = getDelayConfig();
67
- const allUsers = await scrollAndCollect(page, {
68
- container: '[class*=DivUserListContainer]',
69
- findScrollable: false,
70
- collectFn: createUserCollectFn(),
71
- uniqueKey: (u) => u.handle,
72
- maxItems: maxUsers,
73
- delayRange: [Math.round(config.commentMax * 0.3), config.commentMax],
74
- staleThreshold: 5,
75
- });
76
-
77
- return allUsers.slice(0, maxUsers);
78
- }
79
-
80
- async function extractFollowAndFollowers(page, options = {}) {
81
- const {
82
- maxFollowing = 999,
83
- maxFollowers = 999,
84
- log = () => {},
85
- } = options;
86
-
87
- log('打开关注弹窗...');
88
- await openFollowModal(page);
89
-
90
- log('提取关注列表...');
91
- const following = await extractUsersFromModal(page, maxFollowing);
92
- log(` 已关注: ${following.length}`);
93
-
94
- log('切换到粉丝 Tab...');
95
- await switchToFollowersTab(page);
96
-
97
- log('提取粉丝列表...');
98
- const followers = await extractUsersFromModal(page, maxFollowers);
99
- log(` 粉丝: ${followers.length}`);
100
-
101
- log('关闭弹窗...');
102
- await closeFollowModal(page);
103
-
104
- return {
105
- following: following.map(u => [u.handle, u.displayName]),
106
- followers: followers.map(u => [u.handle, u.displayName]),
107
- };
108
- }
109
-
110
- module.exports = {
111
- extractFollowAndFollowers,
112
- };