tt-help-cli-ycl 1.0.8 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +44 -44
- package/src/auto-core.cjs +367 -310
- package/src/data-store.cjs +69 -65
- package/src/explore-core.cjs +157 -0
- package/src/get-user-videos-core.cjs +142 -164
- package/src/get-user-videos.cjs +58 -58
- package/src/lib/args.js +397 -295
- package/src/lib/auto-browser.mjs +12 -10
- package/src/lib/constants.js +151 -148
- package/src/lib/explore.js +225 -244
- package/src/lib/fetcher.js +60 -60
- package/src/lib/filter.js +66 -66
- package/src/lib/get-user-videos-browser.mjs +5 -5
- package/src/lib/io.js +76 -76
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/scrape-browser.mjs +5 -5
- package/src/lib/scrape.js +39 -39
- package/src/main.mjs +962 -668
- package/src/scraper/core.cjs +213 -213
- package/src/scraper/index.cjs +96 -96
- package/src/scraper/modules/comment-extractor.cjs +49 -122
- package/src/scraper/modules/follow-extractor.cjs +112 -0
- package/src/scraper/modules/guess-extractor.cjs +53 -117
- package/src/scraper/modules/page-helpers.cjs +422 -422
- package/src/scraper/modules/scroll-collector.cjs +173 -0
- package/src/scraper/modules/video-scanner.cjs +43 -43
- package/src/test-auto-follow.cjs +109 -0
- package/src/test-extractors.cjs +75 -0
- package/src/test-follow.cjs +41 -0
- package/src/watch/public/index.html +271 -265
- package/src/watch/server.mjs +153 -145
- package/src/results/user-videos-bar.lar.lar.moeta.json +0 -37
package/src/data-store.cjs
CHANGED
|
@@ -1,65 +1,69 @@
|
|
|
1
|
-
const fs = require('fs');
|
|
2
|
-
const path = require('path');
|
|
3
|
-
|
|
4
|
-
function createStore(filePath) {
|
|
5
|
-
let data = [];
|
|
6
|
-
|
|
7
|
-
if (filePath) {
|
|
8
|
-
const resolved = path.resolve(filePath);
|
|
9
|
-
if (fs.existsSync(resolved)) {
|
|
10
|
-
try {
|
|
11
|
-
const raw = fs.readFileSync(resolved, 'utf-8');
|
|
12
|
-
data = JSON.parse(raw);
|
|
13
|
-
if (!Array.isArray(data)) data = [];
|
|
14
|
-
} catch (e) {
|
|
15
|
-
console.error(`[data-store] 读取文件失败: ${e.message}`);
|
|
16
|
-
data = [];
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
function save() {
|
|
22
|
-
if (!filePath) return;
|
|
23
|
-
const resolved = path.resolve(filePath);
|
|
24
|
-
const json = JSON.stringify(data, null, 2);
|
|
25
|
-
fs.writeFileSync(resolved, json, 'utf-8');
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
function getUser(uid) {
|
|
29
|
-
return data.find(u => u.uniqueId === uid);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
function
|
|
55
|
-
return data;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
function
|
|
59
|
-
return data
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
|
|
4
|
+
/**
 * Creates an in-memory user store, optionally backed by a JSON file.
 *
 * When `filePath` is given and the file exists, its content is loaded as the
 * initial record list. Unreadable files, invalid JSON and non-array content
 * all fall back to an empty list (the read error is reported on stderr).
 *
 * @param {string} [filePath] - Location of the JSON file; when falsy the store
 *   is memory-only and `save()` is a no-op.
 * @returns {{save: Function, getUser: Function, hasUser: Function,
 *   addUser: Function, getPendingUsers: Function, getAllUsers: Function,
 *   getProcessedUsers: Function, data: Array<object>}} Store API; `data` is
 *   the live backing array (the same object `getAllUsers()` returns).
 */
function createStore(filePath) {
  // Resolve once so loading and saving always target the same file, even if
  // the process changes its working directory in between.
  const resolved = filePath ? path.resolve(filePath) : null;
  let data = [];

  if (resolved && fs.existsSync(resolved)) {
    try {
      const raw = fs.readFileSync(resolved, 'utf-8');
      data = JSON.parse(raw);
      if (!Array.isArray(data)) data = [];
    } catch (e) {
      console.error(`[data-store] 读取文件失败: ${e.message}`);
      data = [];
    }
  }

  /** A record counts as processed once it is flagged or has a follower count. */
  function isProcessed(user) {
    return user.processed === true || user.followerCount !== undefined;
  }

  /** Writes the current records to the backing file as pretty-printed JSON. */
  function save() {
    if (!resolved) return;
    // The target directory may not exist yet on a first run.
    fs.mkdirSync(path.dirname(resolved), { recursive: true });
    const json = JSON.stringify(data, null, 2);
    fs.writeFileSync(resolved, json, 'utf-8');
  }

  /** Returns the record whose `uniqueId` equals `uid`, or `undefined`. */
  function getUser(uid) {
    return data.find(u => u.uniqueId === uid);
  }

  /** Tells whether a record with the given `uniqueId` is stored. */
  function hasUser(uid) {
    return getUser(uid) !== undefined;
  }

  /**
   * Inserts a new record, or merges `user` into the stored record that has
   * the same `uniqueId`.
   *
   * On merge, `undefined`, `null` and empty-string values never overwrite
   * stored values, and `sources` arrays are unioned without duplicates.
   *
   * @param {object} user - Record to add; must carry `uniqueId`.
   * @throws {TypeError} When `user` is not an object.
   */
  function addUser(user) {
    if (user === null || typeof user !== 'object') {
      throw new TypeError('addUser expects a user object');
    }

    const existing = getUser(user.uniqueId);
    if (!existing) {
      data.push(user);
      return;
    }

    for (const key of Object.keys(user)) {
      // The id is the merge key and sources are unioned separately below.
      if (key === 'uniqueId' || key === 'sources') continue;
      const value = user[key];
      if (value !== undefined && value !== null && value !== '') {
        existing[key] = value;
      }
    }

    if (Array.isArray(user.sources)) {
      existing.sources = [...new Set([...(existing.sources || []), ...user.sources])];
    }
  }

  /**
   * Returns the records that still need processing.
   *
   * This is the exact complement of `getProcessedUsers()`, so a record that
   * was flagged `processed` without ever getting a follower count is not
   * handed out again.
   */
  function getPendingUsers() {
    return data.filter(u => !isProcessed(u));
  }

  /** Returns the live array of all records. */
  function getAllUsers() {
    return data;
  }

  /** Returns the records that are flagged processed or have a follower count. */
  function getProcessedUsers() {
    return data.filter(isProcessed);
  }

  return { save, getUser, hasUser, addUser, getPendingUsers, getAllUsers, getProcessedUsers, data };
}
|
|
68
|
+
|
|
69
|
+
// Public API of the data-store module: only the store factory is exported.
module.exports = { createStore };
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
const {
|
|
2
|
+
delay,
|
|
3
|
+
ensureBrowserReady,
|
|
4
|
+
setDelayConfig,
|
|
5
|
+
closeCommentPanel,
|
|
6
|
+
retryWithBackoff,
|
|
7
|
+
} = require('./scraper/modules/page-helpers.cjs');
|
|
8
|
+
const {
|
|
9
|
+
getUserInfo,
|
|
10
|
+
collectVideos,
|
|
11
|
+
isPageRestricted,
|
|
12
|
+
} = require('./get-user-videos-core.cjs');
|
|
13
|
+
const { scrapeSingleVideo } = require('./scraper/core.cjs');
|
|
14
|
+
const { extractFollowAndFollowers } = require('./scraper/modules/follow-extractor.cjs');
|
|
15
|
+
const { extractCommentAuthors } = require('./scraper/modules/comment-extractor.cjs');
|
|
16
|
+
const { extractGuessVideos } = require('./scraper/modules/guess-extractor.cjs');
|
|
17
|
+
|
|
18
|
+
/** Formats a counter for logging; only a missing value is shown as '-'. */
function displayCount(value) {
  return value === undefined || value === null || value === '' ? '-' : value;
}

/** Turns a site-relative video href into an absolute tiktok.com URL. */
function absoluteVideoUrl(href) {
  return href.startsWith('http') ? href : `https://www.tiktok.com${href}`;
}

/**
 * Explores a single user: opens the profile, reads the user info, optionally
 * collects following/followers, then opens the first video to learn where it
 * was created. Comment authors and suggested-video authors are collected only
 * when that location equals `options.location`.
 *
 * Failures are not thrown; they are reported through `result.error` and the
 * result is returned with `processed` left at `false`.
 *
 * @param {object} page - Browser page used for all navigation.
 * @param {string} username - Handle to explore, without the leading '@'.
 * @param {object} [options]
 * @param {number} [options.maxComments=0] - Comment-author limit; 0 skips comments.
 * @param {number} [options.maxGuess=0] - Suggested-video limit; 0 skips suggestions.
 * @param {boolean} [options.enableFollow=true] - Whether to collect following/followers.
 * @param {number} [options.maxFollowing=200] - Limit passed to the follow extractor.
 * @param {number} [options.maxFollowers=200] - Limit passed to the follow extractor.
 * @param {string} [options.location='ES'] - Target value for `locationCreated`.
 * @param {Function} [log=console.error] - Progress logger.
 * @returns {Promise<object>} Result record (see the `result` literal below).
 */
async function processExplore(page, username, options = {}, log = console.error) {
  const {
    maxComments = 0,
    maxGuess = 0,
    enableFollow = true,
    maxFollowing = 200,
    maxFollowers = 200,
    location = 'ES',
  } = options;

  const result = {
    userInfo: null,
    discoveredVideoAuthors: [],
    discoveredCommentAuthors: [],
    discoveredGuessAuthors: [],
    discoveredFollowing: [],
    discoveredFollowers: [],
    collectedVideos: 0,
    processed: false,
    hasFollowData: false,
    keepFollow: false,
    locationCreated: null,
    noVideo: false,
    error: null,
  };

  try {
    log(` 访问 @${username} 主页...`);
    const homeUrl = `https://www.tiktok.com/@${username}`;
    await retryWithBackoff(() => page.goto(homeUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }), { log });
    // Best effort: a profile without a video list is handled further down.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
    await delay(1000, 2000);

    // 1. Read the user info.
    log(` 获取用户信息...`);
    const info = await getUserInfo(page);
    if (info) {
      result.userInfo = info;
      // A genuine zero must be shown as 0, not as "unknown".
      log(` 用户: ${info.nickname || username} | 粉丝: ${displayCount(info.followerCount)} | 视频: ${displayCount(info.videoCount)}`);
    }

    // 2. Collect following/followers before any scrolling, so the buttons
    //    are not scrolled out of the viewport.
    if (enableFollow) {
      try {
        log(` 获取关注/粉丝...`);
        const { following, followers } = await extractFollowAndFollowers(
          page,
          { maxFollowing, maxFollowers, log }
        );
        result.discoveredFollowing = following || [];
        result.discoveredFollowers = followers || [];
        result.hasFollowData = true;
        log(` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`);
      } catch (e) {
        // Follow data is optional: record the failure and carry on.
        log(` 关注/粉丝提取失败: ${e.message}`);
        result.hasFollowData = false;
        result.discoveredFollowing = [];
        result.discoveredFollowers = [];
      }
    }

    // 3. Collect the video list (one video is enough here).
    const videoList = await collectVideos(page, username, 1, log);
    const videoArray = videoList ? [...videoList.values()] : [];
    result.collectedVideos = videoArray.length;

    if (videoArray.length <= 0) {
      result.processed = true;
      result.noVideo = true;
      const restricted = await isPageRestricted(page);
      if (restricted) {
        result.restricted = true;
        log(` @${username} 页面受限(需登录),标记跳过`);
      } else {
        log(` @${username} 没有视频,标记已处理`);
      }
      return result;
    }

    // 4. Open the first video.
    const videoUrl = absoluteVideoUrl(videoArray[0].href);
    log(` 进入第一个视频: ${videoUrl}`);
    await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }), { log });
    await delay(1500, 2500);

    // 5. Read the video data (includes locationCreated).
    const videoData = await scrapeSingleVideo(page, 0, 0, log, 'NEVER_MATCH');
    if (!videoData) {
      // Report a readable error instead of a property-access TypeError.
      throw new Error('未获取到视频信息');
    }
    result.locationCreated = videoData.locationCreated || null;
    log(` 视频作者: ${videoData.videoAuthor} | 国家: ${result.locationCreated || '未知'}`);

    // 6. Decide whether the video was created in the target location.
    const isTargetLocation = result.locationCreated === location;

    if (isTargetLocation) {
      result.keepFollow = true;
      log(` 国家匹配 (${location}),获取评论和猜你喜欢...`);

      if (maxComments > 0) {
        const commentResult = await extractCommentAuthors(page, maxComments);
        result.discoveredCommentAuthors = commentResult || [];
        await closeCommentPanel(page);
        await delay(500, 1000);
        log(` 评论用户: ${result.discoveredCommentAuthors.length}`);
      }

      if (maxGuess > 0) {
        const guessResult = await extractGuessVideos(page, maxGuess);
        result.discoveredGuessAuthors = (guessResult || []).map(v => v.author).filter(Boolean);
        await closeCommentPanel(page);
        await delay(500, 1000);
        log(` 猜你喜欢作者: ${result.discoveredGuessAuthors.length}`);
      }

      result.discoveredVideoAuthors = [{
        uniqueId: videoData.uniqueId,
        nickname: videoData.nickname,
        locationCreated: videoData.locationCreated,
      }];
    } else {
      // Wrong location: skip comments/suggestions and drop the follow lists.
      result.keepFollow = false;
      log(` 国家不匹配 (${result.locationCreated} !== ${location}),跳过评论/猜你喜欢,丢弃关注/粉丝`);
      result.discoveredFollowing = [];
      result.discoveredFollowers = [];
    }

    result.processed = true;

  } catch (e) {
    result.error = e.message;
    log(` [错误] ${e.message}`);
  }

  return result;
}
|
|
156
|
+
|
|
157
|
+
// Public API of the explore-core module: the single-user exploration routine.
module.exports = { processExplore };
|
|
@@ -1,165 +1,143 @@
|
|
|
1
|
-
const { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } = require('./scraper/modules/page-helpers.cjs');
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
else result[key] = match[1];
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
const
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
log(
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
const output = {
|
|
145
|
-
user: userInfo,
|
|
146
|
-
totalVideos: Math.min(allVideos.length, maxVideos),
|
|
147
|
-
videos: allVideos.slice(0, maxVideos).map(v => ({
|
|
148
|
-
id: v.id,
|
|
149
|
-
url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
|
|
150
|
-
})),
|
|
151
|
-
};
|
|
152
|
-
|
|
153
|
-
return { output, browser };
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
async function isPageRestricted(page) {
|
|
157
|
-
return await page.evaluate(() => {
|
|
158
|
-
const bodyText = document.body.innerText;
|
|
159
|
-
return !!(bodyText.includes('登录 TikTok') ||
|
|
160
|
-
bodyText.includes('观众管理功能') ||
|
|
161
|
-
bodyText.includes('Login to TikTok'));
|
|
162
|
-
});
|
|
163
|
-
}
|
|
164
|
-
|
|
1
|
+
const { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } = require('./scraper/modules/page-helpers.cjs');
|
|
2
|
+
const { scrollAndCollect } = require('./scraper/modules/scroll-collector.cjs');
|
|
3
|
+
|
|
4
|
+
/**
 * Extracts profile fields of the currently opened user page.
 *
 * The fields are read with regular expressions from the page's serialized
 * HTML; a field is simply absent from the result when its pattern does not
 * match. `uniqueId` is taken from the `/@handle` part of the page URL.
 *
 * @param {object} page - Browser page exposing `evaluate(fn)`.
 * @returns {Promise<object>} Object with any of: `uniqueId`, `secUid`,
 *   `nickname`, `ttSeller`, `verified`, `followerCount`, `videoCount`,
 *   `followingCount`, `heartCount`, `signature`, `locationCreated`, `region`.
 */
async function getUserInfo(page) {
  // The callback is handed to page.evaluate, so it must be self-contained:
  // every helper it needs is declared inside it.
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '?' and '#' as well as '/', so a URL such as
    // /@user?lang=en does not yield "user?lang=en" as the handle.
    const handle = window.location.href.match(/\/@([^\/?#]+)/);
    if (handle) result.uniqueId = handle[1];

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];
    // Captured with escape sequences intact, so they need JSON decoding.
    const escapedTextKeys = ['nickname', 'signature'];

    // Decodes the body of a JSON string literal (\n, \\, \", \uXXXX, ...).
    // Text that is not a valid JSON string body falls back to `fallback`.
    const decodeJsonText = (raw, fallback) => {
      try {
        return JSON.parse(`"${raw}"`);
      } catch (e) {
        return fallback;
      }
    };

    for (const [key, pat] of Object.entries(patterns)) {
      const match = html.match(pat);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) {
        result[key] = raw === 'true';
      } else if (numKeys.includes(key)) {
        result[key] = parseInt(raw, 10);
      } else if (escapedTextKeys.includes(key)) {
        // On undecodable text keep the previous behavior: the signature
        // gets the plain \n and \\ replacement, the nickname stays raw.
        const legacy = key === 'signature'
          ? raw.replace(/\\n/g, '\n').replace(/\\\\/g, '\\')
          : raw;
        result[key] = decodeJsonText(raw, legacy);
      } else {
        result[key] = raw;
      }
    }

    return result;
  });
}
|
|
42
|
+
|
|
43
|
+
/**
 * Scrolls the user's profile and gathers links to that user's videos.
 *
 * The scrolling itself is delegated to `scrollAndCollect`; this function
 * supplies the per-round collector and the progress logging, then removes
 * duplicate video ids.
 *
 * @param {object} page - Browser page showing the profile.
 * @param {string} username - Handle whose `/@handle/video/` links are kept.
 * @param {number} maxVideos - Passed to `scrollAndCollect` as `maxItems`.
 * @param {Function} log - Progress logger.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Videos keyed by
 *   id; the first occurrence of each id wins.
 */
async function collectVideos(page, username, maxVideos, log) {
  // Runs once per scroll round: picks every anchor pointing at one of the
  // user's videos and pulls the numeric id out of its href.
  const gatherLinks = (container, args) => {
    const needle = '/@' + args.handle + '/video/';
    const items = [];
    for (const anchor of Array.from(document.querySelectorAll('a'))) {
      const href = anchor.getAttribute('href') || '';
      if (!href.includes(needle)) continue;
      const found = href.match(/\/video\/(\d+)/);
      if (found) items.push({ id: found[1], href });
    }
    return { items };
  };

  // Reports progress when the round found something or the distinct total
  // reached a multiple of ten.
  const reportRound = (round, items, allItems) => {
    const distinct = new Set(allItems.map(v => v.id)).size;
    if (distinct <= 0) return;
    if (distinct % 10 !== 0 && items.length <= 0) return;
    log(`滚动 ${round + 1}: ${distinct} 个视频 (本轮 ${items.length} 条)`);
  };

  const collected = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    collectFn: gatherLinks,
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: reportRound,
  });

  // De-duplicate by id, keeping the first link seen for each video.
  const byId = new Map();
  for (const video of collected) {
    if (byId.has(video.id)) continue;
    byId.set(video.id, video);
  }

  log(`收集完成: ${byId.size} 个视频`);
  return byId;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Opens a user's profile in the browser and returns the user info together
 * with up to `maxVideos` video links.
 *
 * On success the browser is handed back to the caller, who is responsible
 * for closing it. On any failure after the browser was obtained, the browser
 * is closed here before the error is rethrown, because the caller never
 * receives the handle in that case.
 *
 * @param {object} options
 * @param {string} options.username - Handle without the leading '@'.
 * @param {number} [options.maxVideos=5] - Maximum number of videos to return.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 * @throws Rethrows whatever the browser helpers or collectors throw.
 */
async function runGetUserVideos(options) {
  const {
    username,
    maxVideos = 5,
    log = console.error,
  } = options;

  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // Best effort: a profile without a video list simply yields no videos.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: allVideos.slice(0, maxVideos).map(v => ({
        id: v.id,
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // A close failure must not mask the original error.
    await browser.close().catch(() => {});
    throw e;
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * Checks whether the current page shows one of the known restriction notices
 * (the login prompts or the audience-controls notice) in its visible body text.
 *
 * @param {object} page - Browser page exposing `evaluate(fn)`.
 * @returns {Promise<boolean>} `true` when any notice text is present.
 */
async function isPageRestricted(page) {
  const restricted = await page.evaluate(() => {
    const visibleText = document.body.innerText;
    const notices = ['登录 TikTok', '观众管理功能', 'Login to TikTok'];
    return notices.some((notice) => visibleText.includes(notice));
  });
  return restricted;
}
|
|
142
|
+
|
|
165
143
|
// Public API of the get-user-videos-core module: the page-level helpers plus
// the end-to-end runner.
module.exports = { getUserInfo, collectVideos, runGetUserVideos, isPageRestricted };
|