tt-help-cli-ycl 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +44 -44
- package/src/cli/auto.js +94 -0
- package/src/cli/explore.js +117 -0
- package/src/cli/progress.js +111 -0
- package/src/cli/scrape.js +47 -0
- package/src/cli/utils.js +18 -0
- package/src/cli/videos.js +41 -0
- package/src/cli/watch.js +28 -0
- package/src/lib/args.js +386 -397
- package/src/lib/browser/anti-detect.js +23 -0
- package/src/lib/browser/cdp.js +142 -0
- package/src/lib/browser/launch.js +43 -0
- package/src/lib/browser/page.js +80 -0
- package/src/lib/constants.js +85 -168
- package/src/lib/delay.js +54 -0
- package/src/lib/explore-fetch.js +118 -0
- package/src/lib/fetcher.js +45 -60
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -76
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +44 -0
- package/src/lib/scrape.js +40 -39
- package/src/lib/url.js +52 -0
- package/src/main.mjs +199 -962
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.mjs +183 -0
- package/src/scraper/{core.cjs → core.mjs} +188 -214
- package/src/{explore-core.cjs → scraper/explore-core.mjs} +44 -42
- package/src/scraper/modules/captcha-handler.mjs +114 -0
- package/src/scraper/modules/comment-extractor.mjs +69 -0
- package/src/scraper/modules/follow-extractor.mjs +121 -0
- package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +51 -53
- package/src/scraper/modules/page-error-detector.mjs +70 -0
- package/src/scraper/modules/page-helpers.mjs +46 -0
- package/src/scraper/modules/scroll-collector.mjs +189 -0
- package/src/{get-user-videos-core.cjs → videos/core.mjs} +126 -143
- package/src/watch/data-store.mjs +239 -0
- package/src/watch/public/index.html +446 -271
- package/src/watch/server.mjs +257 -153
- package/src/auto-core.cjs +0 -367
- package/src/data-store.cjs +0 -69
- package/src/get-user-videos.cjs +0 -59
- package/src/lib/auto-browser.mjs +0 -13
- package/src/lib/explore.js +0 -225
- package/src/lib/get-user-videos-browser.mjs +0 -6
- package/src/lib/scrape-browser.mjs +0 -6
- package/src/scraper/index.cjs +0 -97
- package/src/scraper/modules/comment-extractor.cjs +0 -49
- package/src/scraper/modules/follow-extractor.cjs +0 -112
- package/src/scraper/modules/page-helpers.cjs +0 -422
- package/src/scraper/modules/scroll-collector.cjs +0 -173
- package/src/scraper/modules/video-scanner.cjs +0 -43
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"user": {
|
|
3
|
+
"uniqueId": "bar.lar.lar.moeta",
|
|
4
|
+
"secUid": "MS4wLjABAAAA3cgKTWvKfga0JAWeakAzx3zQ-aFAC8RuQvxD4HQFraKKsc_TbOIyMo3_ofVlXofV",
|
|
5
|
+
"nickname": "Bar Lar Lar Moetain",
|
|
6
|
+
"ttSeller": false,
|
|
7
|
+
"verified": false,
|
|
8
|
+
"followerCount": 24000,
|
|
9
|
+
"videoCount": 749,
|
|
10
|
+
"followingCount": 4293,
|
|
11
|
+
"heartCount": 254300,
|
|
12
|
+
"signature": ""
|
|
13
|
+
},
|
|
14
|
+
"totalVideos": 5,
|
|
15
|
+
"videos": [
|
|
16
|
+
{
|
|
17
|
+
"id": "7638231799084158228",
|
|
18
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638231799084158228"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "7638162444698914068",
|
|
22
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638162444698914068"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"id": "7638116251767819541",
|
|
26
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638116251767819541"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "7638069637321690388",
|
|
30
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7638069637321690388"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "7637927171025112341",
|
|
34
|
+
"url": "https://www.tiktok.com/@bar.lar.lar.moeta/video/7637927171025112341"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import {
|
|
2
|
+
delay,
|
|
3
|
+
ensureBrowserReady,
|
|
4
|
+
ensureTikTokPage,
|
|
5
|
+
setDelayConfig,
|
|
6
|
+
getDelayConfig,
|
|
7
|
+
closeCommentPanel,
|
|
8
|
+
retryWithBackoff,
|
|
9
|
+
detectPageError,
|
|
10
|
+
isLoggedIn,
|
|
11
|
+
} from './modules/page-helpers.mjs';
|
|
12
|
+
export { ensureBrowserReady };
|
|
13
|
+
import {
|
|
14
|
+
getUserInfo,
|
|
15
|
+
collectVideos,
|
|
16
|
+
} from '../videos/core.mjs';
|
|
17
|
+
import { runScrape } from './core.mjs';
|
|
18
|
+
import { extractFollowAndFollowers } from './modules/follow-extractor.mjs';
|
|
19
|
+
|
|
20
|
+
/**
 * Merge a newly collected user record into an existing one and return the
 * result as a new object. Neither input is mutated.
 *
 * Rules applied to every key of `incoming`:
 * - `_sources` is never copied from `incoming`; it is maintained only through
 *   the `source` argument.
 * - `undefined`, `null` and `''` incoming values are ignored.
 * - When both sides hold a number, the larger one is kept.
 * - Otherwise the incoming value only fills a slot that is missing, `null`
 *   or `''` on the existing side; existing non-empty values win.
 *
 * @param {object|null|undefined} existing - Previously stored record.
 * @param {object|null|undefined} incoming - Freshly collected record.
 * @param {string} [source] - Label added (once) to the result's `_sources`.
 * @returns {object} The merged record.
 */
function mergeUserInfo(existing, incoming, source) {
  const isBlank = (v) => v === undefined || v === null || v === '';
  const merged = { ...existing };

  // The spread above is shallow: without this copy, pushing a source below
  // would also modify the caller's `existing._sources` array.
  if (Array.isArray(merged._sources)) {
    merged._sources = [...merged._sources];
  }

  // `incoming || {}` lets a missing record behave like an empty one instead
  // of making Object.entries throw.
  for (const [key, value] of Object.entries(incoming || {})) {
    if (key === '_sources' || isBlank(value)) continue;

    if (typeof value === 'number' && typeof merged[key] === 'number') {
      merged[key] = Math.max(merged[key], value);
    } else if (isBlank(merged[key])) {
      merged[key] = value;
    }
  }

  if (source) {
    if (!Array.isArray(merged._sources)) merged._sources = [];
    if (!merged._sources.includes(source)) merged._sources.push(source);
  }

  return merged;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Visit one TikTok profile and gather what the crawler needs from it: the
 * profile info, optionally the following/follower lists, up to `collectMax`
 * of the user's videos, and the authors discovered while scraping from each
 * of those videos.
 *
 * Errors are not thrown to the caller; they are logged and reported through
 * `result.error`. Authors gathered before a failing scrape are still returned.
 *
 * @param {object} page - Browser page used for all navigation.
 * @param {string} username - Profile handle without the leading `@`.
 * @param {object} [options]
 * @param {number} [options.collectMax=1] - Max videos to take from the profile.
 * @param {number} [options.scrapeDepth=50] - Passed to runScrape as `maxVideos`.
 * @param {number} [options.maxComments=200]
 * @param {number} [options.maxGuess=10]
 * @param {string|null} [options.preset='fast'] - Delay preset forwarded to runScrape.
 * @param {number|null} [options.switchMax=null]
 * @param {number|null} [options.commentMax=null]
 * @param {boolean} [options.enableFollow=false] - Also extract following/followers.
 * @param {number} [options.maxFollowing=200]
 * @param {number} [options.maxFollowers=200]
 * @param {object|null} [options.browser=null] - Browser owning `page`.
 * @param {function(string): void} log - Progress logger.
 * @returns {Promise<object>} Result with `userInfo`, `collectedVideos`, the
 *   `discovered*` arrays, `error`, and — only when no videos were found —
 *   `restricted`.
 */
async function processUser(page, username, options, log) {
  const {
    collectMax = 1,
    scrapeDepth = 50,
    maxComments = 200,
    maxGuess = 10,
    preset = 'fast',
    switchMax = null,
    commentMax = null,
    enableFollow = false,
    maxFollowing = 200,
    maxFollowers = 200,
    browser = null,
  } = options || {};

  const result = {
    userInfo: null,
    collectedVideos: [],
    discoveredVideoAuthors: [],
    discoveredCommentAuthors: [],
    discoveredGuessAuthors: [],
    discoveredFollowing: [],
    discoveredFollowers: [],
    error: null,
  };

  try {
    log(`\n[processUser] 访问 @${username}...`);
    await retryWithBackoff(() => page.goto(`https://www.tiktok.com/@${username}`, {
      waitUntil: 'load', timeout: 30000,
    }), { log });
    // Best effort: a profile without a video list is handled further down.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
    await delay(1000, 2000);

    const info = await getUserInfo(page);
    result.userInfo = info;
    if (!info.uniqueId) info.uniqueId = username;
    log(` 昵称: ${info.nickname || '-'} | 粉丝: ${info.followerCount || 0}`);

    if (enableFollow) {
      const loggedIn = await isLoggedIn(page);
      if (!loggedIn) {
        log(' [跳过] 提取关注/粉丝:未登录,请先登录 TikTok');
      } else {
        try {
          log(' 提取关注/粉丝列表...');
          const { following, followers } = await extractFollowAndFollowers(page, {
            // `||` keeps the historical behaviour: 0/null also fall back to 200.
            maxFollowing: maxFollowing || 200,
            maxFollowers: maxFollowers || 200,
            log,
          });
          result.discoveredFollowing = following;
          result.discoveredFollowers = followers;
          log(` 关注: ${following.length} | 粉丝: ${followers.length}`);
        } catch (e) {
          // Follow extraction is optional; continue with the videos.
          log(` 关注/粉丝提取失败: ${e.message}`);
          result.discoveredFollowing = [];
          result.discoveredFollowers = [];
        }
      }
    }

    const videos = await collectVideos(page, username, collectMax, log);
    const videoList = Array.from(videos.values()).slice(0, collectMax);
    result.collectedVideos = videoList.map((v) => ({
      videoId: v.id,
      videoUrl: v.href,
    }));

    if (videoList.length > 0) {
      const allVideoAuthors = new Map();
      const allCommentAuthors = new Set();
      const allGuessAuthors = new Set();

      try {
        for (let i = 0; i < videoList.length; i++) {
          const video = videoList[i];
          const videoUrl = video.href.startsWith('http')
            ? video.href
            : `https://www.tiktok.com${video.href}`;
          log(` [${i + 1}/${videoList.length}] 开始 scrape: ${videoUrl} (深度 ${scrapeDepth})`);

          // NOTE(review): runScrape only reuses `page` when `browser` is also
          // set; with `browser === null` it acquires its own browser, which is
          // not closed here — confirm callers always pass `options.browser`.
          const scrapeResult = await runScrape({
            videoUrl,
            maxVideos: scrapeDepth,
            maxComments,
            maxGuess,
            preset,
            switchMax,
            commentMax,
            browser,
            page,
            log,
          });

          const scrapeOutput = scrapeResult.output;
          if (!scrapeOutput) continue;

          for (const vd of scrapeOutput.videoDetails || []) {
            // First occurrence of an author wins.
            if (!allVideoAuthors.has(vd.uniqueId)) {
              allVideoAuthors.set(vd.uniqueId, {
                uniqueId: vd.uniqueId,
                nickname: vd.nickname,
                locationCreated: vd.locationCreated,
              });
            }
          }
          for (const cu of scrapeOutput.commentUsers || []) {
            allCommentAuthors.add(cu);
          }
          for (const ga of scrapeOutput.guessAuthors || []) {
            allGuessAuthors.add(ga);
          }
        }
      } finally {
        // Publish whatever was gathered, even when a later video aborted the
        // loop; the failure itself is still reported through `result.error`.
        result.discoveredVideoAuthors = [...allVideoAuthors.values()];
        result.discoveredCommentAuthors = [...allCommentAuthors];
        result.discoveredGuessAuthors = [...allGuessAuthors];
      }

      log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者, ${result.discoveredGuessAuthors.length} 个猜你喜欢作者`);
    } else {
      // No videos: tell a restricted/broken page apart from an empty profile.
      const pageError = await detectPageError(page);
      result.restricted = !!pageError;
      if (pageError) {
        log(` @${username} 页面受限(${pageError}),标记跳过`);
      } else {
        log(` @${username} 没有视频,跳过 scrape`);
      }
    }
  } catch (e) {
    result.error = e.message;
    log(` [错误] ${e.message}`);
  }

  return result;
}
|
|
182
|
+
|
|
183
|
+
export { processUser, mergeUserInfo };
|
|
@@ -1,214 +1,188 @@
|
|
|
1
|
-
|
|
2
|
-
closeCommentPanel,
|
|
3
|
-
delay,
|
|
4
|
-
ensureBrowserReady,
|
|
5
|
-
ensureTikTokPage,
|
|
6
|
-
setDelayConfig,
|
|
7
|
-
getDelayConfig,
|
|
8
|
-
retryWithBackoff,
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
|
|
14
|
-
const config = getDelayConfig();
|
|
15
|
-
|
|
16
|
-
await page
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
await
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
uniqueId: r.uniqueId,
|
|
190
|
-
nickname: r.nickname,
|
|
191
|
-
locationCreated: r.locationCreated,
|
|
192
|
-
};
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
const output = {
|
|
197
|
-
videoDetails: Object.values(videoDetails),
|
|
198
|
-
commentUsers: [...commentUsers].sort(),
|
|
199
|
-
allCommentAuthorsList,
|
|
200
|
-
guessVideos: allGuessVideos,
|
|
201
|
-
guessAuthors: [...allGuessAuthors].sort(),
|
|
202
|
-
stats: {
|
|
203
|
-
totalVideos: allResults.length,
|
|
204
|
-
uniqueVideoAuthors: videoAuthors.size,
|
|
205
|
-
uniqueCommentAuthors: commentUsers.size,
|
|
206
|
-
uniqueGuessAuthors: allGuessAuthors.size,
|
|
207
|
-
totalGuessVideos: allGuessVideos.length,
|
|
208
|
-
},
|
|
209
|
-
};
|
|
210
|
-
|
|
211
|
-
return { output, browser, isExternal };
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
module.exports = { scrapeSingleVideo, runScrape };
|
|
1
|
+
import {
|
|
2
|
+
closeCommentPanel,
|
|
3
|
+
delay,
|
|
4
|
+
ensureBrowserReady,
|
|
5
|
+
ensureTikTokPage,
|
|
6
|
+
setDelayConfig,
|
|
7
|
+
getDelayConfig,
|
|
8
|
+
retryWithBackoff,
|
|
9
|
+
} from './modules/page-helpers.mjs';
|
|
10
|
+
import { extractCommentAuthors } from './modules/comment-extractor.mjs';
|
|
11
|
+
import { extractGuessVideos } from './modules/guess-extractor.mjs';
|
|
12
|
+
|
|
13
|
+
/**
 * Scrape the video currently shown on `page`.
 *
 * Reads the author handle from the page URL, a nickname from the first
 * suitable `[class*="Author"]` element and `locationCreated` from the page
 * HTML. Recommended videos and comment authors are only extracted when the
 * video's `locationCreated` equals `location`.
 *
 * @param {object} page - Browser page already showing a video.
 * @param {number} maxComments - Comment-author limit; 0 disables extraction.
 * @param {number} maxGuess - Recommended-video limit; 0 disables extraction.
 * @param {function(string): void} log - Accepted for interface parity; not used here.
 * @param {string} [location='ES'] - Required `locationCreated` value.
 * @returns {Promise<object>} `{ videoAuthor, uniqueId, nickname,
 *   locationCreated, commentUsers, guessVideos }`.
 * @throws {Error} When the author handle cannot be read from the URL.
 */
async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
  const delays = getDelayConfig();
  // Bounds are read at call time, exactly like the inline expressions they replace.
  const shortPause = () => delay(Math.round(delays.commentMax * 0.3), delays.commentMax);

  // Best effort: keep going even if the meta block never shows up.
  await page.waitForSelector('[class*="VideoMeta"]', { timeout: 10000 }).catch(() => {});
  await shortPause();

  // Runs inside the page, so it must not reference anything from this scope.
  const meta = await page.evaluate(() => {
    const info = {};

    const urlMatch = window.location.href.match(/\/@([^/]+)\/video/);
    if (urlMatch) info.uniqueId = urlMatch[1];

    const nickname = Array.from(document.querySelectorAll('[class*="Author"]'))
      .map((node) => (node.textContent || '').trim())
      .find((text) => text && !text.includes('TikTok') && !text.includes('Share'));
    if (nickname) info.nickname = nickname;

    const pageHtml = document.documentElement.outerHTML;
    const locationMatch = pageHtml.match(/"locationCreated":"([^"]*)/);
    if (locationMatch) info.locationCreated = locationMatch[1];

    return info;
  });

  if (!meta.uniqueId) throw new Error('无法获取视频作者');
  const videoAuthor = '@' + meta.uniqueId;

  const wantsGuess = maxGuess > 0;
  const wantsComments = maxComments > 0;
  let guessVideos = [];
  let commentUsers = [];

  if (meta.locationCreated === location) {
    if (wantsGuess) guessVideos = await extractGuessVideos(page, maxGuess);
    if (wantsComments) commentUsers = await extractCommentAuthors(page, maxComments);
    await closeCommentPanel(page);
    if (wantsGuess || wantsComments) await shortPause();
  }

  return {
    videoAuthor,
    uniqueId: meta.uniqueId,
    nickname: meta.nickname,
    locationCreated: meta.locationCreated,
    commentUsers: Array.from(new Set(commentUsers)),
    guessVideos,
  };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Open `videoUrl` and walk up to `maxVideos` videos of the feed, scraping each
 * with scrapeSingleVideo and aggregating authors, comment users and
 * recommended videos.
 *
 * The caller's browser/page are reused only when BOTH `browser` and `page`
 * are supplied; otherwise a browser and page are acquired here. A browser
 * acquired here is closed again if the run fails; on success it is returned
 * so the caller can dispose of it.
 *
 * @param {object} options
 * @param {string} options.videoUrl - Video to start from.
 * @param {number} [options.maxVideos=20]
 * @param {number} [options.maxComments=999]
 * @param {number} [options.maxGuess=10]
 * @param {string|object|null} [options.preset=null] - Passed to setDelayConfig.
 * @param {number|null} [options.switchMax=null] - Used only when no preset is given.
 * @param {number|null} [options.commentMax=null] - Used only when no preset is given.
 * @param {function(string): void} [options.log=console.error]
 * @param {object|null} [options.browser=null]
 * @param {object|null} [options.page=null]
 * @returns {Promise<{output: object, browser: object, isExternal: boolean}>}
 * @throws Rethrows navigation/page failures (after closing a self-acquired browser).
 */
async function runScrape(options) {
  const {
    videoUrl, maxVideos = 20, maxComments = 999, maxGuess = 10,
    preset = null, switchMax = null, commentMax = null,
    log = console.error,
    browser: externalBrowser = null, page: externalPage = null,
  } = options;

  if (preset) {
    setDelayConfig(preset);
  } else if (switchMax || commentMax) {
    setDelayConfig({ switchMax: switchMax || 5000, commentMax: commentMax || 3000 });
  }

  const config = getDelayConfig();
  const isExternal = !!(externalBrowser && externalPage);
  let browser;
  let page;

  if (isExternal) {
    browser = externalBrowser;
    page = externalPage;
  } else {
    log(`视频地址: ${videoUrl}`);
    log(`视频数: ${maxVideos}, 评论数: ${maxComments}, 猜你喜欢: ${maxGuess}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`);
    browser = await ensureBrowserReady();
    try {
      page = await ensureTikTokPage(browser, videoUrl);
    } catch (e) {
      await browser.close().catch(() => {});
      throw e;
    }
  }

  // Scroll the feed by one step. Falls back to scrolling the window when the
  // list container is not found, so both the success and the skip path can
  // still advance. The callback runs in the page and must stay self-contained.
  const scrollToNextVideo = () => page.evaluate(() => {
    const container = document.querySelector('[class*="ColumnListContainer"]');
    if (container) container.scrollTop += 700;
    else window.scrollBy(0, 700);
  });

  const allResults = [];
  const videoAuthors = new Set();
  const commentUsers = new Set();
  const allCommentAuthorsList = [];
  const allGuessAuthors = new Set();
  const allGuessVideos = [];

  try {
    await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(Math.round(config.switchMax * 0.5), config.switchMax);
    await closeCommentPanel(page);
    await delay(Math.round(config.commentMax * 0.5), config.commentMax);

    for (let i = 0; i < maxVideos; i++) {
      const isLast = i >= maxVideos - 1;
      await delay(Math.round(config.commentMax * 0.3), config.commentMax);

      let result;
      try {
        result = await scrapeSingleVideo(page, maxComments, maxGuess, log);
      } catch (e) {
        // A single unreadable video is skipped, not fatal.
        log(`[${i + 1}/${maxVideos}] 跳过: ${e.message}`);
        if (!isLast) {
          await scrollToNextVideo();
          await delay(Math.round(config.switchMax * 0.5), config.switchMax);
        }
        continue;
      }

      allResults.push(result);
      videoAuthors.add(result.videoAuthor);
      for (const user of result.commentUsers) {
        commentUsers.add(user);
        allCommentAuthorsList.push(user);
      }
      if (result.guessVideos) {
        for (const guess of result.guessVideos) {
          allGuessVideos.push(guess);
          if (guess.author) allGuessAuthors.add(guess.author);
        }
      }

      // Log the first video and then every fifth one.
      if ((i + 1) % 5 === 0 || i === 0) {
        log(`[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || '-'} | 评论用户: ${result.commentUsers.length} | 猜你喜欢: ${result.guessVideos ? result.guessVideos.length : 0}`);
      }

      if (!isLast) {
        await scrollToNextVideo();
        // NOTE(review): the fixed 2000 lower bound can exceed switchMax for
        // short delay configs; the outcome depends on delay() — confirm.
        await delay(2000, config.switchMax);
      }
    }
  } catch (e) {
    // Mirror the ensureTikTokPage failure path: a browser acquired by this
    // call must not be left open when the run aborts.
    if (!isExternal) await browser.close().catch(() => {});
    throw e;
  }

  log(`\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length} | 猜你喜欢作者 ${allGuessAuthors.size} | 总猜中视频 ${allGuessVideos.length}`);

  // One detail record per author; the first scraped video of an author wins.
  const videoDetails = new Map();
  for (const r of allResults) {
    if (!videoDetails.has(r.videoAuthor)) {
      videoDetails.set(r.videoAuthor, {
        videoAuthor: r.videoAuthor,
        uniqueId: r.uniqueId,
        nickname: r.nickname,
        locationCreated: r.locationCreated,
      });
    }
  }

  const output = {
    videoDetails: [...videoDetails.values()],
    commentUsers: [...commentUsers].sort(),
    allCommentAuthorsList,
    guessVideos: allGuessVideos,
    guessAuthors: [...allGuessAuthors].sort(),
    stats: {
      totalVideos: allResults.length,
      uniqueVideoAuthors: videoAuthors.size,
      uniqueCommentAuthors: commentUsers.size,
      uniqueGuessAuthors: allGuessAuthors.size,
      totalGuessVideos: allGuessVideos.length,
    },
  };

  return { output, browser, isExternal };
}
|
|
187
|
+
|
|
188
|
+
export { scrapeSingleVideo, runScrape };
|