tt-help-cli-ycl 1.3.6 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -45
- package/src/cli/auto.js +131 -121
- package/src/cli/explore.js +147 -138
- package/src/cli/progress.js +111 -111
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +391 -391
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +142 -142
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +87 -87
- package/src/lib/constants.js +109 -95
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/mac-or-uuid.js +82 -0
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +44 -44
- package/src/lib/scrape.js +40 -40
- package/src/lib/url.js +52 -52
- package/src/main.mjs +221 -221
- package/src/scraper/auto-core.mjs +185 -185
- package/src/scraper/core.mjs +190 -190
- package/src/scraper/explore-core.mjs +162 -162
- package/src/scraper/modules/captcha-handler.mjs +114 -114
- package/src/scraper/modules/comment-extractor.mjs +69 -69
- package/src/scraper/modules/follow-extractor.mjs +121 -121
- package/src/scraper/modules/guess-extractor.mjs +51 -51
- package/src/scraper/modules/page-error-detector.mjs +70 -70
- package/src/scraper/modules/page-helpers.mjs +48 -48
- package/src/scraper/modules/scroll-collector.mjs +189 -189
- package/src/test-auto-follow.cjs +109 -0
- package/src/test-extractors.cjs +75 -0
- package/src/test-follow.cjs +41 -0
- package/src/videos/core.mjs +126 -126
- package/src/watch/data-store.mjs +258 -261
- package/src/watch/public/index.html +466 -465
- package/src/watch/server.mjs +291 -281
- package/src/results/user-videos-bar.lar.lar.moeta.json +0 -37
|
@@ -1,185 +1,185 @@
|
|
|
1
|
-
import {
|
|
2
|
-
delay,
|
|
3
|
-
ensureBrowserReady,
|
|
4
|
-
ensureTikTokPage,
|
|
5
|
-
setDelayConfig,
|
|
6
|
-
getDelayConfig,
|
|
7
|
-
closeCommentPanel,
|
|
8
|
-
retryWithBackoff,
|
|
9
|
-
detectPageError,
|
|
10
|
-
isLoggedIn,
|
|
11
|
-
assertPageUrl,
|
|
12
|
-
} from './modules/page-helpers.mjs';
|
|
13
|
-
export { ensureBrowserReady };
|
|
14
|
-
import {
|
|
15
|
-
getUserInfo,
|
|
16
|
-
collectVideos,
|
|
17
|
-
} from '../videos/core.mjs';
|
|
18
|
-
import { runScrape } from './core.mjs';
|
|
19
|
-
import { extractFollowAndFollowers } from './modules/follow-extractor.mjs';
|
|
20
|
-
|
|
21
|
-
/**
 * Merge two user-info records into a new object without mutating either input.
 *
 * Rules, applied to every key of `incoming`:
 *  - `_sources` is never copied from `incoming`;
 *  - blank incoming values (undefined, null, '' or NaN) are ignored;
 *  - a blank slot in `existing` is filled with the incoming value;
 *  - when both sides hold a number, the larger one wins;
 *  - any other non-blank existing value is kept as is.
 *
 * @param {object|null|undefined} existing - Record that takes precedence.
 * @param {object|null|undefined} incoming - Record supplying missing fields.
 * @param {string} [source] - Label appended to `_sources` if not already listed.
 * @returns {object} A new merged record.
 */
function mergeUserInfo(existing, incoming, source) {
  // NaN counts as "no value": Math.max() with a NaN operand returns NaN and
  // would otherwise wipe out a valid number.
  const isBlank = (v) =>
    v === undefined || v === null || v === '' || (typeof v === 'number' && Number.isNaN(v));

  const merged = { ...existing };

  // The spread above is shallow; copy the list so the push below cannot leak
  // into the caller's `existing._sources`.
  if (Array.isArray(merged._sources)) {
    merged._sources = [...merged._sources];
  }

  for (const [key, value] of Object.entries(incoming || {})) {
    if (key === '_sources' || isBlank(value)) continue;

    const current = merged[key];
    if (isBlank(current)) {
      merged[key] = value;
    } else if (typeof value === 'number' && typeof current === 'number') {
      merged[key] = Math.max(current, value);
    }
  }

  if (source) {
    if (!Array.isArray(merged._sources)) merged._sources = [];
    if (!merged._sources.includes(source)) merged._sources.push(source);
  }

  return merged;
}
|
|
38
|
-
|
|
39
|
-
/**
 * Visit one TikTok profile, optionally read its following/follower lists,
 * collect up to `collectMax` of its videos and run a scrape starting from
 * each collected video.
 *
 * Errors raised while processing are not re-thrown: they are logged and
 * reported through `result.error`. Authors discovered before a failure are
 * still returned.
 *
 * @param {object} page - Browser page handle; `goto` and `waitForSelector`
 *   are called on it (presumably a Puppeteer/Playwright page; confirm).
 * @param {string} username - Profile handle without the leading `@`.
 * @param {object} [options]
 * @param {number} [options.collectMax=1] - Maximum videos to take from the profile.
 * @param {number} [options.scrapeDepth=50] - Passed to `runScrape` as `maxVideos`.
 * @param {number} [options.maxComments=200] - Forwarded to `runScrape`.
 * @param {number} [options.maxGuess=10] - Forwarded to `runScrape`.
 * @param {string} [options.preset='fast'] - Forwarded to `runScrape`.
 * @param {?number} [options.switchMax=null] - Forwarded to `runScrape`.
 * @param {?number} [options.commentMax=null] - Forwarded to `runScrape`.
 * @param {boolean} [options.enableFollow=false] - Extract following/followers (requires login).
 * @param {number} [options.maxFollowing=200] - Cap for the following list; falsy values fall back to 200.
 * @param {number} [options.maxFollowers=200] - Cap for the follower list; falsy values fall back to 200.
 * @param {?object} [options.browser=null] - Forwarded to `runScrape`.
 * @param {Function} [log] - Receives progress messages; defaults to a no-op.
 * @returns {Promise<object>} Object with `userInfo`, `collectedVideos`,
 *   `discoveredVideoAuthors`, `discoveredCommentAuthors`,
 *   `discoveredGuessAuthors`, `discoveredFollowing`, `discoveredFollowers`
 *   and `error`. `restricted` is added only when no videos were collected.
 */
async function processUser(page, username, options = {}, log = () => {}) {
  const {
    collectMax = 1,
    scrapeDepth = 50,
    maxComments = 200,
    maxGuess = 10,
    preset = 'fast',
    switchMax = null,
    commentMax = null,
    enableFollow = false,
    maxFollowing = 200,
    maxFollowers = 200,
    browser = null,
  } = options;

  const result = {
    userInfo: null,
    collectedVideos: [],
    discoveredVideoAuthors: [],
    discoveredCommentAuthors: [],
    discoveredGuessAuthors: [],
    discoveredFollowing: [],
    discoveredFollowers: [],
    error: null,
  };

  try {
    log(`\n[processUser] 访问 @${username}...`);
    await retryWithBackoff(() => page.goto(`https://www.tiktok.com/@${username}`, {
      waitUntil: 'load', timeout: 30000,
    }), { log });
    assertPageUrl(page, `@${username}`);
    // Best effort: a timeout is ignored so that profiles without a video list
    // still reach the "no videos" handling below.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
    await delay(1000, 2000);

    const info = await getUserInfo(page);
    result.userInfo = info;
    if (!info.uniqueId) info.uniqueId = username;
    log(` 昵称: ${info.nickname || '-'} | 粉丝: ${info.followerCount || 0}`);

    if (enableFollow) {
      if (!(await isLoggedIn(page))) {
        // The follow lists keep their initial empty arrays.
        log(' [跳过] 提取关注/粉丝:未登录,请先登录 TikTok');
      } else {
        try {
          log(' 提取关注/粉丝列表...');
          const { following, followers } = await extractFollowAndFollowers(page, {
            // `||` keeps the historical behaviour: 0 and null also fall back to 200.
            maxFollowing: maxFollowing || 200,
            maxFollowers: maxFollowers || 200,
            log,
          });
          result.discoveredFollowing = following;
          result.discoveredFollowers = followers;
          log(` 关注: ${following.length} | 粉丝: ${followers.length}`);
        } catch (e) {
          // Follow extraction is optional; its failure must not abort the user.
          log(` 关注/粉丝提取失败: ${e.message}`);
          result.discoveredFollowing = [];
          result.discoveredFollowers = [];
        }
      }
    }

    const videos = await collectVideos(page, username, collectMax, log);
    const videoList = Array.from(videos.values()).slice(0, collectMax);
    result.collectedVideos = videoList.map((v) => ({
      videoId: v.id,
      videoUrl: v.href,
    }));

    if (videoList.length > 0) {
      const allVideoAuthors = new Map();
      const allCommentAuthors = new Set();
      const allGuessAuthors = new Set();

      try {
        for (const [i, video] of videoList.entries()) {
          const videoUrl = video.href.startsWith('http')
            ? video.href
            : `https://www.tiktok.com${video.href}`;
          log(` [${i + 1}/${videoList.length}] 开始 scrape: ${videoUrl} (深度 ${scrapeDepth})`);

          const scrapeResult = await runScrape({
            videoUrl,
            maxVideos: scrapeDepth,
            maxComments,
            maxGuess,
            preset,
            switchMax,
            commentMax,
            browser,
            page,
            log,
          });

          const scrapeOutput = scrapeResult.output;
          if (!scrapeOutput) continue;

          for (const vd of scrapeOutput.videoDetails || []) {
            // An entry without an id cannot be de-duplicated or visited later.
            if (!vd || !vd.uniqueId) continue;
            if (!allVideoAuthors.has(vd.uniqueId)) {
              allVideoAuthors.set(vd.uniqueId, {
                uniqueId: vd.uniqueId,
                nickname: vd.nickname,
                locationCreated: vd.locationCreated,
              });
            }
          }

          for (const cu of scrapeOutput.commentUsers || []) {
            allCommentAuthors.add(cu);
          }

          for (const ga of scrapeOutput.guessAuthors || []) {
            allGuessAuthors.add(ga);
          }
        }
      } finally {
        // Publish whatever was gathered even when a later scrape throws, so
        // earlier discoveries are not lost; the error still reaches the outer catch.
        result.discoveredVideoAuthors = [...allVideoAuthors.values()];
        result.discoveredCommentAuthors = [...allCommentAuthors];
        result.discoveredGuessAuthors = [...allGuessAuthors];
      }

      log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者, ${result.discoveredGuessAuthors.length} 个猜你喜欢作者`);
    } else {
      // No videos: distinguish a restricted/error page from an empty profile.
      const pageError = await detectPageError(page);
      result.restricted = !!pageError;
      if (pageError) {
        log(` @${username} 页面受限(${pageError}),标记跳过`);
      } else {
        log(` @${username} 没有视频,跳过 scrape`);
      }
    }
  } catch (e) {
    result.error = e.message;
    log(` [错误] ${e.message}`);
  }

  return result;
}
|
|
184
|
-
|
|
185
|
-
export { processUser, mergeUserInfo };
|
|
1
|
+
import {
|
|
2
|
+
delay,
|
|
3
|
+
ensureBrowserReady,
|
|
4
|
+
ensureTikTokPage,
|
|
5
|
+
setDelayConfig,
|
|
6
|
+
getDelayConfig,
|
|
7
|
+
closeCommentPanel,
|
|
8
|
+
retryWithBackoff,
|
|
9
|
+
detectPageError,
|
|
10
|
+
isLoggedIn,
|
|
11
|
+
assertPageUrl,
|
|
12
|
+
} from './modules/page-helpers.mjs';
|
|
13
|
+
export { ensureBrowserReady };
|
|
14
|
+
import {
|
|
15
|
+
getUserInfo,
|
|
16
|
+
collectVideos,
|
|
17
|
+
} from '../videos/core.mjs';
|
|
18
|
+
import { runScrape } from './core.mjs';
|
|
19
|
+
import { extractFollowAndFollowers } from './modules/follow-extractor.mjs';
|
|
20
|
+
|
|
21
|
+
/**
 * Merge two user-info records into a new object without mutating either input.
 *
 * Rules, applied to every key of `incoming`:
 *  - `_sources` is never copied from `incoming`;
 *  - blank incoming values (undefined, null, '' or NaN) are ignored;
 *  - a blank slot in `existing` is filled with the incoming value;
 *  - when both sides hold a number, the larger one wins;
 *  - any other non-blank existing value is kept as is.
 *
 * @param {object|null|undefined} existing - Record that takes precedence.
 * @param {object|null|undefined} incoming - Record supplying missing fields.
 * @param {string} [source] - Label appended to `_sources` if not already listed.
 * @returns {object} A new merged record.
 */
function mergeUserInfo(existing, incoming, source) {
  // NaN counts as "no value": Math.max() with a NaN operand returns NaN and
  // would otherwise wipe out a valid number.
  const isBlank = (v) =>
    v === undefined || v === null || v === '' || (typeof v === 'number' && Number.isNaN(v));

  const merged = { ...existing };

  // The spread above is shallow; copy the list so the push below cannot leak
  // into the caller's `existing._sources`.
  if (Array.isArray(merged._sources)) {
    merged._sources = [...merged._sources];
  }

  for (const [key, value] of Object.entries(incoming || {})) {
    if (key === '_sources' || isBlank(value)) continue;

    const current = merged[key];
    if (isBlank(current)) {
      merged[key] = value;
    } else if (typeof value === 'number' && typeof current === 'number') {
      merged[key] = Math.max(current, value);
    }
  }

  if (source) {
    if (!Array.isArray(merged._sources)) merged._sources = [];
    if (!merged._sources.includes(source)) merged._sources.push(source);
  }

  return merged;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Visit one TikTok profile, optionally read its following/follower lists,
 * collect up to `collectMax` of its videos and run a scrape starting from
 * each collected video.
 *
 * Errors raised while processing are not re-thrown: they are logged and
 * reported through `result.error`. Authors discovered before a failure are
 * still returned.
 *
 * @param {object} page - Browser page handle; `goto` and `waitForSelector`
 *   are called on it (presumably a Puppeteer/Playwright page; confirm).
 * @param {string} username - Profile handle without the leading `@`.
 * @param {object} [options]
 * @param {number} [options.collectMax=1] - Maximum videos to take from the profile.
 * @param {number} [options.scrapeDepth=50] - Passed to `runScrape` as `maxVideos`.
 * @param {number} [options.maxComments=200] - Forwarded to `runScrape`.
 * @param {number} [options.maxGuess=10] - Forwarded to `runScrape`.
 * @param {string} [options.preset='fast'] - Forwarded to `runScrape`.
 * @param {?number} [options.switchMax=null] - Forwarded to `runScrape`.
 * @param {?number} [options.commentMax=null] - Forwarded to `runScrape`.
 * @param {boolean} [options.enableFollow=false] - Extract following/followers (requires login).
 * @param {number} [options.maxFollowing=200] - Cap for the following list; falsy values fall back to 200.
 * @param {number} [options.maxFollowers=200] - Cap for the follower list; falsy values fall back to 200.
 * @param {?object} [options.browser=null] - Forwarded to `runScrape`.
 * @param {Function} [log] - Receives progress messages; defaults to a no-op.
 * @returns {Promise<object>} Object with `userInfo`, `collectedVideos`,
 *   `discoveredVideoAuthors`, `discoveredCommentAuthors`,
 *   `discoveredGuessAuthors`, `discoveredFollowing`, `discoveredFollowers`
 *   and `error`. `restricted` is added only when no videos were collected.
 */
async function processUser(page, username, options = {}, log = () => {}) {
  const {
    collectMax = 1,
    scrapeDepth = 50,
    maxComments = 200,
    maxGuess = 10,
    preset = 'fast',
    switchMax = null,
    commentMax = null,
    enableFollow = false,
    maxFollowing = 200,
    maxFollowers = 200,
    browser = null,
  } = options;

  const result = {
    userInfo: null,
    collectedVideos: [],
    discoveredVideoAuthors: [],
    discoveredCommentAuthors: [],
    discoveredGuessAuthors: [],
    discoveredFollowing: [],
    discoveredFollowers: [],
    error: null,
  };

  try {
    log(`\n[processUser] 访问 @${username}...`);
    await retryWithBackoff(() => page.goto(`https://www.tiktok.com/@${username}`, {
      waitUntil: 'load', timeout: 30000,
    }), { log });
    assertPageUrl(page, `@${username}`);
    // Best effort: a timeout is ignored so that profiles without a video list
    // still reach the "no videos" handling below.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
    await delay(1000, 2000);

    const info = await getUserInfo(page);
    result.userInfo = info;
    if (!info.uniqueId) info.uniqueId = username;
    log(` 昵称: ${info.nickname || '-'} | 粉丝: ${info.followerCount || 0}`);

    if (enableFollow) {
      if (!(await isLoggedIn(page))) {
        // The follow lists keep their initial empty arrays.
        log(' [跳过] 提取关注/粉丝:未登录,请先登录 TikTok');
      } else {
        try {
          log(' 提取关注/粉丝列表...');
          const { following, followers } = await extractFollowAndFollowers(page, {
            // `||` keeps the historical behaviour: 0 and null also fall back to 200.
            maxFollowing: maxFollowing || 200,
            maxFollowers: maxFollowers || 200,
            log,
          });
          result.discoveredFollowing = following;
          result.discoveredFollowers = followers;
          log(` 关注: ${following.length} | 粉丝: ${followers.length}`);
        } catch (e) {
          // Follow extraction is optional; its failure must not abort the user.
          log(` 关注/粉丝提取失败: ${e.message}`);
          result.discoveredFollowing = [];
          result.discoveredFollowers = [];
        }
      }
    }

    const videos = await collectVideos(page, username, collectMax, log);
    const videoList = Array.from(videos.values()).slice(0, collectMax);
    result.collectedVideos = videoList.map((v) => ({
      videoId: v.id,
      videoUrl: v.href,
    }));

    if (videoList.length > 0) {
      const allVideoAuthors = new Map();
      const allCommentAuthors = new Set();
      const allGuessAuthors = new Set();

      try {
        for (const [i, video] of videoList.entries()) {
          const videoUrl = video.href.startsWith('http')
            ? video.href
            : `https://www.tiktok.com${video.href}`;
          log(` [${i + 1}/${videoList.length}] 开始 scrape: ${videoUrl} (深度 ${scrapeDepth})`);

          const scrapeResult = await runScrape({
            videoUrl,
            maxVideos: scrapeDepth,
            maxComments,
            maxGuess,
            preset,
            switchMax,
            commentMax,
            browser,
            page,
            log,
          });

          const scrapeOutput = scrapeResult.output;
          if (!scrapeOutput) continue;

          for (const vd of scrapeOutput.videoDetails || []) {
            // An entry without an id cannot be de-duplicated or visited later.
            if (!vd || !vd.uniqueId) continue;
            if (!allVideoAuthors.has(vd.uniqueId)) {
              allVideoAuthors.set(vd.uniqueId, {
                uniqueId: vd.uniqueId,
                nickname: vd.nickname,
                locationCreated: vd.locationCreated,
              });
            }
          }

          for (const cu of scrapeOutput.commentUsers || []) {
            allCommentAuthors.add(cu);
          }

          for (const ga of scrapeOutput.guessAuthors || []) {
            allGuessAuthors.add(ga);
          }
        }
      } finally {
        // Publish whatever was gathered even when a later scrape throws, so
        // earlier discoveries are not lost; the error still reaches the outer catch.
        result.discoveredVideoAuthors = [...allVideoAuthors.values()];
        result.discoveredCommentAuthors = [...allCommentAuthors];
        result.discoveredGuessAuthors = [...allGuessAuthors];
      }

      log(` 发现: ${result.discoveredVideoAuthors.length} 个视频作者, ${result.discoveredCommentAuthors.length} 个评论作者, ${result.discoveredGuessAuthors.length} 个猜你喜欢作者`);
    } else {
      // No videos: distinguish a restricted/error page from an empty profile.
      const pageError = await detectPageError(page);
      result.restricted = !!pageError;
      if (pageError) {
        log(` @${username} 页面受限(${pageError}),标记跳过`);
      } else {
        log(` @${username} 没有视频,跳过 scrape`);
      }
    }
  } catch (e) {
    result.error = e.message;
    log(` [错误] ${e.message}`);
  }

  return result;
}
|
|
184
|
+
|
|
185
|
+
export { processUser, mergeUserInfo };
|