tt-help-cli-ycl 1.3.11 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -46
- package/{bat → scripts}/run-explore.bat +68 -68
- package/{bat → scripts}/run-explore.ps1 +81 -81
- package/{bat → scripts}/run-explore.sh +73 -73
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/src/cli/auto.js +186 -157
- package/src/cli/config.js +116 -0
- package/src/cli/explore-default.js +83 -0
- package/src/cli/explore.js +227 -181
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +216 -0
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +456 -391
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +194 -142
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +146 -87
- package/src/lib/constants.js +119 -119
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +40 -40
- package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
- package/src/lib/url.js +52 -52
- package/src/main.js +48 -0
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/{auto-core.mjs → auto-core.js} +203 -194
- package/src/scraper/{core.mjs → core.js} +211 -190
- package/src/scraper/{explore-core.mjs → explore-core.js} +180 -171
- package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +114 -114
- package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +74 -69
- package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +121 -121
- package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +51 -51
- package/src/scraper/modules/page-error-detector.js +1 -0
- package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +48 -48
- package/src/scraper/modules/scroll-collector.js +8 -0
- package/src/scraper/refresh-core.js +179 -0
- package/src/videos/{core.mjs → core.js} +126 -126
- package/src/watch/data-store.js +431 -0
- package/src/watch/public/index.html +721 -690
- package/src/watch/{server.mjs → server.js} +484 -349
- package/src/main.mjs +0 -234
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
- package/src/watch/data-store.mjs +0 -274
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import {
|
|
2
|
+
delay,
|
|
3
|
+
retryWithBackoff,
|
|
4
|
+
detectPageError,
|
|
5
|
+
assertPageUrl,
|
|
6
|
+
} from './modules/page-helpers.js';
|
|
7
|
+
import { detectCaptcha } from './modules/captcha-handler.js';
|
|
8
|
+
import {
|
|
9
|
+
getUserInfo,
|
|
10
|
+
collectVideos,
|
|
11
|
+
} from '../videos/core.js';
|
|
12
|
+
import { extractFollowAndFollowers } from './modules/follow-extractor.js';
|
|
13
|
+
import { processExplore } from './explore-core.js';
|
|
14
|
+
|
|
15
|
+
/** Defaults handed to processExplore for every newly discovered user. */
const REFRESH_EXPLORE_DEFAULTS = Object.freeze({
  maxComments: 10,
  maxGuess: 0,
  enableFollow: true,
  maxFollowing: 5,
  maxFollowers: 5,
  location: 'PL,NL,BE,DE,FR,IT,ES,IE',
});

/**
 * Normalises one follow-list entry to a bare unique id.
 *
 * Entries are either a plain handle string or a `[handle, displayName]` array.
 *
 * @param {string|Array} entry - Raw entry from the follow extractor.
 * @returns {string|null} Handle without a leading '@', or null when unusable.
 */
function toRefreshUniqueId(entry) {
  const handle = Array.isArray(entry) ? entry[0] : entry;
  if (typeof handle !== 'string') return null;
  const uniqueId = handle.replace(/^@/, '');
  return uniqueId === '' ? null : uniqueId;
}

/**
 * Merges following + followers into one de-duplicated candidate list.
 *
 * @param {Array} following - Raw "following" entries.
 * @param {Array} followers - Raw "follower" entries.
 * @returns {Array<{uniqueId: string, source: string}>} First occurrence wins.
 */
function collectRefreshCandidates(following, followers) {
  const seen = new Set();
  const candidates = [];
  const append = (entries, source) => {
    for (const entry of entries) {
      const uniqueId = toRefreshUniqueId(entry);
      if (uniqueId === null || seen.has(uniqueId)) continue;
      seen.add(uniqueId);
      candidates.push({ uniqueId, source });
    }
  };
  append(following, 'refresh-following');
  append(followers, 'refresh-follower');
  return candidates;
}

/**
 * Builds the JSON payload posted to `/api/explore-new/:id` from an explore result.
 *
 * Every discovered entry is tagged with the explored user's `locationCreated`
 * (as `guessedLocation`, or null when absent).
 *
 * @param {object} exploreResult - Value returned by processExplore.
 * @returns {object} Payload ready for JSON.stringify.
 */
function buildRefreshExplorePayload(exploreResult) {
  const guessedLocation = exploreResult.locationCreated || null;
  const tagAuthor = (author) => ({ author, guessedLocation });
  const tagFollow = (f) => ({
    handle: Array.isArray(f) ? f[0] : f,
    displayName: Array.isArray(f) ? f[1] : null,
    guessedLocation,
  });

  return {
    userInfo: exploreResult.userInfo || {},
    discoveredVideoAuthors: (exploreResult.discoveredVideoAuthors || []).map((item) =>
      (typeof item === 'object' ? { ...item, guessedLocation } : item)),
    discoveredCommentAuthors: (exploreResult.discoveredCommentAuthors || []).map(tagAuthor),
    discoveredGuessAuthors: (exploreResult.discoveredGuessAuthors || []).map(tagAuthor),
    discoveredFollowing: (exploreResult.discoveredFollowing || []).map(tagFollow),
    discoveredFollowers: (exploreResult.discoveredFollowers || []).map(tagFollow),
    processed: exploreResult.processed,
    hasFollowData: exploreResult.hasFollowData,
    keepFollow: exploreResult.keepFollow,
    locationCreated: exploreResult.locationCreated,
    noVideo: exploreResult.noVideo,
    restricted: exploreResult.restricted,
    error: exploreResult.error,
  };
}

/**
 * Refreshes one known user: revisits the profile, re-collects videos and the
 * following/follower lists, then runs a full explore for every discovered user
 * the server does not know yet and submits that explore result to the server.
 *
 * Errors never propagate: any failure is logged and stored on the returned
 * object (`error`, `errorStack`), and whatever was gathered so far is returned.
 *
 * @param {object} page - Browser page (uses `goto` and `waitForSelector`).
 * @param {string} username - Handle of the user to refresh, without '@'.
 * @param {string} serverUrl - Base URL of the server exposing `/api/user-exists`
 *   and `/api/explore-new`.
 * @param {object} [options]
 * @param {number} [options.maxFollowing=100] - Cap for collected "following" entries.
 * @param {number} [options.maxFollowers=100] - Cap for collected "follower" entries.
 * @param {number} [options.maxVideos=100] - Cap passed to collectVideos.
 * @param {object} [options.exploreOptions] - Overrides merged over the built-in
 *   explore defaults used for newly discovered users.
 * @param {function(string): void} log - Progress logger.
 * @returns {Promise<object>} Result with `userInfo`, `discoveredVideoAuthors`,
 *   `discoveredFollowing`, `discoveredFollowers`, `newUsersAdded`,
 *   `collectedVideos`, `error`, plus optional `captchaDetected`, `captchaStage`,
 *   `captchaMessage`, `noVideo`, `restricted` and `errorStack`.
 */
export async function processRefresh(page, username, serverUrl, options, log) {
  const {
    maxFollowing = 100,
    maxFollowers = 100,
    maxVideos = 100,
    exploreOptions,
  } = options ?? {};

  const result = {
    userInfo: null,
    discoveredVideoAuthors: [],
    discoveredFollowing: [],
    discoveredFollowers: [],
    newUsersAdded: 0,
    collectedVideos: 0,
    error: null,
  };

  try {
    log(` 访问 @${username} 主页...`);
    const homeUrl = `https://www.tiktok.com/@${username}`;
    await retryWithBackoff(async () => {
      await page.goto(homeUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
      assertPageUrl(page, `@${username}`);
    }, { log });
    // Best effort: a missing video list is handled further down, so a timeout is ignored.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
    await delay(1000, 2000);

    log(' 获取用户信息...');
    const info = await getUserInfo(page);
    if (info) {
      result.userInfo = info;
      // `??` keeps a legitimate count of 0 visible instead of printing '-'.
      log(` 用户: ${info.nickname || username} | 粉丝: ${info.followerCount ?? '-'} | 视频: ${info.videoCount ?? '-'}`);
    }

    const captcha = await detectCaptcha(page);
    if (captcha && captcha.visible) {
      log(`[验证码] @${username} 页面出现验证码`);
      result.captchaDetected = true;
      result.captchaStage = result.captchaStage || 'video-page';
      result.captchaMessage = result.captchaMessage || '视频页出现验证码';
    }

    // Collect videos.
    log(` 采集视频 (最多 ${maxVideos} 个)...`);
    const videoList = await collectVideos(page, username, maxVideos, log);
    const videoArray = videoList ? [...videoList.values()] : [];
    result.collectedVideos = videoArray.length;
    // Collected entries may carry no `author`; drop the gaps instead of
    // reporting a list of undefined values.
    result.discoveredVideoAuthors = videoArray
      .map((video) => video?.author)
      .filter((author) => author != null);

    if (videoArray.length === 0) {
      result.noVideo = true;
      const pageError = await detectPageError(page);
      if (pageError) {
        result.restricted = true;
        log(` @${username} 页面受限(${pageError}),标记跳过`);
      }
      return result;
    }

    // Collect following and followers; a failure here is not fatal.
    log(` 采集关注 (最多 ${maxFollowing}) + 粉丝 (最多 ${maxFollowers})...`);
    try {
      const followResult = await extractFollowAndFollowers(page, {
        maxFollowing,
        maxFollowers,
      });
      result.discoveredFollowing = followResult.following || [];
      result.discoveredFollowers = followResult.followers || [];
      log(` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`);
    } catch (e) {
      log(` [关注/粉丝采集失败] ${e.message}`);
      result.discoveredFollowing = [];
      result.discoveredFollowers = [];
    }

    // Run a full explore for each newly discovered user (following + followers).
    // Entries are de-duplicated so a user present in both lists is handled once.
    const candidates = collectRefreshCandidates(
      result.discoveredFollowing,
      result.discoveredFollowers,
    );
    const mergedExploreOptions = { ...REFRESH_EXPLORE_DEFAULTS, ...exploreOptions };

    for (const { uniqueId, source } of candidates) {
      const encodedId = encodeURIComponent(uniqueId);

      // Skip users the server already knows.
      const existsResp = await fetch(`${serverUrl}/api/user-exists/${encodedId}`);
      const existsData = await existsResp.json();
      if (existsData.exists) {
        continue;
      }

      log(` [新用户] @${uniqueId} 不存在,开始探索 (来源: ${source})...`);
      await delay(1000, 2000);

      // Full explore of the new user (same logic as the explore command).
      const exploreResult = await processExplore(page, uniqueId, mergedExploreOptions, log);

      // Propagate the captcha flag even when no user info came back or the
      // submit fails - that is exactly when a captcha is most likely the cause.
      if (exploreResult?.captchaDetected) {
        result.captchaDetected = true;
      }

      // Submit the explore result to the server (mirrors the explore command's commit).
      if (exploreResult?.userInfo) {
        const addResp = await fetch(`${serverUrl}/api/explore-new/${encodedId}`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(buildRefreshExplorePayload(exploreResult)),
        });
        const addResult = await addResp.json();

        if (!addResult.saved) {
          log(` [跳过] @${uniqueId} 提交失败`);
          continue;
        }

        result.newUsersAdded++;
        log(` [已提交] @${uniqueId} ${addResult.created ? '(新用户)' : '(已存在)'} | 发现: ${addResult.newUsers?.length || 0} 个`);
      }

      await delay(2000, 4000);
    }
  } catch (e) {
    log(` [错误] ${e.message}`);
    result.error = e.message;
    result.errorStack = e.stack || '';
  }

  return result;
}
|
|
@@ -1,126 +1,126 @@
|
|
|
1
|
-
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.
|
|
2
|
-
import { scrollAndCollect } from '../scraper/modules/scroll-collector.
|
|
3
|
-
|
|
4
|
-
/**
 * Extracts profile fields of the currently open user page by pattern-matching
 * the page's serialized HTML.
 *
 * The callback handed to `page.evaluate` reads `document` and `window`, so it is
 * kept fully self-contained (no references to outer-scope helpers).
 * NOTE(review): presumably `page` is a Puppeteer/Playwright page - confirm.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<object>} Object holding whichever of these were found:
 *   `uniqueId`, `secUid`, `nickname`, `ttSeller`, `verified`, `followerCount`,
 *   `videoCount`, `followingCount`, `heartCount`, `signature`,
 *   `locationCreated`, `region`.
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '/', '?' or '#' so a query string or fragment is never captured
    // as part of the handle (e.g. "/@user?lang=en").
    const urlMatch = window.location.href.match(/\/@([^/?#]+)/);
    if (urlMatch) result.uniqueId = urlMatch[1];

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];

    // The captured text is the body of a JSON string literal; let JSON.parse
    // resolve every escape (\n, \", \\, \uXXXX) and fall back when it is not
    // valid JSON.
    const decodeJsonString = (raw, fallback) => {
      try {
        return JSON.parse(`"${raw}"`);
      } catch {
        return fallback(raw);
      }
    };

    for (const [key, pattern] of Object.entries(patterns)) {
      const match = html.match(pattern);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) {
        result[key] = raw === 'true';
      } else if (numKeys.includes(key)) {
        result[key] = Number.parseInt(raw, 10);
      } else if (key === 'signature') {
        result[key] = decodeJsonString(raw, (text) =>
          text.replace(/\\n/g, '\n').replace(/\\\\/g, '\\'));
      } else if (key === 'nickname') {
        result[key] = decodeJsonString(raw, (text) => text);
      } else {
        result[key] = raw;
      }
    }

    return result;
  });
}
|
|
42
|
-
|
|
43
|
-
/**
 * Scrolls a user's profile and gathers links to that user's videos.
 *
 * Delegates the scrolling to `scrollAndCollect`; the in-page collector scans
 * every anchor in the document (it does not use the `container` argument) and
 * keeps those whose href contains `/@<username>/video/<digits>`.
 *
 * @param {object} page - Browser page passed through to scrollAndCollect.
 * @param {string} username - Handle without '@', used to match video links.
 * @param {number} maxVideos - Passed to scrollAndCollect as `maxItems`.
 * @param {function(string): void} log - Progress logger.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Videos keyed by
 *   id; the first occurrence of each id wins. Empty when nothing was collected.
 */
async function collectVideos(page, username, maxVideos, log) {
  const allLinks = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Kept self-contained: it only touches `document` and its own arguments.
    collectFn: (container, args) => {
      const pattern = `/@${args.handle}/video/`;
      const items = [];
      for (const el of Array.from(document.querySelectorAll('a'))) {
        const href = el.getAttribute('href') || '';
        if (!href.includes(pattern)) continue;
        const idMatch = href.match(/\/video\/(\d+)/);
        if (idMatch) items.push({ id: idMatch[1], href });
      }
      return { items };
    },
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const uniqueCount = new Set(allItems.map((v) => v.id)).size;
      if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
        log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
      }
    },
  });

  // Tolerate a missing result and malformed entries instead of throwing.
  const uniqueVideos = new Map();
  for (const video of allLinks ?? []) {
    if (video?.id == null) continue;
    if (!uniqueVideos.has(video.id)) uniqueVideos.set(video.id, video);
  }

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Opens a user's profile, reads the profile info and collects up to
 * `maxVideos` video links.
 *
 * On success the browser is handed back to the caller, who is responsible for
 * closing it. On any failure after the browser was obtained, the browser is
 * closed here before the error is rethrown, so it is never leaked.
 *
 * @param {object} options
 * @param {string} options.username - Handle without '@'.
 * @param {number} [options.maxVideos=5] - Maximum number of videos returned.
 * @param {function(string): void} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 * @throws Rethrows any error from page setup, navigation or collection.
 */
async function runGetUserVideos(options) {
  const { username, maxVideos = 5, log = console.error } = options;
  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // Best effort: continue even when the video list never shows up.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const selected = allVideos.slice(0, maxVideos);
    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: selected.map((v) => ({
        id: v.id,
        // Relative hrefs are resolved against the site origin.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // The caller never receives the browser on failure, so release it here.
    await browser.close().catch(() => {});
    throw e;
  }
}
|
|
125
|
-
|
|
126
|
-
export { getUserInfo, collectVideos, runGetUserVideos };
|
|
1
|
+
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
|
|
2
|
+
import { scrollAndCollect } from '../scraper/modules/scroll-collector.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Extracts profile fields of the currently open user page by pattern-matching
 * the page's serialized HTML.
 *
 * The callback handed to `page.evaluate` reads `document` and `window`, so it is
 * kept fully self-contained (no references to outer-scope helpers).
 * NOTE(review): presumably `page` is a Puppeteer/Playwright page - confirm.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<object>} Object holding whichever of these were found:
 *   `uniqueId`, `secUid`, `nickname`, `ttSeller`, `verified`, `followerCount`,
 *   `videoCount`, `followingCount`, `heartCount`, `signature`,
 *   `locationCreated`, `region`.
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '/', '?' or '#' so a query string or fragment is never captured
    // as part of the handle (e.g. "/@user?lang=en").
    const urlMatch = window.location.href.match(/\/@([^/?#]+)/);
    if (urlMatch) result.uniqueId = urlMatch[1];

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];

    // The captured text is the body of a JSON string literal; let JSON.parse
    // resolve every escape (\n, \", \\, \uXXXX) and fall back when it is not
    // valid JSON.
    const decodeJsonString = (raw, fallback) => {
      try {
        return JSON.parse(`"${raw}"`);
      } catch {
        return fallback(raw);
      }
    };

    for (const [key, pattern] of Object.entries(patterns)) {
      const match = html.match(pattern);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) {
        result[key] = raw === 'true';
      } else if (numKeys.includes(key)) {
        result[key] = Number.parseInt(raw, 10);
      } else if (key === 'signature') {
        result[key] = decodeJsonString(raw, (text) =>
          text.replace(/\\n/g, '\n').replace(/\\\\/g, '\\'));
      } else if (key === 'nickname') {
        result[key] = decodeJsonString(raw, (text) => text);
      } else {
        result[key] = raw;
      }
    }

    return result;
  });
}
|
|
42
|
+
|
|
43
|
+
/**
 * Scrolls a user's profile and gathers links to that user's videos.
 *
 * Delegates the scrolling to `scrollAndCollect`; the in-page collector scans
 * every anchor in the document (it does not use the `container` argument) and
 * keeps those whose href contains `/@<username>/video/<digits>`.
 *
 * @param {object} page - Browser page passed through to scrollAndCollect.
 * @param {string} username - Handle without '@', used to match video links.
 * @param {number} maxVideos - Passed to scrollAndCollect as `maxItems`.
 * @param {function(string): void} log - Progress logger.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Videos keyed by
 *   id; the first occurrence of each id wins. Empty when nothing was collected.
 */
async function collectVideos(page, username, maxVideos, log) {
  const allLinks = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Kept self-contained: it only touches `document` and its own arguments.
    collectFn: (container, args) => {
      const pattern = `/@${args.handle}/video/`;
      const items = [];
      for (const el of Array.from(document.querySelectorAll('a'))) {
        const href = el.getAttribute('href') || '';
        if (!href.includes(pattern)) continue;
        const idMatch = href.match(/\/video\/(\d+)/);
        if (idMatch) items.push({ id: idMatch[1], href });
      }
      return { items };
    },
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const uniqueCount = new Set(allItems.map((v) => v.id)).size;
      if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
        log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
      }
    },
  });

  // Tolerate a missing result and malformed entries instead of throwing.
  const uniqueVideos = new Map();
  for (const video of allLinks ?? []) {
    if (video?.id == null) continue;
    if (!uniqueVideos.has(video.id)) uniqueVideos.set(video.id, video);
  }

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Opens a user's profile, reads the profile info and collects up to
 * `maxVideos` video links.
 *
 * On success the browser is handed back to the caller, who is responsible for
 * closing it. On any failure after the browser was obtained, the browser is
 * closed here before the error is rethrown, so it is never leaked.
 *
 * @param {object} options
 * @param {string} options.username - Handle without '@'.
 * @param {number} [options.maxVideos=5] - Maximum number of videos returned.
 * @param {function(string): void} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 * @throws Rethrows any error from page setup, navigation or collection.
 */
async function runGetUserVideos(options) {
  const { username, maxVideos = 5, log = console.error } = options;
  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // Best effort: continue even when the video list never shows up.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const selected = allVideos.slice(0, maxVideos);
    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: selected.map((v) => ({
        id: v.id,
        // Relative hrefs are resolved against the site origin.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // The caller never receives the browser on failure, so release it here.
    await browser.close().catch(() => {});
    throw e;
  }
}
|
|
125
|
+
|
|
126
|
+
export { getUserInfo, collectVideos, runGetUserVideos };
|