tt-help-cli-ycl 1.3.13 → 1.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/cli/attach.js +160 -0
- package/src/cli/config.js +39 -3
- package/src/cli/explore.js +17 -10
- package/src/cli/info.js +88 -0
- package/src/lib/args.js +76 -15
- package/src/lib/constants.js +100 -20
- package/src/lib/parse-ssr.mjs +69 -0
- package/src/lib/scrape.js +69 -20
- package/src/lib/tiktok-scraper.mjs +176 -0
- package/src/lib/url.js +2 -2
- package/src/main.js +12 -18
- package/src/scraper/explore-core.js +56 -74
- package/src/watch/data-store.js +112 -7
- package/src/watch/server.js +44 -0
package/src/lib/constants.js
CHANGED
|
@@ -15,6 +15,10 @@ let server = 'http://127.0.0.1:3001';
|
|
|
15
15
|
let configFile = null;
|
|
16
16
|
let browser = null;
|
|
17
17
|
let userId = null;
|
|
18
|
+
let maxFollowing = 5;
|
|
19
|
+
let maxFollowers = 5;
|
|
20
|
+
let maxVideos = 1;
|
|
21
|
+
let maxComments = 10;
|
|
18
22
|
|
|
19
23
|
try {
|
|
20
24
|
if (existsSync(configPath)) {
|
|
@@ -31,6 +35,18 @@ try {
|
|
|
31
35
|
if (cfg.userId) {
|
|
32
36
|
userId = cfg.userId;
|
|
33
37
|
}
|
|
38
|
+
if (cfg.maxFollowing !== undefined) {
|
|
39
|
+
maxFollowing = cfg.maxFollowing;
|
|
40
|
+
}
|
|
41
|
+
if (cfg.maxFollowers !== undefined) {
|
|
42
|
+
maxFollowers = cfg.maxFollowers;
|
|
43
|
+
}
|
|
44
|
+
if (cfg.maxVideos !== undefined) {
|
|
45
|
+
maxVideos = cfg.maxVideos;
|
|
46
|
+
}
|
|
47
|
+
if (cfg.maxComments !== undefined) {
|
|
48
|
+
maxComments = cfg.maxComments;
|
|
49
|
+
}
|
|
34
50
|
configFile = configPath;
|
|
35
51
|
}
|
|
36
52
|
} catch {
|
|
@@ -53,34 +69,86 @@ function saveUserId(id) {
|
|
|
53
69
|
configFile = configPath;
|
|
54
70
|
}
|
|
55
71
|
|
|
72
|
+
/**
 * Persist the "max following" limit and update the in-memory value.
 *
 * Loads the existing config file when it exists (so other keys are kept),
 * sets `maxFollowing`, writes the file back as 2-space-indented JSON, then
 * updates the module-level `maxFollowing` and points `configFile` at the
 * config path.
 *
 * @param {string|number} val - Requested limit, parsed as a base-10 integer.
 *   Input that is not a non-negative integer falls back to 5.
 * @throws {SyntaxError} If the existing config file is not valid JSON.
 */
function saveMaxFollowing(val) {
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
  // Explicit radix plus an explicit validity check instead of `|| 5`, so a
  // deliberate 0 is stored as 0 and negative values are not persisted.
  const parsed = Number.parseInt(val, 10);
  cfg.maxFollowing = Number.isInteger(parsed) && parsed >= 0 ? parsed : 5;
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
  maxFollowing = cfg.maxFollowing;
  configFile = configPath;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Persist the "max followers" limit and update the in-memory value.
 *
 * Loads the existing config file when it exists (so other keys are kept),
 * sets `maxFollowers`, writes the file back as 2-space-indented JSON, then
 * updates the module-level `maxFollowers` and points `configFile` at the
 * config path.
 *
 * @param {string|number} val - Requested limit, parsed as a base-10 integer.
 *   Input that is not a non-negative integer falls back to 5.
 * @throws {SyntaxError} If the existing config file is not valid JSON.
 */
function saveMaxFollowers(val) {
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
  // Explicit radix plus an explicit validity check instead of `|| 5`, so a
  // deliberate 0 is stored as 0 and negative values are not persisted.
  const parsed = Number.parseInt(val, 10);
  cfg.maxFollowers = Number.isInteger(parsed) && parsed >= 0 ? parsed : 5;
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
  maxFollowers = cfg.maxFollowers;
  configFile = configPath;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Persist the "max videos" limit and update the in-memory value.
 *
 * Loads the existing config file when it exists (so other keys are kept),
 * sets `maxVideos`, writes the file back as 2-space-indented JSON, then
 * updates the module-level `maxVideos` and points `configFile` at the
 * config path.
 *
 * @param {string|number} val - Requested limit, parsed as a base-10 integer.
 *   Input that is not a non-negative integer falls back to 1.
 * @throws {SyntaxError} If the existing config file is not valid JSON.
 */
function saveMaxVideos(val) {
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
  // Explicit radix plus an explicit validity check instead of `|| 1`, so a
  // deliberate 0 is stored as 0 and negative values are not persisted.
  const parsed = Number.parseInt(val, 10);
  cfg.maxVideos = Number.isInteger(parsed) && parsed >= 0 ? parsed : 1;
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
  maxVideos = cfg.maxVideos;
  configFile = configPath;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Persist the "max comments" limit and update the in-memory value.
 *
 * Loads the existing config file when it exists (so other keys are kept),
 * sets `maxComments`, writes the file back as 2-space-indented JSON, then
 * updates the module-level `maxComments` and points `configFile` at the
 * config path.
 *
 * @param {string|number} val - Requested limit, parsed as a base-10 integer.
 *   Input that is not a non-negative integer falls back to 10.
 * @throws {SyntaxError} If the existing config file is not valid JSON.
 */
function saveMaxComments(val) {
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
  // Explicit radix plus an explicit validity check instead of `|| 10`, so a
  // deliberate 0 is stored as 0 and negative values are not persisted.
  const parsed = Number.parseInt(val, 10);
  cfg.maxComments = Number.isInteger(parsed) && parsed >= 0 ? parsed : 10;
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
  maxComments = cfg.maxComments;
  configFile = configPath;
}
|
|
103
|
+
|
|
56
104
|
const HELP_TEXT = [
|
|
57
|
-
'用法:
|
|
105
|
+
'用法:',
|
|
58
106
|
'',
|
|
59
|
-
'
|
|
60
|
-
'
|
|
61
|
-
'
|
|
62
|
-
'
|
|
63
|
-
'
|
|
64
|
-
'
|
|
65
|
-
'
|
|
66
|
-
'
|
|
67
|
-
'
|
|
68
|
-
'
|
|
69
|
-
'
|
|
70
|
-
'
|
|
71
|
-
'
|
|
72
|
-
'
|
|
73
|
-
'
|
|
74
|
-
'
|
|
107
|
+
' tt-help explore <用户名> [preset] [选项]',
|
|
108
|
+
' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3001',
|
|
109
|
+
' 预设: fast, normal(默认), slow, stealth',
|
|
110
|
+
' 选项:',
|
|
111
|
+
' --server <URL> 服务端地址,默认 http://127.0.0.1:3001',
|
|
112
|
+
' --location <国家代码> 国家筛选,逗号分隔,默认 PL,NL,BE,DE,FR,IT,ES,IE',
|
|
113
|
+
' --max-comments <数量> 每视频最大评论数,默认 10',
|
|
114
|
+
' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
|
|
115
|
+
' --enable-follow 启用关注/粉丝提取(默认启用)',
|
|
116
|
+
' --disable-follow 禁用关注/粉丝提取',
|
|
117
|
+
' --max-following <数量> 最大获取关注数,默认 5',
|
|
118
|
+
' --max-followers <数量> 最大获取粉丝数,默认 5',
|
|
119
|
+
' --max-users <数量> 最大处理用户数,默认无限制',
|
|
120
|
+
' --port <端口号> CDP 端口,默认 9222',
|
|
121
|
+
' --profile <名称> 浏览器 Profile 名称',
|
|
122
|
+
' --user-id <编号> 客户端编号(设备ID),默认自动生成',
|
|
123
|
+
'',
|
|
124
|
+
' tt-help info <URL> [URL2 ...] [--onlyvideo]',
|
|
125
|
+
' 获取用户/视频信息,支持多个 URL',
|
|
126
|
+
' 主页 URL → 返回用户信息',
|
|
127
|
+
' 视频 URL → 返回用户信息 + 视频信息',
|
|
128
|
+
' 视频 URL + --onlyvideo → 只返回视频信息',
|
|
129
|
+
' 示例: tt-help info https://www.tiktok.com/@nike',
|
|
130
|
+
' tt-help info https://www.tiktok.com/@nike/video/7234567890 --onlyvideo',
|
|
131
|
+
'',
|
|
132
|
+
' tt-help attach [-p 并行数] [-i 间隔秒数] [-s 服务端地址]',
|
|
133
|
+
' 后台轮询服务端任务接口,自动抓取 TikTok 用户信息',
|
|
134
|
+
' -p, --parallel <N> 并行抓取数(默认: 1)',
|
|
135
|
+
' -i, --interval <N> 无任务时轮询间隔,单位秒(默认: 10)',
|
|
136
|
+
' -s, --server <URL> 服务端地址(默认: http://127.0.0.1:3001)',
|
|
137
|
+
' 示例: tt-help attach -p 5 -i 10',
|
|
138
|
+
'',
|
|
139
|
+
' config [show|set|reset]',
|
|
75
140
|
' config 查看当前配置',
|
|
76
|
-
|
|
141
|
+
' config set <key> <value> 设置配置(key: proxy, server, browser, userId, maxFollowing, maxFollowers, maxVideos, maxComments)',
|
|
77
142
|
' config reset 重置所有配置为默认',
|
|
143
|
+
'',
|
|
144
|
+
' 全局选项:',
|
|
78
145
|
' -h, --help 显示帮助',
|
|
79
146
|
' --version 显示版本号',
|
|
80
147
|
'',
|
|
81
|
-
' 示例: tt-help
|
|
82
|
-
' tt-help explore
|
|
148
|
+
' 示例: tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@adidas',
|
|
149
|
+
' tt-help explore qiqi23280 fast --location ES --max-comments 50',
|
|
83
150
|
' tt-help config set server http://127.0.0.1:3001',
|
|
151
|
+
' tt-help attach -p 5 -i 10',
|
|
84
152
|
];
|
|
85
153
|
|
|
86
154
|
function getConfigText() {
|
|
@@ -99,6 +167,10 @@ function getConfigText() {
|
|
|
99
167
|
` 服务端: ${server}`,
|
|
100
168
|
` 浏览器: ${browser || '未配置(将自动探测或回退)'}`,
|
|
101
169
|
` 用户号: ${currentUserId || '未设置(首次运行 auto 自动创建)'}`,
|
|
170
|
+
` 商家关注采集数: ${maxFollowing}`,
|
|
171
|
+
` 粉丝采集数: ${maxFollowers}`,
|
|
172
|
+
` 视频采集数: ${maxVideos}`,
|
|
173
|
+
` 评论采集数: ${maxComments}`,
|
|
102
174
|
` 输出格式: json`,
|
|
103
175
|
` 默认输出: ${DEFAULT_OUTPUT}`,
|
|
104
176
|
` 配置文件: ${configFile || '无(使用默认值)'}`,
|
|
@@ -113,7 +185,15 @@ export {
|
|
|
113
185
|
HELP_TEXT,
|
|
114
186
|
browser,
|
|
115
187
|
userId,
|
|
188
|
+
maxFollowing,
|
|
189
|
+
maxFollowers,
|
|
190
|
+
maxVideos,
|
|
191
|
+
maxComments,
|
|
116
192
|
saveBrowser,
|
|
117
193
|
saveUserId,
|
|
194
|
+
saveMaxFollowing,
|
|
195
|
+
saveMaxFollowers,
|
|
196
|
+
saveMaxVideos,
|
|
197
|
+
saveMaxComments,
|
|
118
198
|
getConfigText,
|
|
119
199
|
};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
 * Extract and parse the JSON embedded in the script element that carries the
 * `__UNIVERSAL_DATA_FOR_REHYDRATION__` marker.
 *
 * @param {string} rawHtml - Raw page source.
 * @returns {*} The parsed JSON value, or `null` when the marker, the end of
 *   the opening tag, or the closing `</script>` cannot be located.
 * @throws {SyntaxError} If the extracted text is not valid JSON.
 */
function parseSSR(rawHtml) {
  const marker = '__UNIVERSAL_DATA_FOR_REHYDRATION__';
  const dataStart = rawHtml.indexOf(marker);
  if (dataStart < 0) return null;

  // Search for the '>' starting from the <script tag that precedes the marker,
  // so the '>' we find belongs to the right tag; if no <script precedes the
  // marker, fall back to the first '>' after the marker itself.
  const scriptStart = rawHtml.lastIndexOf('<script', dataStart);
  const tagEnd = rawHtml.indexOf('>', scriptStart >= 0 ? scriptStart : dataStart);
  // Check the raw indexOf result: after adding 1 a "not found" (-1) would
  // become 0 and could never be detected.
  if (tagEnd < 0) return null;

  const sIdx = tagEnd + 1;
  const eIdx = rawHtml.indexOf('</script>', sIdx);
  if (eIdx < 0) return null;

  return JSON.parse(rawHtml.substring(sIdx, eIdx));
}
|
|
12
|
+
|
|
13
|
+
/**
 * Build a flat user summary from the page's rehydration data.
 *
 * Reads `__DEFAULT_SCOPE__['webapp.user-detail'].userInfo` from the value
 * returned by `parseSSR` and copies selected `user` and `stats` fields.
 *
 * @param {string} rawHtml - Raw page source, passed to `parseSSR`.
 * @returns {object|null} The user summary, or `null` when the rehydration
 *   data, the user-detail scope, or the `user` record is absent.
 */
export function parseUserInfo(rawHtml) {
  const data = parseSSR(rawHtml);
  if (!data) return null;

  // Any level of this path may be missing in the payload; treat that as
  // "no user" instead of letting a TypeError escape.
  const userInfo = data.__DEFAULT_SCOPE__?.['webapp.user-detail']?.userInfo;
  const u = userInfo?.user;
  if (!u) return null;
  // A user record without stats still yields a result; counts are undefined.
  const s = userInfo.stats ?? {};

  return {
    uniqueId: u.uniqueId,
    nickname: u.nickname,
    id: u.id,
    verified: u.verified,
    privateAccount: u.privateAccount,
    language: u.language,
    bio: u.signature || '',
    // Largest available avatar first.
    avatar: u.avatarLarger || u.avatarMedium || u.avatarThumb || '',
    followerCount: s.followerCount,
    followingCount: s.followingCount,
    heartCount: s.heartCount,
    videoCount: s.videoCount,
    diggCount: s.diggCount,
    friendCount: s.friendCount,
    secUid: u.secUid,
    ttSeller: u.ttSeller || false,
    locationCreated: u.locationCreated || null,
  };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Build a video summary from the page's rehydration data.
 *
 * Reads `__DEFAULT_SCOPE__['webapp.video-detail'].itemInfo.itemStruct` from
 * the value returned by `parseSSR` and copies selected video, author and
 * stats fields.
 *
 * @param {string} rawHtml - Raw page source, passed to `parseSSR`.
 * @returns {object|null} The video summary, or `null` when the rehydration
 *   data or any part of the path to `itemStruct` is absent.
 */
export function parseVideoInfo(rawHtml) {
  const data = parseSSR(rawHtml);
  if (!data) return null;

  // Any level of this path may be missing in the payload; treat that as
  // "no video" instead of letting a TypeError escape.
  const item = data.__DEFAULT_SCOPE__?.['webapp.video-detail']?.itemInfo?.itemStruct;
  if (!item) return null;

  const author = item.author || {};
  const stats = item.stats || {};
  return {
    id: item.id,
    desc: item.desc || '',
    createTime: item.createTime || null,
    locationCreated: item.locationCreated || null,
    author: {
      uniqueId: author.uniqueId,
      nickname: author.nickname,
      id: author.id,
      verified: author.verified,
      secUid: author.secUid,
    },
    stats: {
      playCount: stats.playCount,
      diggCount: stats.diggCount,
      commentCount: stats.commentCount,
      shareCount: stats.shareCount,
      collectCount: stats.collectCount,
    },
  };
}
|
package/src/lib/scrape.js
CHANGED
|
@@ -1,25 +1,75 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
import { TikTokScraper } from './tiktok-scraper.mjs';
|
|
2
|
+
import { isProfileUrl, isVideoUrl, extractUniqueId, normalizeUsername } from './url.js';
|
|
3
|
+
|
|
4
|
+
// Lazy singleton for TikTokScraper
|
|
5
|
+
let scraperInstance = null;
|
|
6
|
+
let scraperInitPromise = null;
|
|
7
|
+
|
|
8
|
+
/**
 * Return the shared TikTokScraper, creating and initialising it on first use.
 *
 * Concurrent callers during initialisation share one in-flight promise. If
 * initialisation fails, the in-flight promise is cleared so the next call
 * starts a fresh attempt instead of receiving the same rejection forever.
 *
 * @returns {Promise<TikTokScraper>} The initialised scraper.
 * @throws Whatever `TikTokScraper#init` rejects with.
 */
async function getScraper() {
  if (scraperInstance) return scraperInstance;
  if (scraperInitPromise) return scraperInitPromise;

  const pending = (async () => {
    const scraper = new TikTokScraper();
    try {
      await scraper.init();
    } catch (err) {
      // Best-effort cleanup of a partially started scraper; the init error is
      // the one the caller needs to see, so a close failure is ignored here.
      try {
        await scraper.close();
      } catch {
        /* ignored: reporting the original init failure */
      }
      throw err;
    }
    scraperInstance = scraper;
    return scraper;
  })();

  scraperInitPromise = pending;
  // Clear the in-flight marker once settled (success or failure). Done via
  // then() rather than inside the async body so it always runs after the
  // assignment above.
  const release = () => {
    if (scraperInitPromise === pending) scraperInitPromise = null;
  };
  pending.then(release, release);
  return pending;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Close the shared scraper, if one has been created.
 *
 * The singleton reference is dropped before closing, so a failing `close()`
 * cannot leave the module handing out a scraper that is already shutting down.
 *
 * @returns {Promise<void>}
 * @throws Whatever the scraper's `close()` rejects with.
 */
export async function closeScraper() {
  const instance = scraperInstance;
  if (!instance) return;
  scraperInstance = null;
  await instance.close();
}
|
|
27
|
+
|
|
28
|
+
// Map parseUserInfo output to legacy parser.js format
|
|
29
|
+
/**
 * Rename the fields of a parsed user summary to the legacy output shape
 * (`id` -> `uid`, `bio` -> `signature`, `avatar` -> `avatarLarger`).
 *
 * @param {object|null|undefined} user - Parsed user summary.
 * @returns {object|null} The legacy-shaped record, or `null` for a falsy input.
 */
function mapUserInfo(user) {
  if (!user) return null;

  const {
    uniqueId,
    id: uid,
    secUid,
    nickname,
    bio: signature,
    ttSeller,
    verified,
    followerCount,
    followingCount,
    heartCount,
    videoCount,
    diggCount,
    avatar: avatarLarger,
    locationCreated,
  } = user;

  return {
    uniqueId,
    uid,
    secUid,
    nickname,
    signature,
    ttSeller,
    verified,
    followerCount,
    followingCount,
    heartCount,
    videoCount,
    diggCount,
    avatarLarger,
    locationCreated,
  };
}
|
|
48
|
+
|
|
49
|
+
// Map parseVideoInfo output to legacy format
|
|
50
|
+
/**
 * Pull the `locationCreated` value out of a parsed video summary.
 *
 * @param {object|null|undefined} video - Parsed video summary.
 * @returns {*} `video.locationCreated`, or `null` for a falsy input.
 */
function mapVideoLocation(video) {
  return video ? video.locationCreated : null;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Fetch a user's profile data for a TikTok URL, in the legacy output shape.
 *
 * The handle is extracted from the URL with `extractUniqueId`, normalised with
 * `normalizeUsername`, looked up through the shared scraper, and mapped with
 * `mapUserInfo`.
 *
 * @param {string} url - A URL containing an `/@handle` segment.
 * @returns {Promise<object>} The mapped user record.
 * @throws {Error} If no handle can be extracted from `url`, or the scraper
 *   returns no user information.
 */
export async function extractUserData(url) {
  // Validate the URL before asking for the scraper: obtaining the scraper may
  // launch a browser, which is pointless for input we are about to reject.
  const uniqueId = extractUniqueId(url);
  if (!uniqueId) throw new Error(`无法从URL提取用户名: ${url}`);

  const scraper = await getScraper();
  const user = await scraper.getUserInfo(normalizeUsername(uniqueId));
  if (!user) throw new Error('无法解析用户信息');
  return mapUserInfo(user);
}
|
|
13
63
|
|
|
14
|
-
export async function extractVideoLocation(videoUrl
|
|
15
|
-
const
|
|
16
|
-
|
|
64
|
+
/**
 * Look up a video through the shared scraper and return its creation location.
 *
 * @param {string} videoUrl - URL of the video page.
 * @returns {Promise<*>} The result of `mapVideoLocation` for the fetched video.
 */
export async function extractVideoLocation(videoUrl) {
  const sharedScraper = await getScraper();
  const videoInfo = await sharedScraper.getVideoInfo(videoUrl);
  const location = mapVideoLocation(videoInfo);
  return location;
}
|
|
18
69
|
|
|
19
|
-
export async function processUrl(url
|
|
70
|
+
export async function processUrl(url) {
|
|
20
71
|
if (isProfileUrl(url)) {
|
|
21
|
-
const
|
|
22
|
-
const profileData = await extractUserData(profileUrl, proxyUrl);
|
|
72
|
+
const profileData = await extractUserData(url);
|
|
23
73
|
return [profileData];
|
|
24
74
|
}
|
|
25
75
|
|
|
@@ -27,10 +77,9 @@ export async function processUrl(url, proxyUrl) {
|
|
|
27
77
|
const profileHandle = extractUniqueId(url);
|
|
28
78
|
if (!profileHandle) throw new Error(`无法从视频URL提取用户主页: ${url}`);
|
|
29
79
|
|
|
30
|
-
const profileUrl = toProfileUrl(profileHandle);
|
|
31
80
|
const [profileData, locationCreated] = await Promise.all([
|
|
32
|
-
extractUserData(
|
|
33
|
-
extractVideoLocation(url
|
|
81
|
+
extractUserData(url),
|
|
82
|
+
extractVideoLocation(url),
|
|
34
83
|
]);
|
|
35
84
|
|
|
36
85
|
return [{ ...profileData, locationCreated }];
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { chromium } from 'playwright';
|
|
2
|
+
import { detectBrowser } from './browser/launch.js';
|
|
3
|
+
import { parseUserInfo, parseVideoInfo } from './parse-ssr.mjs';
|
|
4
|
+
|
|
5
|
+
const DEFAULT_POOL_SIZE = 3;
|
|
6
|
+
const DEFAULT_WAF_TTL = 120000;
|
|
7
|
+
const DEFAULT_WARM_URL = 'https://www.tiktok.com/@nike';
|
|
8
|
+
|
|
9
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
12
|
+
|
|
13
|
+
/**
 * Pairs a page with its own PromiseQueue, exposed as `lock`.
 */
class PageSlot {
  /**
   * @param {object} page - The page object this slot holds.
   */
  constructor(page) {
    const queue = new PromiseQueue();
    this.page = page;
    this.lock = queue;
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Runs submitted tasks one at a time, in submission order.
 */
class PromiseQueue {
  constructor() {
    this._queue = [];
    this._processing = false;
  }

  /**
   * Enqueue a task and start draining the queue if it is idle.
   *
   * @param {Function} task - Called with no arguments; may return a promise.
   * @returns {Promise<*>} Settles with the task's result or its error.
   */
  async run(task) {
    const settled = new Promise((resolve, reject) => {
      this._queue.push({ task, resolve, reject });
    });
    this._process();
    return settled;
  }

  /**
   * Drain the queue, awaiting each task before starting the next. A call made
   * while a drain is already in progress returns immediately.
   */
  async _process() {
    if (this._processing) return;
    this._processing = true;

    while (this._queue.length !== 0) {
      const [job] = this._queue.splice(0, 1);
      const { task, resolve: fulfil, reject: fail } = job;
      let outcome;
      try {
        outcome = await task();
      } catch (err) {
        fail(err);
        continue;
      }
      fulfil(outcome);
    }

    this._processing = false;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Headless-browser scraper that reads TikTok pages through a small pool of
 * pages ("slots"), each guarded by its own queue so one page never serves two
 * navigations at once.
 *
 * Before fetching, the scraper periodically navigates to a "warm" URL (called
 * WAF warming in this code; presumably to pass an anti-bot check — confirm).
 */
export class TikTokScraper {
  /**
   * @param {object} [options]
   * @param {number} [options.poolSize] - Number of pages to open in `init`.
   * @param {number} [options.wafTtl] - Milliseconds a warm-up stays valid.
   * @param {string} [options.warmUrl] - URL visited by the warm-up.
   */
  constructor({ poolSize = DEFAULT_POOL_SIZE, wafTtl = DEFAULT_WAF_TTL, warmUrl = DEFAULT_WARM_URL } = {}) {
    this.poolSize = poolSize;
    this.wafTtl = wafTtl;
    this.warmUrl = warmUrl;
    this.browser = null;
    this.context = null;
    this.slots = [];
    this.slotIdx = 0;
    this.lastWarmTime = 0;
    this.warmPromise = null;
  }

  /**
   * Launch the browser, open `poolSize` pages and run the first warm-up.
   *
   * @throws {Error} If no local browser executable is detected, or if launch,
   *   page creation or the warm-up fails. On failure after launch the browser
   *   is closed again so no process is left behind.
   */
  async init() {
    const executablePath = detectBrowser();
    if (!executablePath) {
      throw new Error('未找到本地浏览器(Chrome/Edge),请先安装浏览器或执行 npx playwright install');
    }
    this.browser = await chromium.launch({
      headless: true,
      executablePath,
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
    });
    try {
      this.context = await this.browser.newContext();
      for (let i = 0; i < this.poolSize; i++) {
        this.slots.push(new PageSlot(await this.context.newPage()));
      }
      await this.warmWaf();
    } catch (err) {
      // Do not leak the launched browser when setup fails part-way; the setup
      // error is the one worth reporting, so a close failure is ignored.
      try {
        await this.close();
      } catch {
        /* ignored: reporting the original setup failure */
      }
      throw err;
    }
  }

  /**
   * Close the browser and reset pool state. Does nothing if no browser is open.
   * State is reset before closing so a failing close cannot leave stale slots.
   */
  async close() {
    const browser = this.browser;
    if (!browser) return;
    this.browser = null;
    this.context = null;
    this.slots = [];
    this.lastWarmTime = 0;
    await browser.close();
  }

  /** Close and re-initialise the scraper. */
  async restart() {
    await this.close();
    await this.init();
  }

  /**
   * @returns {boolean} True when a browser exists and reports being connected.
   */
  isAlive() {
    try {
      return !!(this.browser && this.browser.isConnected());
    } catch {
      return false;
    }
  }

  /**
   * Run the warm-up navigation, sharing one in-flight run between concurrent
   * callers. The in-flight marker is cleared on success and on failure, so a
   * failed warm-up can be retried by the next call.
   *
   * @returns {Promise<void>}
   */
  async warmWaf() {
    if (this.warmPromise) return this.warmPromise;

    const pending = this._warmOnce();
    this.warmPromise = pending;
    // Cleared via then() rather than inside the async body so it always runs
    // after the assignment above, even if _warmOnce fails immediately.
    const release = () => {
      if (this.warmPromise === pending) this.warmPromise = null;
    };
    pending.then(release, release);
    return pending;
  }

  /**
   * Perform one warm-up on the first slot. It runs through that slot's queue
   * so it cannot navigate the page while a fetch is using it.
   */
  async _warmOnce() {
    const slot = this.slots[0];
    if (!slot) throw new Error('抓取器尚未初始化,请先调用 init()');
    await slot.lock.run(async () => {
      const page = await this._ensurePage(slot);
      await page.goto(this.warmUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
      await delay(1500);
      this.lastWarmTime = Date.now();
    });
  }

  /** @returns {boolean} True when the last warm-up is older than `wafTtl`. */
  _needWarm() {
    return Date.now() - this.lastWarmTime > this.wafTtl;
  }

  /**
   * Pick the next slot in round-robin order.
   *
   * @throws {Error} If there are no slots (not initialised, or closed).
   */
  _pickSlot() {
    // Index by the actual number of slots rather than the configured size, so
    // a closed or partially built pool never yields `undefined`.
    const count = this.slots.length;
    if (count === 0) throw new Error('抓取器尚未初始化,请先调用 init()');
    const slot = this.slots[this.slotIdx % count];
    this.slotIdx++;
    return slot;
  }

  /**
   * Return the slot's page, replacing it with a new page from the context if
   * it is closed or the closed-check itself throws.
   */
  async _ensurePage(slot) {
    try {
      if (!slot.page.isClosed()) return slot.page;
    } catch {
      /* fall through and open a replacement page */
    }
    slot.page = await this.context.newPage();
    return slot.page;
  }

  /**
   * Navigate the slot's page to `view-source:` + `url` and return the text of
   * every `.line-content` element found in table rows, one per line.
   *
   * @param {string} url
   * @param {PageSlot} slot
   * @returns {Promise<string>}
   */
  async _fetchViewSource(url, slot) {
    const page = await this._ensurePage(slot);

    await page.goto('view-source:' + url, {
      waitUntil: 'domcontentloaded',
      timeout: 15000,
    });

    return await page.evaluate(() => {
      const rows = document.querySelectorAll('tr');
      let content = '';
      rows.forEach(r => {
        const lc = r.querySelector('.line-content');
        if (lc) content += lc.textContent + '\n';
      });
      return content;
    });
  }

  /**
   * Fetch and parse a user's profile page.
   *
   * @param {string} uniqueId - Handle without the leading `@`.
   * @returns {Promise<object|null>} Result of `parseUserInfo`.
   */
  async getUserInfo(uniqueId) {
    if (this._needWarm()) await this.warmWaf();
    const slot = this._pickSlot();
    return slot.lock.run(async () => {
      const rawHtml = await this._fetchViewSource(
        `https://www.tiktok.com/@${uniqueId}`,
        slot
      );
      return parseUserInfo(rawHtml);
    });
  }

  /**
   * Fetch and parse a video page.
   *
   * @param {string} videoUrl
   * @returns {Promise<object|null>} Result of `parseVideoInfo`.
   */
  async getVideoInfo(videoUrl) {
    if (this._needWarm()) await this.warmWaf();
    const slot = this._pickSlot();
    return slot.lock.run(async () => {
      const rawHtml = await this._fetchViewSource(videoUrl, slot);
      return parseVideoInfo(rawHtml);
    });
  }

  /**
   * Fetch a video and then its author's profile.
   *
   * @param {string} videoUrl
   * @returns {Promise<{user: object|null, video: object}|null>} `null` when
   *   the video cannot be parsed; `user` is `null` when the video carries no
   *   author handle or the profile cannot be parsed.
   */
  async getUserAndVideo(videoUrl) {
    // getVideoInfo performs the warm-up check itself.
    const video = await this.getVideoInfo(videoUrl);
    if (!video) return null;
    // Without an author handle there is no profile URL to fetch.
    const authorId = video.author?.uniqueId;
    const user = authorId ? await this.getUserInfo(authorId) : null;
    return { user, video };
  }
}
|
package/src/lib/url.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const BASE_URL = 'https://www.tiktok.com';
|
|
2
2
|
|
|
3
3
|
/**
 * Extract the handle that follows `/@` in a URL.
 *
 * @param {string} url
 * @returns {string|null} The run of word characters, dots and hyphens after
 *   `/@`, or `null` when the URL has no such segment.
 */
export function extractUniqueId(url) {
  const handleMatch = url.match(/\/@([\w.-]+)/);
  if (handleMatch === null) {
    return null;
  }
  return handleMatch[1];
}
|
|
7
7
|
|
|
@@ -30,7 +30,7 @@ export function ensureAbsoluteUrl(href) {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
/**
 * Tell whether a URL ends at an `/@handle` segment, optionally followed by a
 * query string or fragment.
 *
 * @param {string} url
 * @returns {boolean}
 */
export function isProfileUrl(url) {
  const profilePattern = /\/@[\w.-]+(?:$|[?#])/;
  return profilePattern.test(url);
}
|
|
35
35
|
|
|
36
36
|
export function isVideoUrl(url) {
|
package/src/main.js
CHANGED
|
@@ -1,30 +1,22 @@
|
|
|
1
1
|
import { parseArgs } from './lib/args.js';
|
|
2
2
|
import { proxy, HELP_TEXT, getConfigText } from './lib/constants.js';
|
|
3
|
-
import {
|
|
4
|
-
import { handleScrape } from './cli/scrape.js';
|
|
5
|
-
import { handleVideos } from './cli/videos.js';
|
|
6
|
-
import { handleAuto } from './cli/auto.js';
|
|
3
|
+
import { handleInfo } from './cli/info.js';
|
|
7
4
|
import { handleExplore } from './cli/explore.js';
|
|
8
|
-
import {
|
|
5
|
+
import { handleAttach } from './cli/attach.js';
|
|
9
6
|
import { handleWatch } from './cli/watch.js';
|
|
10
7
|
import { handleConfig, showConfig, showUsage, version } from './cli/config.js';
|
|
11
|
-
import { runExploreDefault, runScrapeDefault } from './cli/explore-default.js';
|
|
12
8
|
|
|
13
9
|
async function main() {
|
|
14
10
|
const parsed = parseArgs();
|
|
15
11
|
|
|
16
12
|
switch (parsed.subcommand) {
|
|
17
|
-
case '
|
|
18
|
-
case '
|
|
19
|
-
case '
|
|
20
|
-
case '
|
|
21
|
-
case 'refresh':return handleRefresh(parsed);
|
|
22
|
-
case 'watch': return handleWatch(parsed);
|
|
13
|
+
case 'explore': return handleExplore(parsed);
|
|
14
|
+
case 'info': return handleInfo(parsed);
|
|
15
|
+
case 'attach': return handleAttach(parsed);
|
|
16
|
+
case 'watch': return handleWatch(parsed);
|
|
23
17
|
}
|
|
24
18
|
|
|
25
|
-
const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, showVersion, customProxy, configAction, configKey, configValue
|
|
26
|
-
const proxyUrl = customProxy || proxy;
|
|
27
|
-
const filter = parseFilter(filterStr);
|
|
19
|
+
const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, showVersion, customProxy, configAction, configKey, configValue } = parsed;
|
|
28
20
|
|
|
29
21
|
if (showVersion) {
|
|
30
22
|
console.log(version);
|
|
@@ -35,11 +27,13 @@ async function main() {
|
|
|
35
27
|
if (showCfg) return showConfig(urls, outputFile);
|
|
36
28
|
if (urls.length === 0 && exploreCount === 0) return showUsage();
|
|
37
29
|
|
|
30
|
+
// 默认行为:URL 走 info,--explore 走 explore
|
|
38
31
|
if (exploreCount > 0) {
|
|
39
|
-
|
|
40
|
-
} else {
|
|
41
|
-
await runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter);
|
|
32
|
+
return handleExplore({ ...parsed, subcommand: 'explore' });
|
|
42
33
|
}
|
|
34
|
+
|
|
35
|
+
// 有 URL 默认走 info
|
|
36
|
+
return handleInfo(parsed);
|
|
43
37
|
}
|
|
44
38
|
|
|
45
39
|
main().catch(err => {
|