tt-help-cli-ycl 1.3.14 → 1.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/lib/api-interceptor.js +124 -0
- package/src/lib/args.js +2 -2
- package/src/lib/constants.js +2 -2
- package/src/lib/tiktok-scraper.mjs +19 -7
- package/src/scraper/explore-core.js +1 -1
- package/src/videos/core.js +7 -36
package/package.json
CHANGED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { delay } from './delay.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Collects a user's video list by intercepting TikTok's internal
 * `/api/post/item_list/` API instead of scrolling and parsing the DOM.
 *
 * Flow:
 *   1. Listen for the item_list request/response while navigating to the
 *      user's profile page; the first response provides page one.
 *   2. Re-issue the captured request URL from inside the page with the
 *      `cursor` value swapped to walk the following pages.
 *   3. Always detach the listeners and cancel the fallback timer on exit.
 *
 * Videos are de-duplicated by id while they are collected, so `maxVideos`
 * caps the number of unique videos returned.
 *
 * @param {import('playwright').Page} page - Playwright page (CDP connection).
 * @param {string} username - TikTok user name (without the leading "@").
 * @param {number} maxVideos - Maximum number of unique videos to collect.
 * @param {Function} log - Logging callback; receives one message string.
 * @returns {Promise<Map<string, {id: string, href: string}>|null>} Map keyed
 *   by video id (same shape collectVideos returns), or `null` when no usable
 *   API response was observed within the wait window.
 */
async function fetchUserVideosAPI(page, username, maxVideos, log) {
  const API_PATH = '/api/post/item_list/';
  const API_WAIT_MS = 8000;
  const CURSOR_PARAM = /cursor=\d+/;

  const url = `https://www.tiktok.com/@${username}`;
  const videoMap = new Map();

  // Adds not-yet-seen videos to the result until the cap is reached.
  const addItems = (itemList) => {
    for (const item of itemList) {
      if (videoMap.size >= maxVideos) break;
      if (!item || item.id == null || videoMap.has(item.id)) continue;
      const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
      videoMap.set(item.id, { id: item.id, href });
    }
  };

  // 1. Register the response interceptor and capture the request URL.
  let apiResolve = null;
  const apiPromise = new Promise((resolve) => { apiResolve = resolve; });

  let apiRequestUrl = null;
  let timeoutId = null;

  const responseHandler = async (response) => {
    if (response.url().includes(API_PATH)) {
      try {
        apiResolve(await response.json());
      } catch (e) {
        // Body unavailable or not JSON: report "no data" to the waiter.
        apiResolve(null);
      }
    }
  };

  const requestHandler = (request) => {
    // Only the first matching request is kept; it is the template for paging.
    if (request.url().includes(API_PATH) && !apiRequestUrl) {
      apiRequestUrl = request.url();
    }
  };

  page.on('response', responseHandler);
  page.on('request', requestHandler);

  try {
    // 2. Navigate and wait for the API response.
    log(' [API拦截] 导航到用户页,等待 /api/post/item_list/ ...');
    const t0 = Date.now();

    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => resolve(null), API_WAIT_MS);
    });
    const data = await Promise.race([apiPromise, timeoutPromise]);

    const elapsed = Date.now() - t0;

    if (!data || !Array.isArray(data.itemList)) {
      log(` [API拦截] ${elapsed}ms 后未拿到 API 数据`);
      return null;
    }

    // 3. Extract the first page of videos.
    const firstPageItems = data.itemList;
    addItems(firstPageItems);

    log(` [API拦截] ${elapsed}ms 获取首页 ${firstPageItems.length} 条视频`);

    // 4. Page through the remaining videos.
    let cursor = data.cursor;
    let hasMore = data.hasMore;

    while (hasMore && cursor && videoMap.size < maxVideos) {
      if (!apiRequestUrl) {
        log(' [API拦截] 未捕获到 API 请求 URL,无法翻页');
        break;
      }
      if (!CURSOR_PARAM.test(apiRequestUrl)) {
        // Without a cursor to rewrite, every fetch would return page one.
        log(' [API拦截] 请求 URL 中没有 cursor 参数,无法翻页');
        break;
      }

      const requestedCursor = cursor;
      // Function replacer: the cursor is inserted verbatim, never parsed
      // as a `$`-pattern.
      const newUrl = apiRequestUrl.replace(CURSOR_PARAM, () => `cursor=${requestedCursor}`);

      let pageData;
      try {
        // Runs inside the page so the request carries the page's cookies.
        pageData = await page.evaluate(async (u) => {
          const res = await fetch(u);
          return await res.json();
        }, newUrl);
      } catch (e) {
        log(` [API拦截] 翻页失败: ${e.message}`);
        break;
      }

      if (!pageData) {
        log(' [API拦截] 翻页失败: 响应为空');
        break;
      }

      if (Array.isArray(pageData.itemList)) {
        addItems(pageData.itemList);
        log(` [API拦截] 翻页 cursor=${requestedCursor},获取 ${pageData.itemList.length} 条,累计 ${videoMap.size}`);
      }

      hasMore = pageData.hasMore;
      if (hasMore && String(pageData.cursor) === String(requestedCursor)) {
        // A cursor that does not advance would refetch the same page forever.
        log(' [API拦截] cursor 未前进,停止翻页');
        break;
      }
      cursor = pageData.cursor;

      await delay(300, 600);
    }

    log(` [API拦截] 总计获取 ${videoMap.size} 条视频`);

    return videoMap;
  } finally {
    // 5. Always clean up: stale listeners would pile up across calls, and a
    //    pending timer would keep the process alive after the work is done.
    if (timeoutId !== null) clearTimeout(timeoutId);
    page.off('response', responseHandler);
    page.off('request', requestHandler);
  }
}
|
|
123
|
+
|
|
124
|
+
export { fetchUserVideosAPI };
|
package/src/lib/args.js
CHANGED
|
@@ -164,7 +164,7 @@ function parseExploreArgs(args) {
|
|
|
164
164
|
let explorePort = null;
|
|
165
165
|
let exploreProfile = null;
|
|
166
166
|
let exploreUserId = null;
|
|
167
|
-
let exploreMaxVideos =
|
|
167
|
+
let exploreMaxVideos = 16;
|
|
168
168
|
|
|
169
169
|
const positional = [];
|
|
170
170
|
const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
|
|
@@ -196,7 +196,7 @@ function parseExploreArgs(args) {
|
|
|
196
196
|
} else if (arg === '--user-id') {
|
|
197
197
|
exploreUserId = args[++i];
|
|
198
198
|
} else if (arg === '--max-videos') {
|
|
199
|
-
exploreMaxVideos = parseInt(args[++i]) ||
|
|
199
|
+
exploreMaxVideos = parseInt(args[++i]) || 16;
|
|
200
200
|
} else {
|
|
201
201
|
positional.push(arg);
|
|
202
202
|
}
|
package/src/lib/constants.js
CHANGED
|
@@ -17,7 +17,7 @@ let browser = null;
|
|
|
17
17
|
let userId = null;
|
|
18
18
|
let maxFollowing = 5;
|
|
19
19
|
let maxFollowers = 5;
|
|
20
|
-
let maxVideos =
|
|
20
|
+
let maxVideos = 16;
|
|
21
21
|
let maxComments = 10;
|
|
22
22
|
|
|
23
23
|
try {
|
|
@@ -87,7 +87,7 @@ function saveMaxFollowers(val) {
|
|
|
87
87
|
|
|
88
88
|
function saveMaxVideos(val) {
|
|
89
89
|
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
90
|
-
cfg.maxVideos = parseInt(val) ||
|
|
90
|
+
cfg.maxVideos = parseInt(val) || 16;
|
|
91
91
|
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
92
92
|
maxVideos = cfg.maxVideos;
|
|
93
93
|
configFile = configPath;
|
|
package/src/lib/tiktok-scraper.mjs
CHANGED
|
@@ -146,28 +146,40 @@ export class TikTokScraper {
|
|
|
146
146
|
}
|
|
147
147
|
|
|
148
148
|
async getUserInfo(uniqueId) {
|
|
149
|
-
if (this._needWarm()) await this.warmWaf();
|
|
150
149
|
const slot = this._pickSlot();
|
|
151
150
|
return slot.lock.run(async () => {
|
|
152
|
-
|
|
151
|
+
let rawHtml = await this._fetchViewSource(
|
|
153
152
|
`https://www.tiktok.com/@${uniqueId}`,
|
|
154
153
|
slot
|
|
155
154
|
);
|
|
156
|
-
|
|
155
|
+
let result = parseUserInfo(rawHtml);
|
|
156
|
+
if (!result) {
|
|
157
|
+
await this.warmWaf();
|
|
158
|
+
rawHtml = await this._fetchViewSource(
|
|
159
|
+
`https://www.tiktok.com/@${uniqueId}`,
|
|
160
|
+
slot
|
|
161
|
+
);
|
|
162
|
+
result = parseUserInfo(rawHtml);
|
|
163
|
+
}
|
|
164
|
+
return result || null;
|
|
157
165
|
});
|
|
158
166
|
}
|
|
159
167
|
|
|
160
168
|
async getVideoInfo(videoUrl) {
|
|
161
|
-
if (this._needWarm()) await this.warmWaf();
|
|
162
169
|
const slot = this._pickSlot();
|
|
163
170
|
return slot.lock.run(async () => {
|
|
164
|
-
|
|
165
|
-
|
|
171
|
+
let rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
172
|
+
let result = parseVideoInfo(rawHtml);
|
|
173
|
+
if (!result) {
|
|
174
|
+
await this.warmWaf();
|
|
175
|
+
rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
176
|
+
result = parseVideoInfo(rawHtml);
|
|
177
|
+
}
|
|
178
|
+
return result || null;
|
|
166
179
|
});
|
|
167
180
|
}
|
|
168
181
|
|
|
169
182
|
async getUserAndVideo(videoUrl) {
|
|
170
|
-
if (this._needWarm()) await this.warmWaf();
|
|
171
183
|
const video = await this.getVideoInfo(videoUrl);
|
|
172
184
|
if (!video) return null;
|
|
173
185
|
const user = await this.getUserInfo(video.author.uniqueId);
|
|
package/src/scraper/explore-core.js
CHANGED
|
@@ -15,7 +15,7 @@ import { maxFollowing as globalMaxFollowing, maxFollowers as globalMaxFollowers,
|
|
|
15
15
|
|
|
16
16
|
async function processExplore(page, username, options, log) {
|
|
17
17
|
const {
|
|
18
|
-
maxVideos =
|
|
18
|
+
maxVideos = 16,
|
|
19
19
|
enableFollow = true,
|
|
20
20
|
maxFollowing = 5,
|
|
21
21
|
maxFollowers = 5,
|
package/src/videos/core.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
|
|
2
|
-
import {
|
|
2
|
+
import { fetchUserVideosAPI } from '../lib/api-interceptor.js';
|
|
3
3
|
|
|
4
4
|
async function getUserInfo(page) {
|
|
5
5
|
return await page.evaluate(() => {
|
|
@@ -41,41 +41,12 @@ async function getUserInfo(page) {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
async function collectVideos(page, username, maxVideos, log) {
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
items: Array.from(document.querySelectorAll('a'))
|
|
51
|
-
.filter(el => (el.getAttribute('href') || '').includes(pattern))
|
|
52
|
-
.map(el => {
|
|
53
|
-
const href = el.getAttribute('href') || '';
|
|
54
|
-
const idMatch = href.match(/\/video\/(\d+)/);
|
|
55
|
-
return { id: idMatch ? idMatch[1] : null, href };
|
|
56
|
-
})
|
|
57
|
-
.filter(v => v.id),
|
|
58
|
-
};
|
|
59
|
-
},
|
|
60
|
-
maxItems: maxVideos,
|
|
61
|
-
delayRange: [2000, 3000],
|
|
62
|
-
staleThreshold: 5,
|
|
63
|
-
maxRounds: 500,
|
|
64
|
-
onRound: (round, items, allItems) => {
|
|
65
|
-
const uniqueCount = new Set(allItems.map(v => v.id)).size;
|
|
66
|
-
if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
|
|
67
|
-
log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
|
|
68
|
-
}
|
|
69
|
-
},
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
const uniqueVideos = new Map();
|
|
73
|
-
allLinks.forEach(v => {
|
|
74
|
-
if (!uniqueVideos.has(v.id)) uniqueVideos.set(v.id, v);
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
log(`收集完成: ${uniqueVideos.size} 个视频`);
|
|
78
|
-
return uniqueVideos;
|
|
44
|
+
const apiResult = await fetchUserVideosAPI(page, username, maxVideos, log);
|
|
45
|
+
if (apiResult && apiResult.size > 0) {
|
|
46
|
+
log(`收集完成: ${apiResult.size} 个视频`);
|
|
47
|
+
return apiResult;
|
|
48
|
+
}
|
|
49
|
+
return new Map();
|
|
79
50
|
}
|
|
80
51
|
|
|
81
52
|
async function runGetUserVideos(options) {
|