tt-help-cli-ycl 1.3.14 → 1.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/lib/api-interceptor.js +124 -0
- package/src/lib/args.js +2 -2
- package/src/lib/constants.js +2 -2
- package/src/lib/tiktok-scraper.mjs +29 -12
- package/src/scraper/explore-core.js +1 -1
- package/src/videos/core.js +7 -36
package/package.json
CHANGED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { delay } from './delay.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Collect a user's video list by intercepting TikTok's internal
 * `/api/post/item_list/` API rather than scrolling and parsing the DOM.
 * (The original author notes this is 5-10x faster than DOM scroll parsing.)
 *
 * The first page is taken from the response the profile page itself triggers;
 * later pages are fetched from inside the page by replaying the captured
 * request URL with an updated `cursor` query parameter.
 *
 * @param {import('playwright').Page} page - Playwright page (CDP connection).
 * @param {string} username - TikTok username (without the leading `@`).
 * @param {number} maxVideos - Maximum number of unique videos to collect.
 * @param {Function} log - Logging function, called with a single string.
 * @param {{navTimeoutMs?: number, apiTimeoutMs?: number}} [options] - Optional
 *   tuning: `navTimeoutMs` is the navigation timeout (default 30000) and
 *   `apiTimeoutMs` is how long to wait for the first API response after
 *   navigation (default 8000).
 * @returns {Promise<Map<string, {id: string, href: string}> | null>} Videos
 *   keyed by id (same shape as collectVideos returns), or `null` when no
 *   usable API payload was captured before the timeout.
 * @throws Propagates any error thrown by `page.goto` (e.g. navigation timeout).
 */
async function fetchUserVideosAPI(page, username, maxVideos, log, options = {}) {
  const { navTimeoutMs = 30000, apiTimeoutMs = 8000 } = options;
  const apiPath = '/api/post/item_list/';
  const url = `https://www.tiktok.com/@${username}`;

  // Videos are de-duplicated on insertion so that duplicates never consume
  // part of the maxVideos budget.
  const videoMap = new Map();
  const addItems = (itemList) => {
    for (const item of itemList) {
      if (videoMap.size >= maxVideos) break;
      // Skip malformed entries instead of emitting ".../video/undefined".
      if (item == null || item.id == null || videoMap.has(item.id)) continue;
      const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
      videoMap.set(item.id, { id: item.id, href });
    }
  };

  // 1. Register the response interceptor and capture the request URL.
  let apiResolve = null;
  const apiPromise = new Promise((resolve) => { apiResolve = resolve; });

  let apiRequestUrl = null;

  const responseHandler = async (response) => {
    if (!response.url().includes(apiPath)) return;
    try {
      const payload = await response.json();
      if (payload) apiResolve(payload);
    } catch {
      // An unreadable body (redirect, preflight, aborted request) must not end
      // the wait: a later matching response may still succeed, and the timeout
      // below covers the case where none does.
    }
  };

  const requestHandler = (request) => {
    if (!apiRequestUrl && request.url().includes(apiPath)) {
      apiRequestUrl = request.url();
    }
  };

  page.on('response', responseHandler);
  page.on('request', requestHandler);

  try {
    // 2. Navigate and wait for the API response.
    log(' [API拦截] 导航到用户页,等待 /api/post/item_list/ ...');
    const t0 = Date.now();

    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: navTimeoutMs });

    let timer = null;
    const timeoutPromise = new Promise((resolve) => {
      timer = setTimeout(() => resolve(null), apiTimeoutMs);
    });
    let data;
    try {
      data = await Promise.race([apiPromise, timeoutPromise]);
    } finally {
      // Always clear the timer so it cannot keep the event loop alive after
      // the function has returned.
      clearTimeout(timer);
    }

    const elapsed = Date.now() - t0;

    if (!data || !Array.isArray(data.itemList)) {
      log(` [API拦截] ${elapsed}ms 后未拿到 API 数据`);
      return null;
    }

    // 3. Extract the first page of videos.
    const firstPageItems = data.itemList;
    addItems(firstPageItems);

    log(` [API拦截] ${elapsed}ms 获取首页 ${firstPageItems.length} 条视频`);

    // 4. Page through the remaining videos.
    let cursor = data.cursor;
    let hasMore = data.hasMore;
    const requestedCursors = new Set();

    while (hasMore && cursor && videoMap.size < maxVideos) {
      if (!apiRequestUrl) {
        log(' [API拦截] 未捕获到 API 请求 URL,无法翻页');
        break;
      }

      // Guard against an endless loop when the API keeps answering
      // hasMore=true without ever advancing the cursor.
      const cursorKey = String(cursor);
      if (requestedCursors.has(cursorKey)) {
        log(` [API拦截] cursor=${cursor} 未推进,停止翻页`);
        break;
      }
      requestedCursors.add(cursorKey);

      const newUrl = buildPagedUrl(apiRequestUrl, cursor);

      try {
        // Run the fetch inside the page so it uses the page's own session.
        const pageData = await page.evaluate(async (u) => {
          const res = await fetch(u);
          return await res.json();
        }, newUrl);

        if (!pageData) {
          log(' [API拦截] 翻页失败: empty response');
          break;
        }

        if (Array.isArray(pageData.itemList)) {
          addItems(pageData.itemList);
          log(` [API拦截] 翻页 cursor=${cursor},获取 ${pageData.itemList.length} 条,累计 ${videoMap.size}`);
        }

        cursor = pageData.cursor;
        hasMore = pageData.hasMore;
      } catch (e) {
        log(` [API拦截] 翻页失败: ${e.message}`);
        break;
      }

      // Short pause between page requests.
      await delay(300, 600);
    }

    log(` [API拦截] 总计获取 ${videoMap.size} 条视频`);

    return videoMap;
  } finally {
    // 5. Always detach the interceptors so they do not pile up across calls.
    page.off('response', responseHandler);
    page.off('request', requestHandler);
  }
}

/**
 * Return `requestUrl` with its `cursor` query parameter set to `cursor`.
 *
 * Only a parameter named exactly `cursor` is rewritten (so `min_cursor=` and
 * similar are left alone); when the URL has no such parameter it is appended.
 * The rest of the query string is kept byte-for-byte.
 *
 * @param {string} requestUrl - Captured API request URL.
 * @param {string|number} cursor - Cursor value for the next page.
 * @returns {string} URL for the next page.
 */
function buildPagedUrl(requestUrl, cursor) {
  const encoded = encodeURIComponent(String(cursor));
  const cursorParam = /([?&])cursor=[^&#]*/;
  if (cursorParam.test(requestUrl)) {
    // A replacer function avoids `$` sequences in the value being interpreted.
    return requestUrl.replace(cursorParam, (_match, sep) => `${sep}cursor=${encoded}`);
  }
  const joiner = requestUrl.includes('?') ? '&' : '?';
  return `${requestUrl}${joiner}cursor=${encoded}`;
}
|
|
123
|
+
|
|
124
|
+
export { fetchUserVideosAPI };
|
package/src/lib/args.js
CHANGED
|
@@ -164,7 +164,7 @@ function parseExploreArgs(args) {
|
|
|
164
164
|
let explorePort = null;
|
|
165
165
|
let exploreProfile = null;
|
|
166
166
|
let exploreUserId = null;
|
|
167
|
-
let exploreMaxVideos =
|
|
167
|
+
let exploreMaxVideos = 16;
|
|
168
168
|
|
|
169
169
|
const positional = [];
|
|
170
170
|
const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
|
|
@@ -196,7 +196,7 @@ function parseExploreArgs(args) {
|
|
|
196
196
|
} else if (arg === '--user-id') {
|
|
197
197
|
exploreUserId = args[++i];
|
|
198
198
|
} else if (arg === '--max-videos') {
|
|
199
|
-
exploreMaxVideos = parseInt(args[++i]) ||
|
|
199
|
+
exploreMaxVideos = parseInt(args[++i]) || 16;
|
|
200
200
|
} else {
|
|
201
201
|
positional.push(arg);
|
|
202
202
|
}
|
package/src/lib/constants.js
CHANGED
|
@@ -17,7 +17,7 @@ let browser = null;
|
|
|
17
17
|
let userId = null;
|
|
18
18
|
let maxFollowing = 5;
|
|
19
19
|
let maxFollowers = 5;
|
|
20
|
-
let maxVideos =
|
|
20
|
+
let maxVideos = 16;
|
|
21
21
|
let maxComments = 10;
|
|
22
22
|
|
|
23
23
|
try {
|
|
@@ -87,7 +87,7 @@ function saveMaxFollowers(val) {
|
|
|
87
87
|
|
|
88
88
|
function saveMaxVideos(val) {
|
|
89
89
|
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
90
|
-
cfg.maxVideos = parseInt(val) ||
|
|
90
|
+
cfg.maxVideos = parseInt(val) || 16;
|
|
91
91
|
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
92
92
|
maxVideos = cfg.maxVideos;
|
|
93
93
|
configFile = configPath;
|
|
@@ -99,11 +99,16 @@ export class TikTokScraper {
|
|
|
99
99
|
async warmWaf() {
|
|
100
100
|
if (this.warmPromise) return this.warmPromise;
|
|
101
101
|
this.warmPromise = (async () => {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
102
|
+
try {
|
|
103
|
+
const page = this.slots[0].page;
|
|
104
|
+
await page.goto(this.warmUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
|
|
105
|
+
await delay(1500);
|
|
106
|
+
this.lastWarmTime = Date.now();
|
|
107
|
+
} catch (e) {
|
|
108
|
+
console.error(`[warmWaf] failed: ${e.message}`);
|
|
109
|
+
} finally {
|
|
110
|
+
this.warmPromise = null;
|
|
111
|
+
}
|
|
107
112
|
})();
|
|
108
113
|
return this.warmPromise;
|
|
109
114
|
}
|
|
@@ -146,28 +151,40 @@ export class TikTokScraper {
|
|
|
146
151
|
}
|
|
147
152
|
|
|
148
153
|
async getUserInfo(uniqueId) {
|
|
149
|
-
if (this._needWarm()) await this.warmWaf();
|
|
150
154
|
const slot = this._pickSlot();
|
|
151
155
|
return slot.lock.run(async () => {
|
|
152
|
-
|
|
156
|
+
let rawHtml = await this._fetchViewSource(
|
|
153
157
|
`https://www.tiktok.com/@${uniqueId}`,
|
|
154
158
|
slot
|
|
155
159
|
);
|
|
156
|
-
|
|
160
|
+
let result = parseUserInfo(rawHtml);
|
|
161
|
+
if (!result) {
|
|
162
|
+
try { await this.warmWaf(); } catch {}
|
|
163
|
+
rawHtml = await this._fetchViewSource(
|
|
164
|
+
`https://www.tiktok.com/@${uniqueId}`,
|
|
165
|
+
slot
|
|
166
|
+
);
|
|
167
|
+
result = parseUserInfo(rawHtml);
|
|
168
|
+
}
|
|
169
|
+
return result || null;
|
|
157
170
|
});
|
|
158
171
|
}
|
|
159
172
|
|
|
160
173
|
async getVideoInfo(videoUrl) {
|
|
161
|
-
if (this._needWarm()) await this.warmWaf();
|
|
162
174
|
const slot = this._pickSlot();
|
|
163
175
|
return slot.lock.run(async () => {
|
|
164
|
-
|
|
165
|
-
|
|
176
|
+
let rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
177
|
+
let result = parseVideoInfo(rawHtml);
|
|
178
|
+
if (!result) {
|
|
179
|
+
try { await this.warmWaf(); } catch {}
|
|
180
|
+
rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
181
|
+
result = parseVideoInfo(rawHtml);
|
|
182
|
+
}
|
|
183
|
+
return result || null;
|
|
166
184
|
});
|
|
167
185
|
}
|
|
168
186
|
|
|
169
187
|
async getUserAndVideo(videoUrl) {
|
|
170
|
-
if (this._needWarm()) await this.warmWaf();
|
|
171
188
|
const video = await this.getVideoInfo(videoUrl);
|
|
172
189
|
if (!video) return null;
|
|
173
190
|
const user = await this.getUserInfo(video.author.uniqueId);
|
|
@@ -15,7 +15,7 @@ import { maxFollowing as globalMaxFollowing, maxFollowers as globalMaxFollowers,
|
|
|
15
15
|
|
|
16
16
|
async function processExplore(page, username, options, log) {
|
|
17
17
|
const {
|
|
18
|
-
maxVideos =
|
|
18
|
+
maxVideos = 16,
|
|
19
19
|
enableFollow = true,
|
|
20
20
|
maxFollowing = 5,
|
|
21
21
|
maxFollowers = 5,
|
package/src/videos/core.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
|
|
2
|
-
import {
|
|
2
|
+
import { fetchUserVideosAPI } from '../lib/api-interceptor.js';
|
|
3
3
|
|
|
4
4
|
async function getUserInfo(page) {
|
|
5
5
|
return await page.evaluate(() => {
|
|
@@ -41,41 +41,12 @@ async function getUserInfo(page) {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
async function collectVideos(page, username, maxVideos, log) {
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
items: Array.from(document.querySelectorAll('a'))
|
|
51
|
-
.filter(el => (el.getAttribute('href') || '').includes(pattern))
|
|
52
|
-
.map(el => {
|
|
53
|
-
const href = el.getAttribute('href') || '';
|
|
54
|
-
const idMatch = href.match(/\/video\/(\d+)/);
|
|
55
|
-
return { id: idMatch ? idMatch[1] : null, href };
|
|
56
|
-
})
|
|
57
|
-
.filter(v => v.id),
|
|
58
|
-
};
|
|
59
|
-
},
|
|
60
|
-
maxItems: maxVideos,
|
|
61
|
-
delayRange: [2000, 3000],
|
|
62
|
-
staleThreshold: 5,
|
|
63
|
-
maxRounds: 500,
|
|
64
|
-
onRound: (round, items, allItems) => {
|
|
65
|
-
const uniqueCount = new Set(allItems.map(v => v.id)).size;
|
|
66
|
-
if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
|
|
67
|
-
log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
|
|
68
|
-
}
|
|
69
|
-
},
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
const uniqueVideos = new Map();
|
|
73
|
-
allLinks.forEach(v => {
|
|
74
|
-
if (!uniqueVideos.has(v.id)) uniqueVideos.set(v.id, v);
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
log(`收集完成: ${uniqueVideos.size} 个视频`);
|
|
78
|
-
return uniqueVideos;
|
|
44
|
+
const apiResult = await fetchUserVideosAPI(page, username, maxVideos, log);
|
|
45
|
+
if (apiResult && apiResult.size > 0) {
|
|
46
|
+
log(`收集完成: ${apiResult.size} 个视频`);
|
|
47
|
+
return apiResult;
|
|
48
|
+
}
|
|
49
|
+
return new Map();
|
|
79
50
|
}
|
|
80
51
|
|
|
81
52
|
async function runGetUserVideos(options) {
|