tt-help-cli-ycl 1.3.14 → 1.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.14",
3
+ "version": "1.3.15",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,124 @@
1
+ import { delay } from './delay.js';
2
+
3
/**
 * Collects a user's video list by intercepting the `/api/post/item_list/`
 * call the profile page issues while loading, then replaying that captured
 * request from inside the page with successive cursors.
 *
 * Videos are de-duplicated by id while they are collected, so `maxVideos`
 * bounds the number of unique videos in the result.
 *
 * @param {import('playwright').Page} page - Playwright page (CDP connection).
 * @param {string} username - TikTok user name, without the leading "@".
 * @param {number} maxVideos - Maximum number of unique videos to return.
 * @param {Function} log - Logging callback, called with one message string.
 * @returns {Promise<Map<string, {id: string, href: string}>|null>} Videos keyed
 *   by id (same shape collectVideos returns), or null when no usable API
 *   response was captured within the timeout.
 */
async function fetchUserVideosAPI(page, username, maxVideos, log) {
  const API_PATH = '/api/post/item_list/';
  const FIRST_RESPONSE_TIMEOUT_MS = 8000;
  const profileUrl = `https://www.tiktok.com/@${username}`;
  const videos = new Map();

  // Adds items to the result, skipping entries without an id and ids that
  // were already collected, and stopping once the limit is reached.
  const addItems = (itemList) => {
    for (const item of itemList) {
      if (videos.size >= maxVideos) break;
      const id = item ? item.id : undefined;
      if (!id || videos.has(id)) continue;
      videos.set(id, { id, href: `${profileUrl}/video/${id}` });
    }
  };

  // 1. Register the response interceptor and capture the request URL.
  let apiResolve = null;
  const apiPromise = new Promise((resolve) => { apiResolve = resolve; });
  let apiRequestUrl = null;
  let timeoutId;

  const responseHandler = async (response) => {
    if (!response.url().includes(API_PATH)) return;
    try {
      apiResolve(await response.json());
    } catch (e) {
      // An unreadable body is reported as "no data" rather than thrown.
      apiResolve(null);
    }
  };

  const requestHandler = (request) => {
    // Only the first matching request is kept; it is the paging template.
    if (!apiRequestUrl && request.url().includes(API_PATH)) {
      apiRequestUrl = request.url();
    }
  };

  page.on('response', responseHandler);
  page.on('request', requestHandler);

  try {
    // 2. Navigate and wait for the API response (or give up after a timeout).
    log(' [API拦截] 导航到用户页,等待 /api/post/item_list/ ...');
    const t0 = Date.now();

    await page.goto(profileUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });

    const data = await Promise.race([
      apiPromise,
      new Promise((resolve) => {
        timeoutId = setTimeout(() => resolve(null), FIRST_RESPONSE_TIMEOUT_MS);
      }),
    ]);

    const elapsed = Date.now() - t0;

    if (!data || !Array.isArray(data.itemList)) {
      log(` [API拦截] ${elapsed}ms 后未拿到 API 数据`);
      return null;
    }

    // 3. Take the videos from the first page.
    addItems(data.itemList);
    log(` [API拦截] ${elapsed}ms 获取首页 ${data.itemList.length} 条视频`);

    // 4. Page through the remaining videos.
    let cursor = data.cursor;
    let hasMore = data.hasMore;
    // Cursors already requested; a repeat means the API is not advancing.
    const seenCursors = new Set([String(cursor)]);

    while (hasMore && cursor && videos.size < maxVideos) {
      if (!apiRequestUrl) {
        log(' [API拦截] 未捕获到 API 请求 URL,无法翻页');
        break;
      }

      // Rewrite only the `cursor` query parameter. The pattern is anchored on
      // the parameter separator so a parameter merely ending in "cursor" is
      // left alone; the rest of the captured URL is kept byte-for-byte.
      const currentCursor = encodeURIComponent(String(cursor));
      const newUrl = apiRequestUrl.replace(
        /([?&])cursor=\d*/,
        (_match, sep) => `${sep}cursor=${currentCursor}`
      );

      let pageData;
      try {
        // The fetch runs inside the page, so it uses the page's own session.
        pageData = await page.evaluate(async (u) => {
          const res = await fetch(u);
          return await res.json();
        }, newUrl);
      } catch (e) {
        log(` [API拦截] 翻页失败: ${e.message}`);
        break;
      }

      if (!pageData) {
        log(' [API拦截] 翻页失败: 响应数据为空');
        break;
      }

      if (Array.isArray(pageData.itemList)) {
        addItems(pageData.itemList);
        log(` [API拦截] 翻页 cursor=${cursor},获取 ${pageData.itemList.length} 条,累计 ${videos.size}`);
      }

      const nextCursor = pageData.cursor;
      hasMore = pageData.hasMore;

      // Without this guard a cursor that never advances would re-fetch the
      // same page forever, because duplicates no longer count to the limit.
      if (hasMore && nextCursor && seenCursors.has(String(nextCursor))) {
        log(' [API拦截] cursor 未前进,停止翻页');
        break;
      }
      seenCursors.add(String(nextCursor));
      cursor = nextCursor;

      await delay(300, 600);
    }

    log(` [API拦截] 总计获取 ${videos.size} 条视频`);

    return videos;
  } finally {
    // 5. Always release the timer and the listeners so they cannot pile up
    //    across calls or keep the process alive after returning.
    clearTimeout(timeoutId);
    page.off('response', responseHandler);
    page.off('request', requestHandler);
  }
}
123
+
124
+ export { fetchUserVideosAPI };
package/src/lib/args.js CHANGED
@@ -164,7 +164,7 @@ function parseExploreArgs(args) {
164
164
  let explorePort = null;
165
165
  let exploreProfile = null;
166
166
  let exploreUserId = null;
167
- let exploreMaxVideos = 1;
167
+ let exploreMaxVideos = 16;
168
168
 
169
169
  const positional = [];
170
170
  const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
@@ -196,7 +196,7 @@ function parseExploreArgs(args) {
196
196
  } else if (arg === '--user-id') {
197
197
  exploreUserId = args[++i];
198
198
  } else if (arg === '--max-videos') {
199
- exploreMaxVideos = parseInt(args[++i]) || 1;
199
+ exploreMaxVideos = parseInt(args[++i]) || 16;
200
200
  } else {
201
201
  positional.push(arg);
202
202
  }
@@ -17,7 +17,7 @@ let browser = null;
17
17
  let userId = null;
18
18
  let maxFollowing = 5;
19
19
  let maxFollowers = 5;
20
- let maxVideos = 1;
20
+ let maxVideos = 16;
21
21
  let maxComments = 10;
22
22
 
23
23
  try {
@@ -87,7 +87,7 @@ function saveMaxFollowers(val) {
87
87
 
88
88
  function saveMaxVideos(val) {
89
89
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
90
- cfg.maxVideos = parseInt(val) || 1;
90
+ cfg.maxVideos = parseInt(val) || 16;
91
91
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
92
92
  maxVideos = cfg.maxVideos;
93
93
  configFile = configPath;
@@ -146,28 +146,40 @@ export class TikTokScraper {
146
146
  }
147
147
 
148
148
  async getUserInfo(uniqueId) {
149
- if (this._needWarm()) await this.warmWaf();
150
149
  const slot = this._pickSlot();
151
150
  return slot.lock.run(async () => {
152
- const rawHtml = await this._fetchViewSource(
151
+ let rawHtml = await this._fetchViewSource(
153
152
  `https://www.tiktok.com/@${uniqueId}`,
154
153
  slot
155
154
  );
156
- return parseUserInfo(rawHtml);
155
+ let result = parseUserInfo(rawHtml);
156
+ if (!result) {
157
+ await this.warmWaf();
158
+ rawHtml = await this._fetchViewSource(
159
+ `https://www.tiktok.com/@${uniqueId}`,
160
+ slot
161
+ );
162
+ result = parseUserInfo(rawHtml);
163
+ }
164
+ return result || null;
157
165
  });
158
166
  }
159
167
 
160
168
  async getVideoInfo(videoUrl) {
161
- if (this._needWarm()) await this.warmWaf();
162
169
  const slot = this._pickSlot();
163
170
  return slot.lock.run(async () => {
164
- const rawHtml = await this._fetchViewSource(videoUrl, slot);
165
- return parseVideoInfo(rawHtml);
171
+ let rawHtml = await this._fetchViewSource(videoUrl, slot);
172
+ let result = parseVideoInfo(rawHtml);
173
+ if (!result) {
174
+ await this.warmWaf();
175
+ rawHtml = await this._fetchViewSource(videoUrl, slot);
176
+ result = parseVideoInfo(rawHtml);
177
+ }
178
+ return result || null;
166
179
  });
167
180
  }
168
181
 
169
182
  async getUserAndVideo(videoUrl) {
170
- if (this._needWarm()) await this.warmWaf();
171
183
  const video = await this.getVideoInfo(videoUrl);
172
184
  if (!video) return null;
173
185
  const user = await this.getUserInfo(video.author.uniqueId);
@@ -15,7 +15,7 @@ import { maxFollowing as globalMaxFollowing, maxFollowers as globalMaxFollowers,
15
15
 
16
16
  async function processExplore(page, username, options, log) {
17
17
  const {
18
- maxVideos = 1,
18
+ maxVideos = 16,
19
19
  enableFollow = true,
20
20
  maxFollowing = 5,
21
21
  maxFollowers = 5,
@@ -1,5 +1,5 @@
1
1
  import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
2
- import { scrollAndCollect } from '../scraper/modules/scroll-collector.js';
2
+ import { fetchUserVideosAPI } from '../lib/api-interceptor.js';
3
3
 
4
4
  async function getUserInfo(page) {
5
5
  return await page.evaluate(() => {
@@ -41,41 +41,12 @@ async function getUserInfo(page) {
41
41
  }
42
42
 
43
43
  async function collectVideos(page, username, maxVideos, log) {
44
- const allLinks = await scrollAndCollect(page, {
45
- container: '[class*="ColumnListContainer"]',
46
- extraArgs: { handle: username },
47
- collectFn: (container, args) => {
48
- const pattern = '/@' + args.handle + '/video/';
49
- return {
50
- items: Array.from(document.querySelectorAll('a'))
51
- .filter(el => (el.getAttribute('href') || '').includes(pattern))
52
- .map(el => {
53
- const href = el.getAttribute('href') || '';
54
- const idMatch = href.match(/\/video\/(\d+)/);
55
- return { id: idMatch ? idMatch[1] : null, href };
56
- })
57
- .filter(v => v.id),
58
- };
59
- },
60
- maxItems: maxVideos,
61
- delayRange: [2000, 3000],
62
- staleThreshold: 5,
63
- maxRounds: 500,
64
- onRound: (round, items, allItems) => {
65
- const uniqueCount = new Set(allItems.map(v => v.id)).size;
66
- if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
67
- log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
68
- }
69
- },
70
- });
71
-
72
- const uniqueVideos = new Map();
73
- allLinks.forEach(v => {
74
- if (!uniqueVideos.has(v.id)) uniqueVideos.set(v.id, v);
75
- });
76
-
77
- log(`收集完成: ${uniqueVideos.size} 个视频`);
78
- return uniqueVideos;
44
+ const apiResult = await fetchUserVideosAPI(page, username, maxVideos, log);
45
+ if (apiResult && apiResult.size > 0) {
46
+ log(`收集完成: ${apiResult.size} 个视频`);
47
+ return apiResult;
48
+ }
49
+ return new Map();
79
50
  }
80
51
 
81
52
  async function runGetUserVideos(options) {