tt-help-cli-ycl 1.3.14 → 1.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.14",
3
+ "version": "1.3.16",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,124 @@
1
+ import { delay } from './delay.js';
2
+
3
/**
 * Collects a TikTok user's video list by listening for the page's own
 * `/api/post/item_list/` call instead of scrolling and parsing the DOM
 * (the original note describes this as 5-10x faster than DOM scrolling).
 *
 * Flow: register request/response listeners, navigate to the profile page,
 * wait up to 8 s for the first API response, then re-issue the captured
 * request URL from inside the page with an updated `cursor` until
 * `maxVideos` unique videos are collected or the API reports no more data.
 *
 * @param {import('playwright').Page} page - Playwright page (CDP connection)
 * @param {string} username - TikTok username (without the leading "@")
 * @param {number} maxVideos - maximum number of unique videos to collect
 * @param {Function} log - logging function, called with a single string
 * @returns {Promise<Map<string, {id: string, href: string}>|null>} videos keyed
 *   by id (same shape as collectVideos), or `null` when the first API
 *   response could not be obtained or carried no item list
 */
async function fetchUserVideosAPI(page, username, maxVideos, log) {
  const API_PATH = '/api/post/item_list/';
  const FIRST_PAGE_TIMEOUT_MS = 8000;
  // Matches the `cursor` query parameter only (not e.g. `xcursor=`), any value.
  const CURSOR_PARAM = /([?&]cursor=)[^&#]*/;

  const profileUrl = `https://www.tiktok.com/@${username}`;
  const videoMap = new Map();

  // De-duplicate while collecting so duplicates never count toward maxVideos.
  const addItems = (itemList) => {
    for (const item of itemList) {
      if (videoMap.size >= maxVideos) break;
      // Skip malformed entries instead of emitting ".../video/undefined".
      if (!item || item.id === undefined || item.id === null) continue;
      if (videoMap.has(item.id)) continue;
      videoMap.set(item.id, { id: item.id, href: `${profileUrl}/video/${item.id}` });
    }
  };

  // 1. Register the response interceptor and capture the request URL.
  let apiResolve = null;
  const apiPromise = new Promise((resolve) => { apiResolve = resolve; });
  let firstResponseSeen = false;
  let apiRequestUrl = null;
  let timeoutId = null;

  const responseHandler = async (response) => {
    // Only the first matching response matters; later ones (including the
    // paginated fetches issued below) must not be parsed a second time.
    if (firstResponseSeen || !response.url().includes(API_PATH)) return;
    firstResponseSeen = true;
    try {
      apiResolve(await response.json());
    } catch (e) {
      apiResolve(null);
    }
  };

  const requestHandler = (request) => {
    if (!apiRequestUrl && request.url().includes(API_PATH)) {
      apiRequestUrl = request.url();
    }
  };

  page.on('response', responseHandler);
  page.on('request', requestHandler);

  try {
    // 2. Navigate and wait for the API response.
    log(' [API拦截] 导航到用户页，等待 /api/post/item_list/ ...');
    const t0 = Date.now();

    await page.goto(profileUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });

    const timeoutPromise = new Promise((resolve) => {
      timeoutId = setTimeout(() => resolve(null), FIRST_PAGE_TIMEOUT_MS);
    });
    const data = await Promise.race([apiPromise, timeoutPromise]);

    const elapsed = Date.now() - t0;

    if (!data || !Array.isArray(data.itemList)) {
      log(` [API拦截] ${elapsed}ms 后未拿到 API 数据`);
      return null;
    }

    // 3. Extract the first page of videos.
    addItems(data.itemList);
    log(` [API拦截] ${elapsed}ms 获取首页 ${data.itemList.length} 条视频`);

    // 4. Page through the remaining videos.
    let cursor = data.cursor;
    let hasMore = data.hasMore;

    while (hasMore && cursor && videoMap.size < maxVideos) {
      if (!apiRequestUrl) {
        log(' [API拦截] 未捕获到 API 请求 URL，无法翻页');
        break;
      }
      if (!CURSOR_PARAM.test(apiRequestUrl)) {
        // Without a cursor parameter the same first page would be re-fetched.
        log(' [API拦截] 请求 URL 中没有 cursor 参数，无法翻页');
        break;
      }

      const encodedCursor = encodeURIComponent(String(cursor));
      // Replacer function avoids `$` sequences in the cursor being interpreted.
      const nextUrl = apiRequestUrl.replace(CURSOR_PARAM, (_match, prefix) => `${prefix}${encodedCursor}`);

      let pageData = null;
      try {
        // Runs inside the page so the request carries the page's cookies/session.
        pageData = await page.evaluate(async (u) => {
          const res = await fetch(u);
          return await res.json();
        }, nextUrl);
      } catch (e) {
        log(` [API拦截] 翻页失败: ${e.message}`);
        break;
      }

      if (!pageData) {
        log(' [API拦截] 翻页失败: 响应为空');
        break;
      }

      if (Array.isArray(pageData.itemList)) {
        addItems(pageData.itemList);
        log(` [API拦截] 翻页 cursor=${cursor}，获取 ${pageData.itemList.length} 条，累计 ${videoMap.size}`);
      }

      // A cursor that does not advance would otherwise loop forever.
      if (String(pageData.cursor) === String(cursor)) {
        log(` [API拦截] cursor 未前进 (${cursor})，停止翻页`);
        break;
      }

      cursor = pageData.cursor;
      hasMore = pageData.hasMore;

      await delay(300, 600);
    }

    log(` [API拦截] 总计获取 ${videoMap.size} 条视频`);

    return videoMap;
  } finally {
    // 5. Always release the timer and listeners so they do not accumulate
    //    across calls or keep the process alive.
    clearTimeout(timeoutId);
    page.off('response', responseHandler);
    page.off('request', requestHandler);
  }
}
123
+
124
+ export { fetchUserVideosAPI };
package/src/lib/args.js CHANGED
@@ -164,7 +164,7 @@ function parseExploreArgs(args) {
164
164
  let explorePort = null;
165
165
  let exploreProfile = null;
166
166
  let exploreUserId = null;
167
- let exploreMaxVideos = 1;
167
+ let exploreMaxVideos = 16;
168
168
 
169
169
  const positional = [];
170
170
  const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
@@ -196,7 +196,7 @@ function parseExploreArgs(args) {
196
196
  } else if (arg === '--user-id') {
197
197
  exploreUserId = args[++i];
198
198
  } else if (arg === '--max-videos') {
199
- exploreMaxVideos = parseInt(args[++i]) || 1;
199
+ exploreMaxVideos = parseInt(args[++i]) || 16;
200
200
  } else {
201
201
  positional.push(arg);
202
202
  }
@@ -17,7 +17,7 @@ let browser = null;
17
17
  let userId = null;
18
18
  let maxFollowing = 5;
19
19
  let maxFollowers = 5;
20
- let maxVideos = 1;
20
+ let maxVideos = 16;
21
21
  let maxComments = 10;
22
22
 
23
23
  try {
@@ -87,7 +87,7 @@ function saveMaxFollowers(val) {
87
87
 
88
88
  function saveMaxVideos(val) {
89
89
  const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
90
- cfg.maxVideos = parseInt(val) || 1;
90
+ cfg.maxVideos = parseInt(val) || 16;
91
91
  writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
92
92
  maxVideos = cfg.maxVideos;
93
93
  configFile = configPath;
@@ -99,11 +99,16 @@ export class TikTokScraper {
99
99
  async warmWaf() {
100
100
  if (this.warmPromise) return this.warmPromise;
101
101
  this.warmPromise = (async () => {
102
- const page = this.slots[0].page;
103
- await page.goto(this.warmUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
104
- await delay(1500);
105
- this.lastWarmTime = Date.now();
106
- this.warmPromise = null;
102
+ try {
103
+ const page = this.slots[0].page;
104
+ await page.goto(this.warmUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
105
+ await delay(1500);
106
+ this.lastWarmTime = Date.now();
107
+ } catch (e) {
108
+ console.error(`[warmWaf] failed: ${e.message}`);
109
+ } finally {
110
+ this.warmPromise = null;
111
+ }
107
112
  })();
108
113
  return this.warmPromise;
109
114
  }
@@ -146,28 +151,40 @@ export class TikTokScraper {
146
151
  }
147
152
 
148
153
  async getUserInfo(uniqueId) {
149
- if (this._needWarm()) await this.warmWaf();
150
154
  const slot = this._pickSlot();
151
155
  return slot.lock.run(async () => {
152
- const rawHtml = await this._fetchViewSource(
156
+ let rawHtml = await this._fetchViewSource(
153
157
  `https://www.tiktok.com/@${uniqueId}`,
154
158
  slot
155
159
  );
156
- return parseUserInfo(rawHtml);
160
+ let result = parseUserInfo(rawHtml);
161
+ if (!result) {
162
+ try { await this.warmWaf(); } catch {}
163
+ rawHtml = await this._fetchViewSource(
164
+ `https://www.tiktok.com/@${uniqueId}`,
165
+ slot
166
+ );
167
+ result = parseUserInfo(rawHtml);
168
+ }
169
+ return result || null;
157
170
  });
158
171
  }
159
172
 
160
173
  async getVideoInfo(videoUrl) {
161
- if (this._needWarm()) await this.warmWaf();
162
174
  const slot = this._pickSlot();
163
175
  return slot.lock.run(async () => {
164
- const rawHtml = await this._fetchViewSource(videoUrl, slot);
165
- return parseVideoInfo(rawHtml);
176
+ let rawHtml = await this._fetchViewSource(videoUrl, slot);
177
+ let result = parseVideoInfo(rawHtml);
178
+ if (!result) {
179
+ try { await this.warmWaf(); } catch {}
180
+ rawHtml = await this._fetchViewSource(videoUrl, slot);
181
+ result = parseVideoInfo(rawHtml);
182
+ }
183
+ return result || null;
166
184
  });
167
185
  }
168
186
 
169
187
  async getUserAndVideo(videoUrl) {
170
- if (this._needWarm()) await this.warmWaf();
171
188
  const video = await this.getVideoInfo(videoUrl);
172
189
  if (!video) return null;
173
190
  const user = await this.getUserInfo(video.author.uniqueId);
@@ -15,7 +15,7 @@ import { maxFollowing as globalMaxFollowing, maxFollowers as globalMaxFollowers,
15
15
 
16
16
  async function processExplore(page, username, options, log) {
17
17
  const {
18
- maxVideos = 1,
18
+ maxVideos = 16,
19
19
  enableFollow = true,
20
20
  maxFollowing = 5,
21
21
  maxFollowers = 5,
@@ -1,5 +1,5 @@
1
1
  import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
2
- import { scrollAndCollect } from '../scraper/modules/scroll-collector.js';
2
+ import { fetchUserVideosAPI } from '../lib/api-interceptor.js';
3
3
 
4
4
  async function getUserInfo(page) {
5
5
  return await page.evaluate(() => {
@@ -41,41 +41,12 @@ async function getUserInfo(page) {
41
41
  }
42
42
 
43
43
  async function collectVideos(page, username, maxVideos, log) {
44
- const allLinks = await scrollAndCollect(page, {
45
- container: '[class*="ColumnListContainer"]',
46
- extraArgs: { handle: username },
47
- collectFn: (container, args) => {
48
- const pattern = '/@' + args.handle + '/video/';
49
- return {
50
- items: Array.from(document.querySelectorAll('a'))
51
- .filter(el => (el.getAttribute('href') || '').includes(pattern))
52
- .map(el => {
53
- const href = el.getAttribute('href') || '';
54
- const idMatch = href.match(/\/video\/(\d+)/);
55
- return { id: idMatch ? idMatch[1] : null, href };
56
- })
57
- .filter(v => v.id),
58
- };
59
- },
60
- maxItems: maxVideos,
61
- delayRange: [2000, 3000],
62
- staleThreshold: 5,
63
- maxRounds: 500,
64
- onRound: (round, items, allItems) => {
65
- const uniqueCount = new Set(allItems.map(v => v.id)).size;
66
- if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
67
- log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
68
- }
69
- },
70
- });
71
-
72
- const uniqueVideos = new Map();
73
- allLinks.forEach(v => {
74
- if (!uniqueVideos.has(v.id)) uniqueVideos.set(v.id, v);
75
- });
76
-
77
- log(`收集完成: ${uniqueVideos.size} 个视频`);
78
- return uniqueVideos;
44
+ const apiResult = await fetchUserVideosAPI(page, username, maxVideos, log);
45
+ if (apiResult && apiResult.size > 0) {
46
+ log(`收集完成: ${apiResult.size} 个视频`);
47
+ return apiResult;
48
+ }
49
+ return new Map();
79
50
  }
80
51
 
81
52
  async function runGetUserVideos(options) {