tt-help-cli-ycl 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ const { delay } = require('./page-helpers.cjs');
2
+
3
/**
 * Generic scroll-and-collect driver built on the three-phase scroll strategy.
 *
 * The framework owns the scrolling; the caller owns the harvesting through
 * `collectFn`, which is serialized and executed inside the page context.
 *
 * @param {Page} page - Playwright page object.
 * @param {Object} options
 * @param {string} [options.container] - Selector of the scroll container; `window` is used when omitted
 *   or when the selector matches nothing.
 * @param {boolean} [options.findScrollable] - After resolving the container, walk up to the first
 *   ancestor whose content overflows (falls back to `document.body`).
 * @param {Function|string} options.collectFn - Runs in the page context as `(containerEl, extraArgs)`
 *   and returns `{ items: any[] }`, the raw data seen in this round. Because it is passed as source
 *   text, it cannot close over Node-side variables.
 * @param {*} [options.extraArgs] - Forwarded untouched as the second argument of `collectFn`.
 * @param {number[]} [options.delayRange] - `[min, max]` wait after each scroll, default `[800, 1500]`.
 * @param {number} [options.maxItems] - Upper bound on the number of returned items. `undefined` or
 *   `null` means "no limit" (scrolling then ends through the stale check or `maxRounds`).
 * @param {number} [options.maxRounds] - Hard cap on scroll rounds (safety net), default 200.
 * @param {number} [options.staleThreshold] - Stop after this many consecutive rounds without new
 *   items, default 3.
 * @param {Function} [options.uniqueKey] - `(item) => key` used for de-duplication; without it every
 *   collected item is kept.
 * @param {Function} [options.onRound] - Per-round callback `(round, newItems, allItems)`.
 * @returns {Promise<any[]>} Everything collected (de-duplicated when `uniqueKey` is given), never
 *   longer than `maxItems` when a limit is set.
 * @throws {Error} When `collectFn` is missing.
 */
async function scrollAndCollect(page, options) {
  // Tolerate a missing options bag so the caller gets the intended
  // "collectFn is required" error instead of a destructuring TypeError.
  const {
    container,
    findScrollable = false,
    collectFn,
    extraArgs,
    delayRange = [800, 1500],
    maxItems,
    maxRounds = 200,
    staleThreshold = 3,
    uniqueKey,
    onRound,
  } = options ?? {};

  if (!collectFn) {
    throw new Error('collectFn is required');
  }

  const fnStr = typeof collectFn === 'function' ? collectFn.toString() : collectFn;
  // `!= null` on purpose: `allItems.length >= null` is true, so a plain
  // `!== undefined` check would end the loop after one round for `maxItems: null`.
  const hasLimit = maxItems != null;
  const allItems = [];
  const seenKeys = uniqueKey ? new Set() : null;
  let staleCount = 0;

  for (let round = 0; round < maxRounds; round++) {
    // 1. Three-phase scroll.
    await threePhaseScroll(page, { container, findScrollable });

    // 2. Give the page time to load the next batch.
    await delay(delayRange[0], delayRange[1]);

    // 3. Harvest. The callback is serialized into the page, so it must stay self-contained.
    const result = await page.evaluate(
      ({ fn: source, containerSelector, findScrollableFlag, args }) => {
        let el;
        if (!containerSelector) {
          el = window;
        } else {
          el = document.querySelector(containerSelector);
          if (!el) {
            el = window;
          } else if (findScrollableFlag) {
            let current = el;
            let found = false;
            while (current && current !== document.body) {
              if (current.scrollHeight > current.clientHeight + 10) {
                el = current;
                found = true;
                break;
              }
              current = current.parentElement;
            }
            if (!found) {
              el = document.body;
            }
          }
        }

        // NOTE(review): eval rebuilds the caller-supplied collector from its source text;
        // only pass trusted code as `collectFn`.
        const fn = eval(`(${source})`);
        return fn(el, args);
      },
      { fn: fnStr, containerSelector: container, findScrollableFlag: findScrollable, args: extraArgs },
    );

    // A collector that returns nothing (or a non-array `items`) counts as an empty round.
    const raw = Array.isArray(result?.items) ? result.items : [];

    // 4. De-duplicate: keep only what is new in this round.
    let newItems = uniqueKey
      ? raw.filter((item) => {
          const key = uniqueKey(item);
          if (seenKeys.has(key)) return false;
          seenKeys.add(key);
          return true;
        })
      : raw;

    // Never hand back more than `maxItems`; the loop ends right after this round anyway.
    if (hasLimit) {
      const remaining = Math.max(maxItems - allItems.length, 0);
      if (newItems.length > remaining) {
        newItems = newItems.slice(0, remaining);
      }
    }

    // Plain loop instead of push(...spread) so a huge round cannot overflow the call stack.
    for (const item of newItems) {
      allItems.push(item);
    }

    // 5. Notify the caller.
    if (onRound) {
      onRound(round, newItems, allItems);
    }

    // 6. Stop once the limit is reached.
    if (hasLimit && allItems.length >= maxItems) {
      break;
    }

    // 7. Stale detection, based on how many items this round added.
    if (newItems.length === 0) {
      staleCount++;
      if (staleCount >= staleThreshold) {
        break;
      }
    } else {
      staleCount = 0;
    }
  }

  return allItems;
}
120
+
121
/**
 * Three-phase scroll: go to the bottom, back off a little, then go to the bottom again.
 * Used to trigger TikTok's IntersectionObserver-based lazy loading.
 *
 * The whole routine runs inside the page via `page.evaluate`, so it must stay self-contained.
 *
 * @param {Page} page - Playwright page object.
 * @param {Object} target
 * @param {string} [target.container] - Selector of the scroll container; the window is scrolled
 *   when it is omitted or matches nothing.
 * @param {boolean} [target.findScrollable] - Walk up from the container to the first ancestor
 *   whose content overflows and scroll that one instead.
 * @returns {Promise<void>}
 */
async function threePhaseScroll(page, { container, findScrollable }) {
  await page.evaluate(async (opts) => {
    let el;

    if (!opts.container) {
      el = window;
    } else {
      el = document.querySelector(opts.container);
      if (!el) {
        el = window;
      } else if (opts.findScrollable) {
        let current = el;
        let found = false;
        while (current && current !== document.body) {
          if (current.scrollHeight > current.clientHeight + 10) {
            el = current;
            found = true;
            break;
          }
          current = current.parentElement;
        }
        if (!found) {
          // Nothing below <body> overflows, so the page itself is what scrolls.
          // In standards mode writing body.scrollTop does not move the viewport;
          // document.scrollingElement is the element that does (it is <body> only in quirks mode).
          el = document.scrollingElement || document.documentElement || document.body;
        }
      }
    }

    const randDelay = (min, max) =>
      new Promise((resolve) => setTimeout(resolve, min + Math.random() * (max - min)));

    if (el === window) {
      // Window mode advances one viewport at a time rather than jumping to the end.
      window.scrollBy(0, window.innerHeight);
      await randDelay(400, 800);
      window.scrollBy(0, -200);
      await randDelay(200, 400);
      window.scrollBy(0, window.innerHeight);
    } else {
      el.scrollTop = el.scrollHeight;
      await randDelay(400, 800);
      // Randomized back-off of 100-200px before returning to the bottom.
      el.scrollTop -= 100 + Math.random() * 100;
      await randDelay(200, 400);
      el.scrollTop = el.scrollHeight;
    }
  }, { container, findScrollable });
}
170
+
171
+ module.exports = {
172
+ scrollAndCollect,
173
+ };
@@ -1,43 +1,43 @@
1
- const { swipeNextVideo, getVideoAuthor, closeCommentPanel, getDelayConfig } = require('./page-helpers.cjs');
2
- const { extractCommentAuthors } = require('./comment-extractor');
3
- const { delay } = require('./page-helpers.cjs');
4
-
5
- async function scrapeSingleVideo(page, maxComments = 10) {
6
- const videoAuthor = await getVideoAuthor(page);
7
- if (!videoAuthor) {
8
- throw new Error('无法获取视频作者');
9
- }
10
-
11
- const commentAuthors = await extractCommentAuthors(page, maxComments);
12
- await closeCommentPanel(page);
13
- const config = getDelayConfig();
14
- await delay(Math.round(config.commentMax * 0.3), config.commentMax);
15
-
16
- const uniqueAuthors = [...new Set(commentAuthors.map(c => c.author))];
17
-
18
- return {
19
- videoAuthor,
20
- commentAuthors: uniqueAuthors,
21
- };
22
- }
23
-
24
- async function scanAndScrape(page, maxComments = 10) {
25
- const results = [];
26
-
27
- for (let i = 0; i < maxComments * 5; i++) {
28
- const result = await scrapeSingleVideo(page, maxComments);
29
- results.push(result);
30
- console.log(`[${results.length}] ${result.videoAuthor} | 评论作者: ${result.commentAuthors.length}`);
31
-
32
- if (i < maxComments * 5 - 1) {
33
- await swipeNextVideo(page);
34
- }
35
- }
36
-
37
- return results;
38
- }
39
-
40
- module.exports = {
41
- scrapeSingleVideo,
42
- scanAndScrape,
43
- };
1
+ const { swipeNextVideo, getVideoAuthor, closeCommentPanel, getDelayConfig } = require('./page-helpers.cjs');
2
+ const { extractCommentAuthors } = require('./comment-extractor');
3
+ const { delay } = require('./page-helpers.cjs');
4
+
5
/**
 * Scrape the video currently shown on `page`.
 *
 * Reads the video author, pulls up to `maxComments` comment entries, closes the
 * comment panel, waits between 30% and 100% of the configured `commentMax`
 * delay, and reports the distinct comment authors.
 *
 * @param {Page} page - Page positioned on a video.
 * @param {number} [maxComments=10] - Limit forwarded to `extractCommentAuthors`.
 * @returns {Promise<{videoAuthor: *, commentAuthors: Array}>} The video author plus the
 *   de-duplicated `author` values of the extracted comment entries, in first-seen order.
 * @throws {Error} When no video author can be read.
 */
async function scrapeSingleVideo(page, maxComments = 10) {
  const videoAuthor = await getVideoAuthor(page);
  if (!videoAuthor) {
    throw new Error('无法获取视频作者');
  }

  const entries = await extractCommentAuthors(page, maxComments);
  await closeCommentPanel(page);

  // Pause before the next action: somewhere between 30% and 100% of commentMax.
  const delayConfig = getDelayConfig();
  const upperBound = delayConfig.commentMax;
  const lowerBound = Math.round(delayConfig.commentMax * 0.3);
  await delay(lowerBound, upperBound);

  const authorNames = entries.map((entry) => entry.author);
  const distinctAuthors = Array.from(new Set(authorNames));

  return { videoAuthor, commentAuthors: distinctAuthors };
}
23
+
24
/**
 * Scrape a run of consecutive videos, swiping to the next one between scrapes.
 *
 * The number of videos visited is `maxComments * 5`; no swipe is performed after
 * the last one. One progress line per video is written to stdout.
 *
 * @param {Page} page - Page positioned on the first video.
 * @param {number} [maxComments=10] - Per-video comment limit; also determines the video count.
 * @returns {Promise<Array>} One `scrapeSingleVideo` result per visited video, in visit order.
 */
async function scanAndScrape(page, maxComments = 10) {
  const videoBudget = maxComments * 5;
  const collected = [];

  let visited = 0;
  while (visited < videoBudget) {
    const entry = await scrapeSingleVideo(page, maxComments);
    collected.push(entry);
    console.log(`[${collected.length}] ${entry.videoAuthor} | 评论作者: ${entry.commentAuthors.length}`);

    visited += 1;
    // Only move on when another video is still going to be scraped.
    if (visited < videoBudget) {
      await swipeNextVideo(page);
    }
  }

  return collected;
}
39
+
40
+ module.exports = {
41
+ scrapeSingleVideo,
42
+ scanAndScrape,
43
+ };
@@ -0,0 +1,109 @@
1
+ const path = require('path');
2
+ const fs = require('fs');
3
+ const { ensureBrowserReady, setDelayConfig } = require('./scraper/modules/page-helpers.cjs');
4
+ const { processUser } = require('./auto-core.cjs');
5
+ const { createStore } = require('./data-store.cjs');
6
+
7
/**
 * Manual end-to-end check of `processUser` with follow extraction enabled.
 *
 * Reuses an already open tiktok.com tab when there is one (otherwise opens a new
 * page in the first browser context), runs `processUser` for the seed user,
 * prints PASS/FAIL lines for the result shape, simulates the enqueue step,
 * writes the discovered users into a store file, and finally exits with
 * status 0 when every check passed and 1 otherwise.
 *
 * @returns {Promise<void>} Does not resolve normally: the process is terminated
 *   via `process.exit` once the browser has been closed.
 */
async function main() {
  const outFile = path.join(__dirname, '..', 'results', 'auto-test.json');
  const store = createStore(outFile);

  setDelayConfig('fast');

  const browser = await ensureBrowserReady();
  let allPassed = true;
  try {
    const contexts = browser.contexts();
    let page = null;
    for (const ctx of contexts) {
      for (const p of ctx.pages()) {
        if (p.url().includes('tiktok.com')) { page = p; break; }
      }
      if (page) break;
    }
    if (!page) {
      // Fail with a readable message instead of "cannot read properties of undefined".
      if (contexts.length === 0) {
        throw new Error('no browser context available to open a page in');
      }
      page = await contexts[0].newPage();
    }

    console.error('========== 测试 processUser + enableFollow ==========');
    console.error('用户: @qiqi23280\n');

    const result = await processUser(page, 'qiqi23280', {
      collectMax: 1,
      scrapeDepth: 1,
      maxComments: 10,
      maxGuess: 5,
      preset: 'fast',
      enableFollow: true,
      maxFollowing: 50,
      maxFollowers: 50,
      browser,
    }, console.error);

    console.error('\n========== 结果验证 ==========');

    // Each entry of the follow lists is expected to be an ['@handle', name] pair.
    const isHandlePair = (p) => Array.isArray(p) && p.length === 2 && p[0].startsWith('@');
    const checks = [
      { label: '用户信息', ok: result.userInfo && result.userInfo.uniqueId, detail: result.userInfo?.uniqueId },
      { label: '关注列表', ok: Array.isArray(result.discoveredFollowing) && result.discoveredFollowing.length > 0, detail: `${result.discoveredFollowing?.length || 0} 人` },
      { label: '粉丝列表', ok: Array.isArray(result.discoveredFollowers) && result.discoveredFollowers.length > 0, detail: `${result.discoveredFollowers?.length || 0} 人` },
      { label: '关注格式', ok: result.discoveredFollowing?.every(isHandlePair), detail: null },
      { label: '粉丝格式', ok: result.discoveredFollowers?.every(isHandlePair), detail: null },
      { label: '无错误', ok: !result.error, detail: result.error },
    ];

    for (const c of checks) {
      const status = c.ok ? 'PASS' : 'FAIL';
      // `!= null` so an absent detail (undefined) is not printed as "(undefined)".
      const detailStr = c.detail != null ? ` (${c.detail})` : '';
      console.error(` ${status}: ${c.label}${detailStr}`);
      if (!c.ok) allPassed = false;
    }

    // Simulate the enqueue step: seed first, then followings, then followers, de-duplicated.
    const followingIds = (result.discoveredFollowing || []).map(([h]) => h.replace(/^@/, ''));
    const followerIds = (result.discoveredFollowers || []).map(([h]) => h.replace(/^@/, ''));
    const uniqueQueue = [...new Set(['qiqi23280', ...followingIds, ...followerIds])];

    console.error(`\n 队列长度: ${uniqueQueue.length}(关注 ${followingIds.length} + 粉丝 ${followerIds.length} + 种子 1)`);

    // Write everything into the store to verify persistence.
    store.addUser({
      uniqueId: 'qiqi23280',
      ...result.userInfo,
      sources: ['seed'],
    });
    for (const [handle, name] of (result.discoveredFollowing || [])) {
      store.addUser({ uniqueId: handle.replace(/^@/, ''), nickname: name, sources: ['following'] });
    }
    for (const [handle, name] of (result.discoveredFollowers || [])) {
      store.addUser({ uniqueId: handle.replace(/^@/, ''), nickname: name, sources: ['follower'] });
    }
    store.save();

    const allUsers = store.getAllUsers();
    console.error(` Store 用户数: ${allUsers.length}`);

    // Verify that the source tags survived the round trip through the store.
    const followingUsers = allUsers.filter(u => u.sources?.includes('following'));
    const followerUsers = allUsers.filter(u => u.sources?.includes('follower'));
    console.error(` 关注来源: ${followingUsers.length} | 粉丝来源: ${followerUsers.length}`);

    if (followingUsers.length === 0 || followerUsers.length === 0) {
      console.error(' FAIL: 缺少 following 或 follower 来源标记');
      allPassed = false;
    } else {
      console.error(' PASS: 来源标记正确');
    }

    console.error(`\n${allPassed ? 'ALL PASSED' : 'SOME FAILED'}`);
    console.error(`数据保存到: ${outFile}`);
  } finally {
    // Best-effort cleanup: a failure to close must not mask the test outcome.
    await browser.close().catch(() => {});
  }

  // Exit only after the finally block has run; calling process.exit() inside the
  // try would terminate the process before the browser is closed.
  process.exit(allPassed ? 0 : 1);
}

main().catch(err => { console.error('FATAL:', err.message); process.exit(1); });
@@ -0,0 +1,75 @@
1
+ const { ensureBrowserReady, delay, setDelayConfig } = require('./scraper/modules/page-helpers.cjs');
2
+ const { extractCommentAuthors } = require('./scraper/modules/comment-extractor.cjs');
3
+ const { extractGuessVideos } = require('./scraper/modules/guess-extractor.cjs');
4
+
5
/**
 * Manual check of comment-author extraction and "guess you like" extraction on one video page.
 *
 * The video URL comes from the first CLI argument (a built-in default is used
 * otherwise). Reuses an open tiktok.com tab when there is one, navigates to the
 * video, runs both extractors, prints timing plus PASS/FAIL lines, and exits with
 * status 0 when everything passed and 1 otherwise. An extractor that throws
 * counts as a failure.
 *
 * @returns {Promise<void>} Does not resolve normally: the process is terminated
 *   via `process.exit` once the browser has been closed.
 */
async function main() {
  setDelayConfig('fast');

  const videoUrl = process.argv[2] || 'https://www.tiktok.com/@porfirio.fructuoso/video/7615853535955111198';
  console.error(`目标: ${videoUrl}`);

  const browser = await ensureBrowserReady();
  let allPassed = true;
  try {
    const contexts = browser.contexts();
    let page = null;
    for (const ctx of contexts) {
      for (const p of ctx.pages()) {
        if (p.url().includes('tiktok.com')) { page = p; break; }
      }
      if (page) break;
    }
    if (!page) {
      // Fail with a readable message instead of "cannot read properties of undefined".
      if (contexts.length === 0) {
        throw new Error('no browser context available to open a page in');
      }
      page = await contexts[0].newPage();
    }

    await page.goto(videoUrl, { waitUntil: 'networkidle', timeout: 60000 });
    await delay(5000, 8000);

    console.error(`当前URL: ${page.url()}`);

    // ========== Comment extraction ==========
    console.error('\n--- 评论提取 (max=30) ---');
    const t1 = Date.now();
    let commentUsers = [];
    try {
      commentUsers = await extractCommentAuthors(page, 30);
    } catch (e) {
      console.error(` 异常: ${e.message}`);
      // A crashed extractor must not be reported as "ALL PASSED".
      allPassed = false;
    }
    console.error(` 耗时: ${((Date.now()-t1)/1000).toFixed(1)}s, 结果: ${commentUsers.length} 个`);

    if (commentUsers.length > 0) {
      // Uniqueness check over the returned entries.
      const s = new Set(commentUsers);
      const ok = s.size === commentUsers.length;
      console.error(` ${ok ? 'PASS' : 'FAIL'}: 唯一${s.size}/总数${commentUsers.length}`);
      if (!ok) allPassed = false;
    }

    // ========== "Guess you like" extraction ==========
    console.error('\n--- 猜你喜欢提取 (max=20) ---');
    const t2 = Date.now();
    let guessVideos = [];
    try {
      guessVideos = await extractGuessVideos(page, 20);
    } catch (e) {
      console.error(` 异常: ${e.message}`);
      // Same rule as above: an exception is a failed check.
      allPassed = false;
    }
    console.error(` 耗时: ${((Date.now()-t2)/1000).toFixed(1)}s, 结果: ${guessVideos.length} 个`);

    if (guessVideos.length > 0) {
      const ids = guessVideos.map(v => v.videoId);
      const s = new Set(ids);
      const ok = s.size === ids.length;
      console.error(` ${ok ? 'PASS' : 'FAIL'}: 唯一${s.size}/总数${ids.length}`);
      if (!ok) allPassed = false;
      // Every entry needs an author, a video id and a URL.
      const ok2 = guessVideos.every(v => v.author && v.videoId && v.url);
      console.error(` ${ok2 ? 'PASS' : 'FAIL'}: 结构完整`);
      if (!ok2) allPassed = false;
    }

    console.error(`\n${allPassed ? 'ALL PASSED' : 'SOME FAILED'}`);
  } finally {
    // Best-effort cleanup: a failure to close must not mask the test outcome.
    await browser.close().catch(() => {});
  }

  // Exit only after the finally block has run; calling process.exit() inside the
  // try would terminate the process before the browser is closed.
  process.exit(allPassed ? 0 : 1);
}

main().catch(err => { console.error('FATAL:', err.message); process.exit(1); });
@@ -0,0 +1,41 @@
1
+ const path = require('path');
2
+ const { ensureBrowserReady, ensureTikTokPage, delay } = require('./scraper/modules/page-helpers.cjs');
3
+ const { extractFollowAndFollowers } = require('./scraper/modules/follow-extractor.cjs');
4
+
5
/**
 * Extract the following/follower lists of one profile and dump them as JSON.
 *
 * The profile URL comes from the first CLI argument (a built-in default is used
 * otherwise). Progress messages go to stderr; the resulting JSON is written to
 * `results/follow-result.json` next to this script's parent directory and is
 * also printed to stdout. The browser is closed on the way out, even on error.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const fs = require('fs');

  const targetUrl = process.argv[2] || 'https://www.tiktok.com/@qiqi23280';
  console.error(`目标: ${targetUrl}`);

  const browser = await ensureBrowserReady();
  try {
    const profilePage = await ensureTikTokPage(browser, targetUrl);
    await profilePage.goto(targetUrl, { waitUntil: 'load', timeout: 30000 });
    console.error('等待页面加载...');
    await delay(3000, 5000);

    console.error('开始提取关注和粉丝...\n');
    const extracted = await extractFollowAndFollowers(profilePage, { log: console.error });

    console.error('\n--- 提取完成 ---');
    console.error(`关注: ${extracted.following.length} 人`);
    console.error(`粉丝: ${extracted.followers.length} 人`);

    // Serialize once and reuse the text for both the file and stdout.
    const serialized = JSON.stringify(extracted, null, 2);

    const resultsDir = path.join(__dirname, '..', 'results');
    fs.mkdirSync(resultsDir, { recursive: true });
    const resultFile = path.join(resultsDir, 'follow-result.json');
    fs.writeFileSync(resultFile, serialized);
    console.error(`已保存到 ${resultFile}`);

    console.log(serialized);
  } finally {
    // Best-effort cleanup; errors while closing are deliberately ignored.
    await browser.close().catch(() => {});
  }
}

main().catch((err) => {
  console.error('错误:', err.message);
  process.exit(1);
});