tt-help-cli-ycl 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +17 -17
  2. package/cli.js +9 -9
  3. package/package.json +44 -44
  4. package/src/cli/auto.js +94 -0
  5. package/src/cli/explore.js +117 -0
  6. package/src/cli/progress.js +111 -0
  7. package/src/cli/scrape.js +47 -0
  8. package/src/cli/utils.js +18 -0
  9. package/src/cli/videos.js +41 -0
  10. package/src/cli/watch.js +28 -0
  11. package/src/lib/args.js +386 -397
  12. package/src/lib/browser/anti-detect.js +23 -0
  13. package/src/lib/browser/cdp.js +142 -0
  14. package/src/lib/browser/launch.js +43 -0
  15. package/src/lib/browser/page.js +80 -0
  16. package/src/lib/constants.js +85 -168
  17. package/src/lib/delay.js +54 -0
  18. package/src/lib/explore-fetch.js +118 -0
  19. package/src/lib/fetcher.js +45 -60
  20. package/src/lib/filter.js +66 -66
  21. package/src/lib/io.js +54 -76
  22. package/src/lib/output.js +80 -80
  23. package/src/lib/parser.js +47 -47
  24. package/src/lib/retry.js +44 -0
  25. package/src/lib/scrape.js +40 -39
  26. package/src/lib/url.js +52 -0
  27. package/src/main.mjs +199 -962
  28. package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
  29. package/src/scraper/auto-core.mjs +183 -0
  30. package/src/scraper/{core.cjs → core.mjs} +188 -214
  31. package/src/{explore-core.cjs → scraper/explore-core.mjs} +44 -42
  32. package/src/scraper/modules/captcha-handler.mjs +114 -0
  33. package/src/scraper/modules/comment-extractor.mjs +69 -0
  34. package/src/scraper/modules/follow-extractor.mjs +121 -0
  35. package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +51 -53
  36. package/src/scraper/modules/page-error-detector.mjs +70 -0
  37. package/src/scraper/modules/page-helpers.mjs +46 -0
  38. package/src/scraper/modules/scroll-collector.mjs +189 -0
  39. package/src/{get-user-videos-core.cjs → videos/core.mjs} +126 -143
  40. package/src/watch/data-store.mjs +239 -0
  41. package/src/watch/public/index.html +446 -271
  42. package/src/watch/server.mjs +257 -153
  43. package/src/auto-core.cjs +0 -367
  44. package/src/data-store.cjs +0 -69
  45. package/src/get-user-videos.cjs +0 -59
  46. package/src/lib/auto-browser.mjs +0 -13
  47. package/src/lib/explore.js +0 -225
  48. package/src/lib/get-user-videos-browser.mjs +0 -6
  49. package/src/lib/scrape-browser.mjs +0 -6
  50. package/src/scraper/index.cjs +0 -97
  51. package/src/scraper/modules/comment-extractor.cjs +0 -49
  52. package/src/scraper/modules/follow-extractor.cjs +0 -112
  53. package/src/scraper/modules/page-helpers.cjs +0 -422
  54. package/src/scraper/modules/scroll-collector.cjs +0 -173
  55. package/src/scraper/modules/video-scanner.cjs +0 -43
  56. package/src/test-auto-follow.cjs +0 -109
  57. package/src/test-extractors.cjs +0 -75
  58. package/src/test-follow.cjs +0 -41
@@ -1,225 +0,0 @@
1
- import { chromium } from 'playwright';
2
- import { existsSync, accessSync } from 'fs';
3
- import { browser, saveBrowser, configPath } from './constants.js';
4
- import scrollCollector from '../scraper/modules/scroll-collector.cjs';
5
-
6
- const EXPLORE_URL = 'https://www.tiktok.com/explore';
7
-
8
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
11
-
12
// Fragments of error text that indicate a transient network/browser failure.
// Compiled once into a single case-insensitive alternation instead of building
// one RegExp per fragment on every call.
const RETRYABLE_ERROR_PATTERN = new RegExp(
  [
    'interrupted',
    'net::',
    'econn',
    'etimedout',
    'enotfound',
    'eai_again',
    'esocketreset',
    'connection.*refused',
    'connection.*reset',
    'failed.*navigate',
    'target.*closed',
    'crash',
  ].join('|'),
  'i',
);

/**
 * Decide whether a failure looks transient and is worth retrying.
 *
 * The check is purely textual: the error message (or, when there is no usable
 * string message, the stringified error) is matched against a fixed list of
 * network/browser failure fragments.
 *
 * @param {unknown} error - The thrown value; may be an Error, a string, or anything else.
 * @returns {boolean} `true` when the text matches a known transient failure.
 */
function isRetryableError(error) {
  if (!error) return false;
  // A non-string `message` (e.g. a numeric code) must not blow up inside the
  // error-handling path, so fall back to the stringified value instead.
  const hasTextMessage = typeof error.message === 'string' && error.message !== '';
  const text = hasTextMessage ? error.message : String(error);
  return RETRYABLE_ERROR_PATTERN.test(text);
}
18
-
19
/**
 * Navigate with `page.goto`, retrying transient failures with exponential
 * backoff plus up to 500 ms of random jitter.
 *
 * At least one navigation attempt is always made. A negative, fractional or
 * non-numeric `maxRetries` is normalised to a non-negative integer, so the
 * function can never fall out of its loop and throw `undefined`.
 *
 * @param {object} page - Object exposing an async `goto(url, options)` method.
 * @param {string} url - Address to navigate to.
 * @param {object} [options] - Passed through to `page.goto` unchanged.
 * @param {object} [retry]
 * @param {number} [retry.maxRetries=3] - Number of retries after the first attempt.
 * @param {number} [retry.baseDelay=3000] - Base backoff in ms, doubled on each retry.
 * @returns {Promise<*>} Whatever `page.goto` resolves with.
 * @throws The last navigation error once retries are exhausted, or immediately
 *   when `isRetryableError` rejects the error.
 */
async function retryGoto(page, url, options, { maxRetries = 3, baseDelay = 3000 } = {}) {
  const retries = Math.max(0, Math.floor(Number(maxRetries)) || 0);

  for (let attempt = 0; ; attempt++) {
    try {
      return await page.goto(url, options);
    } catch (error) {
      if (attempt >= retries || !isRetryableError(error)) {
        throw error;
      }
      const jitter = Math.random() * 500;
      const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
      console.log(` [重试] ${attempt + 1}/${retries},${Math.round(waitTime)}ms 后重试...`);
      await sleep(waitTime);
    }
  }
}
37
-
38
/**
 * Probe well-known install locations for a Chromium-family browser on the
 * current platform (macOS, Windows or Linux).
 *
 * Candidates are checked in order with `accessSync`; the first one that does
 * not throw wins.
 *
 * @returns {string|null} Path of the first accessible browser binary, or
 *   `null` when none is found or the platform is not one of the three above.
 */
function detectBrowser() {
  let candidates;

  switch (process.platform) {
    case 'darwin':
      candidates = [
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
        '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
        '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
      ];
      break;
    case 'win32': {
      const localAppData = process.env.LOCALAPPDATA || '';
      const programFiles = process.env.PROGRAMFILES || '';
      const programFilesX86 = process.env['PROGRAMFILES(X86)'] || '';
      candidates = [
        `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
        `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`,
        `${localAppData}\\Google\\Chrome\\Application\\chrome.exe`,
        `${programFiles}\\Microsoft\\Edge\\Application\\msedge.exe`,
        `${programFilesX86}\\Microsoft\\Edge\\Application\\msedge.exe`,
      ];
      break;
    }
    case 'linux':
      candidates = [
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        '/usr/bin/chromium-browser',
        '/usr/bin/chromium',
        '/snap/bin/chromium',
        '/usr/bin/microsoft-edge',
      ];
      break;
    default:
      candidates = [];
  }

  const isAccessible = (file) => {
    try {
      accessSync(file);
      return true;
    } catch {
      // File is missing or we lack permission to reach it.
      return false;
    }
  };

  return candidates.find(isAccessible) ?? null;
}
84
-
85
/**
 * Open the TikTok explore page in a headless Chromium-family browser, scroll
 * it while collecting video links, and return up to `count` unique videos.
 *
 * Browser selection: the configured `browser` path is used when set;
 * otherwise `detectBrowser()` is tried and a hit is persisted with
 * `saveBrowser`. If launching that executable fails, Playwright's own
 * Chromium is used instead. Progress is reported on stdout with `[n/6]` tags.
 *
 * @param {number} [count=100] - Maximum number of videos to return.
 * @returns {Promise<Array<{user: string, id: string, url: string}>>}
 *   One entry per unique video id, in discovery order.
 * @throws {Error} When no browser can be launched (original failure attached
 *   as `cause`), or when navigation/collection fails.
 */
export async function fetchExplore(count = 100) {
  let browserPath = browser;
  let browserSource = '配置';

  if (!browserPath) {
    console.log(` [0/6] 未配置浏览器,正在自动探测...`);
    const detected = detectBrowser();
    if (detected) {
      browserPath = detected;
      browserSource = '自动探测';
      try {
        saveBrowser(browserPath);
        console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
      } catch (err) {
        // Persisting the path is best-effort; the run continues without it.
        console.log(` [0/6] 保存配置失败: ${err.message}`);
      }
    }
  }

  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-blink-features=AutomationControlled',
    '--disable-dev-shm-usage',
  ];

  let instance;

  if (browserPath) {
    console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
    try {
      instance = await chromium.launch({
        headless: true,
        args: launchArgs,
        executablePath: browserPath,
      });
    } catch (err) {
      console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
    }
  }

  if (!instance) {
    try {
      instance = await chromium.launch({ headless: true, args: launchArgs });
    } catch (err) {
      // Surface the actionable hint; the previous guard for this message was
      // unreachable, so users only ever saw the raw launch error.
      throw new Error(
        '无法启动浏览器,请确保已安装 Chrome/Edge 或运行 "npx playwright install chromium"',
        { cause: err },
      );
    }
  }

  try {
    const context = await instance.newContext({
      viewport: { width: 1280, height: 900 },
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      locale: 'en-US',
    });

    // Inject the anti-detection script into every page of this context.
    await context.addInitScript(() => {
      // Override navigator.webdriver.
      Object.defineProperty(navigator, 'webdriver', { get: () => false });

      // Fake window.chrome when it is absent.
      if (!window.chrome) {
        window.chrome = { runtime: {} };
      }

      // Override permissions.query. The original method must stay bound to
      // the Permissions object, otherwise delegating to it throws
      // "Illegal invocation".
      const permissions = window.navigator.permissions;
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (params) =>
        params.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission })
          : originalQuery(params);

      // Override languages.
      Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en'],
      });

      // Override plugins.
      Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5],
      });
    });

    const page = await context.newPage();
    await retryGoto(page, EXPLORE_URL, { waitUntil: 'load', timeout: 30000 });
    console.log(` [1/6] 页面已加载`);

    await sleep(5000);

    const allUrls = await scrollCollector.scrollAndCollect(page, {
      container: null,
      // References `document`, so this is presumably evaluated inside the
      // page; keep it free of outer-scope references.
      collectFn: () => {
        return {
          items: Array.from(document.querySelectorAll('a'))
            .filter(a => /\/video\/\d{16,20}/.test(a.href))
            .map(a => a.href),
        };
      },
      // Collect twice the requested amount; presumably head-room for the
      // de-duplication below.
      maxItems: count * 2,
      delayRange: [1500, 2500],
      staleThreshold: 5,
      onRound: (round, items, allItems) => {
        if ((round + 1) % 10 === 0) {
          const uniqueCount = new Set(allItems).size;
          console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
        }
      },
    });

    await sleep(3000);

    const unique = [...new Set(allUrls)];
    console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);

    const results = [];
    const seen = new Set();
    for (const url of unique) {
      if (results.length >= count) break;
      // Accept an id followed by '/', '?', '#' or end of string, so links
      // that passed the collection filter but carry a query string or
      // trailing slash are no longer silently discarded.
      const videoId = url.match(/video\/(\d{16,20})(?:[/?#]|$)/)?.[1];
      if (!videoId || seen.has(videoId)) continue;
      seen.add(videoId);
      const user = url.match(/\/@([^\/]+)/)?.[1];
      if (user) {
        results.push({ user, id: videoId, url });
      }
    }

    console.log(` [5/6] 去重后 ${results.length} 个`);
    return results;
  } finally {
    // Always shut the browser down, on success and on failure.
    await instance.close();
  }
}
@@ -1,6 +0,0 @@
1
- import { createRequire } from 'module';
2
-
3
// ES-module bridge to the CommonJS core: build a require() anchored at this
// file, load the core, and re-export its `runGetUserVideos` entry point.
const require = createRequire(import.meta.url);

export const { runGetUserVideos } = require('../get-user-videos-core.cjs');
@@ -1,6 +0,0 @@
1
- import { createRequire } from 'module';
2
-
3
// ES-module bridge to the CommonJS scraper core: build a require() anchored
// at this file, load the core, and re-export its `runScrape` entry point.
const require = createRequire(import.meta.url);

export const { runScrape } = require('../scraper/core.cjs');
@@ -1,97 +0,0 @@
1
- const { runScrape } = require("./core.cjs");
2
-
3
/**
 * CLI entry point for the scraper.
 *
 * Usage (positional arguments, `-o/--output <path>` may appear anywhere):
 *   preset mode: <videoUrl> <fast|normal|slow|stealth> [maxVideos] [maxComments] [maxGuess]
 *   manual mode: <videoUrl> [maxVideos] [maxComments] [maxGuess] [switchMax] [commentMax]
 *
 * Numeric arguments that are missing, non-numeric or `0` fall back to their
 * defaults (20 / 999 / 10 / null / null). The result is written as
 * pretty-printed JSON to the output file, or to stdout when no file is given;
 * all diagnostics go to stderr.
 *
 * On failure the error message is logged, a browser owned by this process is
 * closed, and only then does the process exit with status 1. Exiting from
 * inside the `catch` would terminate before `finally` could close the browser.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rawArgs = process.argv.slice(2);

  // Separate the -o/--output option from the positional arguments.
  let outputPath = null;
  const args = [];
  for (let i = 0; i < rawArgs.length; i++) {
    if (rawArgs[i] === "-o" || rawArgs[i] === "--output") {
      outputPath = rawArgs[++i];
    } else {
      args.push(rawArgs[i]);
    }
  }

  const videoUrl = args[0];
  const toInt = (value) => Number.parseInt(value, 10);

  let preset = null;
  let maxVideos = 20;
  let maxComments = 999;
  let maxGuess = 10;
  let switchMax = null;
  let commentMax = null;

  if (args[1]) {
    if (["fast", "normal", "slow", "stealth"].includes(args[1].toLowerCase())) {
      // Preset mode: the delay arguments are not read from the command line.
      preset = args[1].toLowerCase();
      maxVideos = toInt(args[2]) || 20;
      maxComments = toInt(args[3]) || 999;
      maxGuess = toInt(args[4]) || 10;
    } else {
      // Manual mode: limits first, then the two optional delay ceilings.
      maxVideos = toInt(args[1]) || 20;
      maxComments = toInt(args[2]) || 999;
      maxGuess = toInt(args[3]) || 10;
      switchMax = toInt(args[4]) || null;
      commentMax = toInt(args[5]) || null;
    }
  }

  if (!videoUrl) {
    console.error("用法:");
    console.error(
      " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [猜你喜欢数] [-o 输出路径]",
    );
    console.error(
      " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [猜你喜欢数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
    );
    console.error(
      "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
    );
    console.error(
      "选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout",
    );
    process.exit(1);
  }

  let browser;
  let failed = false;
  try {
    const { output, browser: b, isExternal } = await runScrape({
      videoUrl,
      maxVideos,
      maxComments,
      maxGuess,
      preset,
      switchMax,
      commentMax,
      log: console.error,
    });
    // Only a browser that is not flagged as external gets closed here.
    if (!isExternal) {
      browser = b;
    }

    const json = JSON.stringify(output, null, 2);
    if (outputPath) {
      const fs = require("fs");
      const path = require("path");
      const resultFile = path.isAbsolute(outputPath)
        ? outputPath
        : path.resolve(outputPath);
      fs.mkdirSync(path.dirname(resultFile), { recursive: true });
      fs.writeFileSync(resultFile, json);
      console.error(`已保存到 ${resultFile}`);
    } else {
      process.stdout.write(json + "\n");
    }
  } catch (err) {
    console.error(err.message);
    failed = true;
  } finally {
    // Closing is best-effort: a failure here must not mask the real outcome.
    if (browser) await browser.close().catch(() => {});
  }

  if (failed) process.exit(1);
}
93
-
94
// Run the CLI. Any rejection that escapes main() is reported on stderr and
// turned into a non-zero exit status.
function reportFatal(err) {
  console.error(err.message);
  process.exit(1);
}

main().catch(reportFatal);
@@ -1,49 +0,0 @@
1
- const { delay, getDelayConfig, closeCommentPanel } = require('./page-helpers.cjs');
2
- const { scrollAndCollect } = require('./scroll-collector.cjs');
3
-
4
/**
 * Click the tab-bar item whose text contains "评论" and give the comment list
 * a chance to render.
 *
 * After the click the function pauses for a random delay between half of
 * `commentMax` and `commentMax` (both from `getDelayConfig()`), then waits up
 * to 5 s for the comment list container. A missing container is tolerated.
 *
 * @param {object} page - Playwright page showing a video.
 * @returns {Promise<void>}
 */
async function openCommentPanel(page) {
  await page
    .locator('[class*="tabbar-item"]')
    .filter({ hasText: '评论' })
    .first()
    .click();

  const { commentMax } = getDelayConfig();
  await delay(Math.round(commentMax * 0.5), commentMax);

  try {
    await page.waitForSelector('[class*="CommentListContainer"]', { timeout: 5000 });
  } catch {
    // Best-effort wait: callers cope with an empty or absent comment list.
  }
}
12
-
13
/**
 * Open the comment panel, scroll it, and collect the handles of comment
 * authors.
 *
 * @param {object} page - Playwright page showing a video.
 * @param {number} [maxComments=10] - Upper bound on the number of handles returned.
 * @returns {Promise<string[]>} Handles in the form `@name`, at most `maxComments` of them.
 */
async function extractCommentAuthors(page, maxComments = 10) {
  await openCommentPanel(page);

  const { commentMax } = getDelayConfig();
  const shortDelay = Math.round(commentMax * 0.3);

  const handles = await scrollAndCollect(page, {
    container: '[class*="CommentMain"]',
    findScrollable: true,
    // References `document`, so this is presumably evaluated inside the page;
    // it must not capture anything from the enclosing scope.
    collectFn: (container) => {
      const list = document.querySelector('[class*="CommentListContainer"]');
      if (!list) return { items: [] };

      const items = [];
      for (const wrapper of Array.from(list.children)) {
        const link = wrapper.querySelector('[class*="UsernameContentWrapper"] a');
        if (!link) continue;
        const href = link.href || link.getAttribute('href');
        const m = href && href.match(/@([^/]+)/);
        if (m) items.push('@' + m[1]);
      }
      return { items };
    },
    uniqueKey: (handle) => handle,
    maxItems: maxComments,
    delayRange: [shortDelay, commentMax],
    staleThreshold: 3,
  });

  await closeCommentPanel(page);
  await delay(shortDelay, commentMax);

  return handles.slice(0, maxComments);
}
46
-
47
- module.exports = {
48
- extractCommentAuthors,
49
- };
@@ -1,112 +0,0 @@
1
- const { delay, getDelayConfig } = require('./page-helpers.cjs');
2
- const { scrollAndCollect } = require('./scroll-collector.cjs');
3
-
4
- const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];
5
-
6
/**
 * Open the following/followers modal from a user profile page.
 *
 * Clicks the parent of the `[data-e2e=following]` element, pauses 2-3 s, then
 * waits up to 5 s for the user list container to appear.
 *
 * @param {object} page - Playwright page expected to show a user profile.
 * @returns {Promise<void>}
 * @throws {Error} When the `[data-e2e=following]` element is missing, or when
 *   the user list container does not appear (the underlying wait error is
 *   attached as `cause` instead of being discarded).
 */
async function openFollowModal(page) {
  const followingEl = await page.$('[data-e2e=following]');
  if (!followingEl) {
    throw new Error('未找到 [data-e2e=following] 元素,请确认当前页面为用户主页');
  }
  // The click target is the element's parent, not the element itself.
  await followingEl.evaluate((node) => node.parentElement.click());
  await delay(2000, 3000);

  try {
    await page.waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 });
  } catch (err) {
    throw new Error('关注弹窗未出现 DivUserListContainer', { cause: err });
  }
}
18
-
19
/**
 * Switch the open follow modal to its followers tab.
 *
 * Clicks the first tab item whose text contains "粉丝", pauses 2-3 s, then
 * waits up to 5 s for the user list container (a timeout there is ignored).
 *
 * @param {object} page - Playwright page with the follow modal open.
 * @returns {Promise<void>}
 * @throws When no matching tab exists (the in-page error propagates through
 *   `page.evaluate`).
 */
async function switchToFollowersTab(page) {
  await page.evaluate(() => {
    const followersTab = Array.from(document.querySelectorAll('[class*=DivTabItem]'))
      .find((tab) => tab.textContent?.includes('粉丝'));
    if (!followersTab) {
      throw new Error('未找到粉丝 Tab');
    }
    followersTab.click();
  });

  await delay(2000, 3000);

  try {
    await page.waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 });
  } catch {
    // Best-effort wait: extraction proceeds even if the list is slow to show.
  }
}
33
-
34
/**
 * Close the follow modal by clicking its close button, if one is present,
 * then pause 1-2 s.
 *
 * @param {object} page - Playwright page with the follow modal possibly open.
 * @returns {Promise<void>}
 */
async function closeFollowModal(page) {
  await page.evaluate(() => {
    document.querySelector('[data-e2e=follow-popup-close]')?.click();
  });
  await delay(1000, 2000);
}
41
-
42
/**
 * Build the collector that reads user entries out of the follow modal.
 *
 * The returned function only touches `document` and its own locals; it is
 * presumably serialised and run inside the page, which is why the filter
 * words and the decode helper are declared inside it rather than shared.
 *
 * Each `a[href*="/@"]` link inside the modal (or the whole document when the
 * modal root is not found) yields one user, unless its text is two characters
 * or shorter, is one of the UI labels in the filter list, or its handle was
 * already seen in this pass.
 *
 * @returns {function(*): {items: Array<{handle: string, displayName: string}>}}
 *   Collector returning the users currently present in the DOM.
 */
function createUserCollectFn() {
  return (container) => {
    const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];

    // A handle containing a stray '%' is not valid percent-encoding; keep the
    // raw text instead of letting URIError abort the whole collection round.
    const decodeHandle = (raw) => {
      try {
        return decodeURIComponent(raw);
      } catch {
        return raw;
      }
    };

    const modal = document.querySelector('[class*=eyhy6180]');
    const root = modal || document;
    const users = [];
    const seen = new Set();

    for (const link of root.querySelectorAll('a[href*="/@"]')) {
      const match = link.href.match(/@([^/?]+)/);
      if (!match) continue;

      const handle = '@' + decodeHandle(match[1]);
      const text = (link.textContent || '').trim();
      if (text.length <= 2) continue;
      if (FILTER_WORDS.includes(text)) continue;
      if (seen.has(handle)) continue;

      seen.add(handle);
      users.push({ handle, displayName: text });
    }
    return { items: users };
  };
}
64
-
65
/**
 * Scroll the user list of the open follow modal and collect its users.
 *
 * @param {object} page - Playwright page with the follow modal open.
 * @param {number} maxUsers - Upper bound on the number of users returned.
 * @returns {Promise<Array<{handle: string, displayName: string}>>}
 *   At most `maxUsers` users, keyed uniquely by handle.
 */
async function extractUsersFromModal(page, maxUsers) {
  const { commentMax } = getDelayConfig();
  const shortDelay = Math.round(commentMax * 0.3);

  const collected = await scrollAndCollect(page, {
    container: '[class*=DivUserListContainer]',
    findScrollable: false,
    collectFn: createUserCollectFn(),
    uniqueKey: (user) => user.handle,
    maxItems: maxUsers,
    delayRange: [shortDelay, commentMax],
    staleThreshold: 5,
  });

  return collected.slice(0, maxUsers);
}
79
-
80
/**
 * Read both the following list and the followers list of the profile that is
 * currently open.
 *
 * Steps, in order: open the follow modal, collect the following list, switch
 * to the followers tab, collect the followers list, close the modal. Each
 * step is announced through `log`.
 *
 * @param {object} page - Playwright page showing a user profile.
 * @param {object} [options]
 * @param {number} [options.maxFollowing=999] - Cap on collected following entries.
 * @param {number} [options.maxFollowers=999] - Cap on collected follower entries.
 * @param {function(string): void} [options.log] - Progress sink; defaults to a no-op.
 * @returns {Promise<{following: Array<[string, string]>, followers: Array<[string, string]>}>}
 *   Lists of `[handle, displayName]` pairs.
 */
async function extractFollowAndFollowers(page, options = {}) {
  const { maxFollowing = 999, maxFollowers = 999, log = () => {} } = options;
  const toPair = ({ handle, displayName }) => [handle, displayName];

  log('打开关注弹窗...');
  await openFollowModal(page);

  log('提取关注列表...');
  const followingUsers = await extractUsersFromModal(page, maxFollowing);
  log(` 已关注: ${followingUsers.length}`);

  log('切换到粉丝 Tab...');
  await switchToFollowersTab(page);

  log('提取粉丝列表...');
  const followerUsers = await extractUsersFromModal(page, maxFollowers);
  log(` 粉丝: ${followerUsers.length}`);

  log('关闭弹窗...');
  await closeFollowModal(page);

  return {
    following: followingUsers.map(toPair),
    followers: followerUsers.map(toPair),
  };
}
109
-
110
- module.exports = {
111
- extractFollowAndFollowers,
112
- };