tt-help-cli-ycl 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,244 +1,225 @@
1
- import { chromium } from 'playwright';
2
- import { existsSync, accessSync } from 'fs';
3
- import { browser, saveBrowser, configPath } from './constants.js';
4
-
5
- const EXPLORE_URL = 'https://www.tiktok.com/explore';
6
-
7
- function sleep(ms) {
8
- return new Promise(r => setTimeout(r, ms));
9
- }
10
-
11
- function isRetryableError(error) {
12
- if (!error) return false;
13
- const msg = (error.message || error.toString() || '').toLowerCase();
14
- const patterns = ['interrupted', 'net::', 'econn', 'etimedout', 'enotfound', 'eai_again', 'esocketreset', 'connection.*refused', 'connection.*reset', 'failed.*navigate', 'target.*closed', 'crash'];
15
- return patterns.some(p => new RegExp(p, 'i').test(msg));
16
- }
17
-
18
- async function retryGoto(page, url, options, { maxRetries = 3, baseDelay = 3000 } = {}) {
19
- let lastError;
20
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
21
- try {
22
- return await page.goto(url, options);
23
- } catch (error) {
24
- lastError = error;
25
- if (attempt >= maxRetries || !isRetryableError(error)) {
26
- throw error;
27
- }
28
- const jitter = Math.random() * 500;
29
- const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
30
- console.log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime)}ms 后重试...`);
31
- await sleep(waitTime);
32
- }
33
- }
34
- throw lastError;
35
- }
36
-
37
- function detectBrowser() {
38
- const isMac = process.platform === 'darwin';
39
- const isWin = process.platform === 'win32';
40
- const isLinux = process.platform === 'linux';
41
-
42
- const paths = [];
43
-
44
- if (isMac) {
45
- paths.push(
46
- '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
47
- '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
48
- '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
49
- '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
50
- );
51
- } else if (isWin) {
52
- const localAppData = process.env.LOCALAPPDATA || '';
53
- const programFiles = process.env.PROGRAMFILES || '';
54
- const programFilesX86 = process.env['PROGRAMFILES(X86)'] || '';
55
- paths.push(
56
- `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
57
- `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`,
58
- `${localAppData}\\Google\\Chrome\\Application\\chrome.exe`,
59
- `${programFiles}\\Microsoft\\Edge\\Application\\msedge.exe`,
60
- `${programFilesX86}\\Microsoft\\Edge\\Application\\msedge.exe`,
61
- );
62
- } else if (isLinux) {
63
- paths.push(
64
- '/usr/bin/google-chrome',
65
- '/usr/bin/google-chrome-stable',
66
- '/usr/bin/chromium-browser',
67
- '/usr/bin/chromium',
68
- '/snap/bin/chromium',
69
- '/usr/bin/microsoft-edge',
70
- );
71
- }
72
-
73
- for (const p of paths) {
74
- try {
75
- accessSync(p);
76
- return p;
77
- } catch {
78
- // 文件不存在或无权限
79
- }
80
- }
81
- return null;
82
- }
83
-
84
- export async function fetchExplore(count = 100) {
85
- let browserPath = browser;
86
- let browserSource = '配置';
87
-
88
- if (!browserPath) {
89
- console.log(` [0/6] 未配置浏览器,正在自动探测...`);
90
- const detected = detectBrowser();
91
- if (detected) {
92
- browserPath = detected;
93
- browserSource = '自动探测';
94
- try {
95
- saveBrowser(browserPath);
96
- console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
97
- } catch (err) {
98
- console.log(` [0/6] 保存配置失败: ${err.message}`);
99
- }
100
- }
101
- }
102
-
103
- let browserLaunched = false;
104
- let instance;
105
-
106
- const launchOptions = {
107
- headless: true,
108
- args: [
109
- '--no-sandbox',
110
- '--disable-setuid-sandbox',
111
- '--disable-blink-features=AutomationControlled',
112
- '--disable-dev-shm-usage',
113
- ],
114
- };
115
-
116
- if (browserPath) {
117
- console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
118
- launchOptions.executablePath = browserPath;
119
- }
120
-
121
- try {
122
- instance = await chromium.launch(launchOptions);
123
- browserLaunched = true;
124
- } catch (err) {
125
- if (browserPath) {
126
- console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
127
- }
128
- instance = await chromium.launch({
129
- headless: true,
130
- args: launchOptions.args,
131
- });
132
- browserLaunched = true;
133
- }
134
-
135
- if (!browserLaunched) {
136
- throw new Error('无法启动浏览器,请确保已安装 Chrome/Edge 或运行 "npx playwright install chromium"');
137
- }
138
-
139
- try {
140
- const context = await instance.newContext({
141
- viewport: { width: 1280, height: 900 },
142
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
143
- locale: 'en-US',
144
- });
145
-
146
- // 注入反检测脚本
147
- await context.addInitScript(() => {
148
- // 重写 navigator.webdriver
149
- Object.defineProperty(navigator, 'webdriver', { get: () => false });
150
-
151
- // 伪造 window.chrome
152
- if (!window.chrome) {
153
- window.chrome = { runtime: {} };
154
- }
155
-
156
- // 覆写 permissions query
157
- const originalQuery = window.navigator.permissions.query;
158
- window.navigator.permissions.query = (params) =>
159
- params.name === 'notifications'
160
- ? Promise.resolve({ state: Notification.permission })
161
- : originalQuery(params);
162
-
163
- // 覆写 languages
164
- Object.defineProperty(navigator, 'languages', {
165
- get: () => ['en-US', 'en'],
166
- });
167
-
168
- // 覆写 plugins
169
- Object.defineProperty(navigator, 'plugins', {
170
- get: () => [1, 2, 3, 4, 5],
171
- });
172
- });
173
-
174
- const page = await context.newPage();
175
- await retryGoto(page, EXPLORE_URL, { waitUntil: 'load', timeout: 30000 });
176
- console.log(` [1/6] 页面已加载`);
177
-
178
- await sleep(5000);
179
-
180
- let lastCount = 0;
181
- let noNewCount = 0;
182
-
183
- for (let i = 0; i < 50; i++) {
184
- await page.evaluate(() => {
185
- window.scrollBy({ top: window.innerHeight * 0.8, behavior: 'smooth' });
186
- });
187
-
188
- await sleep(1500 + Math.random() * 1000);
189
-
190
- const urls = await page.$$eval('a', els =>
191
- els.map(a => a.href).filter(u => /\/video\/\d{16,20}/.test(u))
192
- );
193
- const uniqueCount = [...new Set(urls)].length;
194
-
195
- if (uniqueCount > lastCount) {
196
- noNewCount = 0;
197
- } else {
198
- noNewCount++;
199
- }
200
- lastCount = uniqueCount;
201
-
202
- if ((i + 1) % 10 === 0) {
203
- console.log(` [2/6] 滚动 ${i + 1}/50,当前 ${uniqueCount} 个视频`);
204
- }
205
-
206
- if (noNewCount >= 5) {
207
- console.log(` [3/6] 内容加载完成(${uniqueCount} 个视频)`);
208
- break;
209
- }
210
-
211
- if (uniqueCount >= count * 2) {
212
- console.log(` [3/6] 视频数量已充足(${uniqueCount} 个)`);
213
- break;
214
- }
215
- }
216
-
217
- await sleep(3000);
218
-
219
- const urls = await page.$$eval('a', els =>
220
- els.map(a => a.href).filter(u => /\/video\/\d{16,20}/.test(u))
221
- );
222
- const unique = [...new Set(urls)];
223
- console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
224
-
225
- const results = [];
226
- const seen = new Set();
227
- for (const url of unique) {
228
- if (results.length >= count) break;
229
- const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
230
- if (videoId && !seen.has(videoId)) {
231
- seen.add(videoId);
232
- const user = url.match(/\/@([^\/]+)/)?.[1];
233
- if (user) {
234
- results.push({ user, id: videoId, url });
235
- }
236
- }
237
- }
238
-
239
- console.log(` [5/6] 去重后 ${results.length} 个`);
240
- return results;
241
- } finally {
242
- await instance.close();
243
- }
244
- }
1
+ import { chromium } from 'playwright';
2
+ import { existsSync, accessSync } from 'fs';
3
+ import { browser, saveBrowser, configPath } from './constants.js';
4
+ import scrollCollector from '../scraper/modules/scroll-collector.cjs';
5
+
6
+ const EXPLORE_URL = 'https://www.tiktok.com/explore';
7
+
8
+ function sleep(ms) {
9
+ return new Promise(r => setTimeout(r, ms));
10
+ }
11
+
12
+ function isRetryableError(error) {
13
+ if (!error) return false;
14
+ const msg = (error.message || error.toString() || '').toLowerCase();
15
+ const patterns = ['interrupted', 'net::', 'econn', 'etimedout', 'enotfound', 'eai_again', 'esocketreset', 'connection.*refused', 'connection.*reset', 'failed.*navigate', 'target.*closed', 'crash'];
16
+ return patterns.some(p => new RegExp(p, 'i').test(msg));
17
+ }
18
+
19
+ async function retryGoto(page, url, options, { maxRetries = 3, baseDelay = 3000 } = {}) {
20
+ let lastError;
21
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
22
+ try {
23
+ return await page.goto(url, options);
24
+ } catch (error) {
25
+ lastError = error;
26
+ if (attempt >= maxRetries || !isRetryableError(error)) {
27
+ throw error;
28
+ }
29
+ const jitter = Math.random() * 500;
30
+ const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
31
+ console.log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime)}ms 后重试...`);
32
+ await sleep(waitTime);
33
+ }
34
+ }
35
+ throw lastError;
36
+ }
37
+
38
+ function detectBrowser() {
39
+ const isMac = process.platform === 'darwin';
40
+ const isWin = process.platform === 'win32';
41
+ const isLinux = process.platform === 'linux';
42
+
43
+ const paths = [];
44
+
45
+ if (isMac) {
46
+ paths.push(
47
+ '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
48
+ '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
49
+ '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
50
+ '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
51
+ );
52
+ } else if (isWin) {
53
+ const localAppData = process.env.LOCALAPPDATA || '';
54
+ const programFiles = process.env.PROGRAMFILES || '';
55
+ const programFilesX86 = process.env['PROGRAMFILES(X86)'] || '';
56
+ paths.push(
57
+ `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
58
+ `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`,
59
+ `${localAppData}\\Google\\Chrome\\Application\\chrome.exe`,
60
+ `${programFiles}\\Microsoft\\Edge\\Application\\msedge.exe`,
61
+ `${programFilesX86}\\Microsoft\\Edge\\Application\\msedge.exe`,
62
+ );
63
+ } else if (isLinux) {
64
+ paths.push(
65
+ '/usr/bin/google-chrome',
66
+ '/usr/bin/google-chrome-stable',
67
+ '/usr/bin/chromium-browser',
68
+ '/usr/bin/chromium',
69
+ '/snap/bin/chromium',
70
+ '/usr/bin/microsoft-edge',
71
+ );
72
+ }
73
+
74
+ for (const p of paths) {
75
+ try {
76
+ accessSync(p);
77
+ return p;
78
+ } catch {
79
+ // 文件不存在或无权限
80
+ }
81
+ }
82
+ return null;
83
+ }
84
+
85
+ export async function fetchExplore(count = 100) {
86
+ let browserPath = browser;
87
+ let browserSource = '配置';
88
+
89
+ if (!browserPath) {
90
+ console.log(` [0/6] 未配置浏览器,正在自动探测...`);
91
+ const detected = detectBrowser();
92
+ if (detected) {
93
+ browserPath = detected;
94
+ browserSource = '自动探测';
95
+ try {
96
+ saveBrowser(browserPath);
97
+ console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
98
+ } catch (err) {
99
+ console.log(` [0/6] 保存配置失败: ${err.message}`);
100
+ }
101
+ }
102
+ }
103
+
104
+ let browserLaunched = false;
105
+ let instance;
106
+
107
+ const launchOptions = {
108
+ headless: true,
109
+ args: [
110
+ '--no-sandbox',
111
+ '--disable-setuid-sandbox',
112
+ '--disable-blink-features=AutomationControlled',
113
+ '--disable-dev-shm-usage',
114
+ ],
115
+ };
116
+
117
+ if (browserPath) {
118
+ console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
119
+ launchOptions.executablePath = browserPath;
120
+ }
121
+
122
+ try {
123
+ instance = await chromium.launch(launchOptions);
124
+ browserLaunched = true;
125
+ } catch (err) {
126
+ if (browserPath) {
127
+ console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
128
+ }
129
+ instance = await chromium.launch({
130
+ headless: true,
131
+ args: launchOptions.args,
132
+ });
133
+ browserLaunched = true;
134
+ }
135
+
136
+ if (!browserLaunched) {
137
+ throw new Error('无法启动浏览器,请确保已安装 Chrome/Edge 或运行 "npx playwright install chromium"');
138
+ }
139
+
140
+ try {
141
+ const context = await instance.newContext({
142
+ viewport: { width: 1280, height: 900 },
143
+ userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
144
+ locale: 'en-US',
145
+ });
146
+
147
+ // 注入反检测脚本
148
+ await context.addInitScript(() => {
149
+ // 重写 navigator.webdriver
150
+ Object.defineProperty(navigator, 'webdriver', { get: () => false });
151
+
152
+ // 伪造 window.chrome
153
+ if (!window.chrome) {
154
+ window.chrome = { runtime: {} };
155
+ }
156
+
157
+ // 覆写 permissions query
158
+ const originalQuery = window.navigator.permissions.query;
159
+ window.navigator.permissions.query = (params) =>
160
+ params.name === 'notifications'
161
+ ? Promise.resolve({ state: Notification.permission })
162
+ : originalQuery(params);
163
+
164
+ // 覆写 languages
165
+ Object.defineProperty(navigator, 'languages', {
166
+ get: () => ['en-US', 'en'],
167
+ });
168
+
169
+ // 覆写 plugins
170
+ Object.defineProperty(navigator, 'plugins', {
171
+ get: () => [1, 2, 3, 4, 5],
172
+ });
173
+ });
174
+
175
+ const page = await context.newPage();
176
+ await retryGoto(page, EXPLORE_URL, { waitUntil: 'load', timeout: 30000 });
177
+ console.log(` [1/6] 页面已加载`);
178
+
179
+ await sleep(5000);
180
+
181
+ const allUrls = await scrollCollector.scrollAndCollect(page, {
182
+ container: null,
183
+ collectFn: () => {
184
+ return {
185
+ items: Array.from(document.querySelectorAll('a'))
186
+ .filter(a => /\/video\/\d{16,20}/.test(a.href))
187
+ .map(a => a.href),
188
+ };
189
+ },
190
+ maxItems: count * 2,
191
+ delayRange: [1500, 2500],
192
+ staleThreshold: 5,
193
+ onRound: (round, items, allItems) => {
194
+ if ((round + 1) % 10 === 0) {
195
+ const uniqueCount = [...new Set(allItems)].length;
196
+ console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
197
+ }
198
+ },
199
+ });
200
+
201
+ await sleep(3000);
202
+
203
+ const unique = [...new Set(allUrls)];
204
+ console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
205
+
206
+ const results = [];
207
+ const seen = new Set();
208
+ for (const url of unique) {
209
+ if (results.length >= count) break;
210
+ const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
211
+ if (videoId && !seen.has(videoId)) {
212
+ seen.add(videoId);
213
+ const user = url.match(/\/@([^\/]+)/)?.[1];
214
+ if (user) {
215
+ results.push({ user, id: videoId, url });
216
+ }
217
+ }
218
+ }
219
+
220
+ console.log(` [5/6] 去重后 ${results.length} 个`);
221
+ return results;
222
+ } finally {
223
+ await instance.close();
224
+ }
225
+ }
@@ -1,60 +1,60 @@
1
- import { fetch, ProxyAgent } from 'undici';
2
- import { DEFAULT_PROXY } from './constants.js';
3
-
4
- const HEADERS = {
5
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
7
- 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
8
- 'Accept-Encoding': 'gzip, deflate, br',
9
- 'Connection': 'keep-alive',
10
- 'Upgrade-Insecure-Requests': '1',
11
- 'Sec-Fetch-Dest': 'document',
12
- 'Sec-Fetch-Mode': 'navigate',
13
- 'Sec-Fetch-Site': 'none',
14
- 'Sec-Fetch-User': '?1',
15
- 'Cache-Control': 'max-age=0',
16
- };
17
-
18
- export async function fetchHtml(url, proxyUrl) {
19
- const p = proxyUrl || DEFAULT_PROXY;
20
- const agent = new ProxyAgent(p);
21
- let lastError;
22
-
23
- for (let attempt = 1; attempt <= 3; attempt++) {
24
- try {
25
- const res = await fetch(url, {
26
- headers: HEADERS,
27
- dispatcher: agent,
28
- redirect: 'follow',
29
- });
30
- const html = await res.text();
31
- return html;
32
- } catch (err) {
33
- lastError = err;
34
- if (attempt < 3) {
35
- const waitMs = Math.pow(2, attempt - 1) * 3000 + Math.random() * 2000;
36
- await new Promise(r => setTimeout(r, waitMs));
37
- }
38
- }
39
- }
40
-
41
- throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
42
- }
43
-
44
- export function makeProfileUrl(handle) {
45
- if (handle.startsWith('http')) return handle;
46
- return `https://www.tiktok.com/${handle}`;
47
- }
48
-
49
- export function isProfileUrl(url) {
50
- return /\/@[\w-]+(?:$|[?#])/.test(url);
51
- }
52
-
53
- export function isVideoUrl(url) {
54
- return /\/video\/\d+/.test(url);
55
- }
56
-
57
- export function extractProfileHandle(url) {
58
- const m = url.match(/https:\/\/www\.tiktok\.com\/(@[\w-]+)/);
59
- return m ? m[1] : null;
60
- }
1
+ import { fetch, ProxyAgent } from 'undici';
2
+ import { DEFAULT_PROXY } from './constants.js';
3
+
4
+ const HEADERS = {
5
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
7
+ 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
8
+ 'Accept-Encoding': 'gzip, deflate, br',
9
+ 'Connection': 'keep-alive',
10
+ 'Upgrade-Insecure-Requests': '1',
11
+ 'Sec-Fetch-Dest': 'document',
12
+ 'Sec-Fetch-Mode': 'navigate',
13
+ 'Sec-Fetch-Site': 'none',
14
+ 'Sec-Fetch-User': '?1',
15
+ 'Cache-Control': 'max-age=0',
16
+ };
17
+
18
+ export async function fetchHtml(url, proxyUrl) {
19
+ const p = proxyUrl || DEFAULT_PROXY;
20
+ const agent = new ProxyAgent(p);
21
+ let lastError;
22
+
23
+ for (let attempt = 1; attempt <= 3; attempt++) {
24
+ try {
25
+ const res = await fetch(url, {
26
+ headers: HEADERS,
27
+ dispatcher: agent,
28
+ redirect: 'follow',
29
+ });
30
+ const html = await res.text();
31
+ return html;
32
+ } catch (err) {
33
+ lastError = err;
34
+ if (attempt < 3) {
35
+ const waitMs = Math.pow(2, attempt - 1) * 3000 + Math.random() * 2000;
36
+ await new Promise(r => setTimeout(r, waitMs));
37
+ }
38
+ }
39
+ }
40
+
41
+ throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
42
+ }
43
+
44
+ export function makeProfileUrl(handle) {
45
+ if (handle.startsWith('http')) return handle;
46
+ return `https://www.tiktok.com/${handle}`;
47
+ }
48
+
49
+ export function isProfileUrl(url) {
50
+ return /\/@[\w-]+(?:$|[?#])/.test(url);
51
+ }
52
+
53
+ export function isVideoUrl(url) {
54
+ return /\/video\/\d+/.test(url);
55
+ }
56
+
57
+ export function extractProfileHandle(url) {
58
+ const m = url.match(/https:\/\/www\.tiktok\.com\/(@[\w-]+)/);
59
+ return m ? m[1] : null;
60
+ }