tt-help-cli-ycl 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,124 +1,218 @@
1
- import { chromium } from 'playwright';
2
-
3
- const EXPLORE_URL = 'https://www.tiktok.com/explore';
4
-
5
- function sleep(ms) {
6
- return new Promise(r => setTimeout(r, ms));
7
- }
8
-
9
- export async function fetchExplore(count = 100) {
10
- const browser = await chromium.launch({
11
- headless: true,
12
- args: [
13
- '--no-sandbox',
14
- '--disable-setuid-sandbox',
15
- '--disable-blink-features=AutomationControlled',
16
- '--disable-dev-shm-usage',
17
- ],
18
- });
19
- try {
20
- const context = await browser.newContext({
21
- viewport: { width: 1280, height: 900 },
22
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
23
- locale: 'en-US',
24
- });
25
-
26
- // 注入反检测脚本
27
- await context.addInitScript(() => {
28
- // 重写 navigator.webdriver
29
- Object.defineProperty(navigator, 'webdriver', { get: () => false });
30
-
31
- // 伪造 window.chrome
32
- if (!window.chrome) {
33
- window.chrome = { runtime: {} };
34
- }
35
-
36
- // 覆写 permissions query
37
- const originalQuery = window.navigator.permissions.query;
38
- window.navigator.permissions.query = (params) =>
39
- params.name === 'notifications'
40
- ? Promise.resolve({ state: Notification.permission })
41
- : originalQuery(params);
42
-
43
- // 覆写 languages
44
- Object.defineProperty(navigator, 'languages', {
45
- get: () => ['en-US', 'en'],
46
- });
47
-
48
- // 覆写 plugins
49
- Object.defineProperty(navigator, 'plugins', {
50
- get: () => [1, 2, 3, 4, 5],
51
- });
52
- });
53
-
54
- const page = await context.newPage();
55
- await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded', timeout: 30000 });
56
- console.log(` [1/6] 页面已加载`);
57
-
58
- await sleep(5000);
59
-
60
- let lastCount = 0;
61
- let noNewCount = 0;
62
-
63
- for (let i = 0; i < 50; i++) {
64
- await page.evaluate(() => {
65
- window.scrollBy({ top: window.innerHeight * 0.8, behavior: 'smooth' });
66
- });
67
-
68
- await sleep(1500 + Math.random() * 1000);
69
-
70
- const urls = await page.$$eval('a', els =>
71
- els.map(a => a.href).filter(u => /\/video\/\d{16,20}/.test(u))
72
- );
73
- const uniqueCount = [...new Set(urls)].length;
74
-
75
- if (uniqueCount > lastCount) {
76
- noNewCount = 0;
77
- } else {
78
- noNewCount++;
79
- }
80
- lastCount = uniqueCount;
81
-
82
- if ((i + 1) % 10 === 0) {
83
- console.log(` [2/6] 滚动 ${i + 1}/50,当前 ${uniqueCount} 个视频`);
84
- }
85
-
86
- if (noNewCount >= 5) {
87
- console.log(` [3/6] 内容加载完成(${uniqueCount} 个视频)`);
88
- break;
89
- }
90
-
91
- if (uniqueCount >= count * 2) {
92
- console.log(` [3/6] 视频数量已充足(${uniqueCount} 个)`);
93
- break;
94
- }
95
- }
96
-
97
- await sleep(3000);
98
-
99
- const urls = await page.$$eval('a', els =>
100
- els.map(a => a.href).filter(u => /\/video\/\d{16,20}/.test(u))
101
- );
102
- const unique = [...new Set(urls)];
103
- console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
104
-
105
- const results = [];
106
- const seen = new Set();
107
- for (const url of unique) {
108
- if (results.length >= count) break;
109
- const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
110
- if (videoId && !seen.has(videoId)) {
111
- seen.add(videoId);
112
- const user = url.match(/\/@([^\/]+)/)?.[1];
113
- if (user) {
114
- results.push({ user, id: videoId, url });
115
- }
116
- }
117
- }
118
-
119
- console.log(` [5/6] 去重后 ${results.length} 个`);
120
- return results;
121
- } finally {
122
- await browser.close();
123
- }
124
- }
1
+ import { chromium } from 'playwright';
2
+ import { existsSync, accessSync } from 'fs';
3
+ import { browser, saveBrowser, configPath } from './constants.js';
4
+
5
+ const EXPLORE_URL = 'https://www.tiktok.com/explore';
6
+
7
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
10
+
11
/**
 * Probe well-known install locations for a Chromium-based browser.
 *
 * Candidates are checked in priority order and the first path that
 * `accessSync` accepts is returned.
 *
 * @param {string} [platform=process.platform] - Platform identifier
 *   ('darwin', 'win32' or 'linux'); any other value yields no candidates.
 * @param {Object<string, string|undefined>} [env=process.env] - Environment
 *   used to resolve the Windows base directories.
 * @returns {string|null} Path of the first accessible browser binary, or
 *   null when none of the candidates can be accessed.
 */
function detectBrowser(platform = process.platform, env = process.env) {
  const candidates = [];

  if (platform === 'darwin') {
    candidates.push(
      '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
      '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
      '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
    );
  } else if (platform === 'win32') {
    const programFiles = env.PROGRAMFILES;
    const programFilesX86 = env['PROGRAMFILES(X86)'];
    const localAppData = env.LOCALAPPDATA;
    const windowsCandidates = [
      [programFiles, 'Google\\Chrome\\Application\\chrome.exe'],
      [programFilesX86, 'Google\\Chrome\\Application\\chrome.exe'],
      [localAppData, 'Google\\Chrome\\Application\\chrome.exe'],
      [programFiles, 'Microsoft\\Edge\\Application\\msedge.exe'],
      [programFilesX86, 'Microsoft\\Edge\\Application\\msedge.exe'],
    ];
    for (const [baseDir, relativePath] of windowsCandidates) {
      // An unset variable would otherwise produce a drive-relative path such
      // as "\Google\Chrome\..." that has nothing to do with the real install.
      if (baseDir) {
        candidates.push(`${baseDir}\\${relativePath}`);
      }
    }
  } else if (platform === 'linux') {
    candidates.push(
      '/usr/bin/google-chrome',
      '/usr/bin/google-chrome-stable',
      '/usr/bin/chromium-browser',
      '/usr/bin/chromium',
      '/snap/bin/chromium',
      '/usr/bin/microsoft-edge',
    );
  }

  for (const candidate of candidates) {
    try {
      accessSync(candidate);
      return candidate;
    } catch {
      // Expected for every browser that is not installed (missing file or
      // no permission); move on to the next candidate.
    }
  }
  return null;
}
57
+
58
/**
 * Decide which browser executable to launch.
 *
 * Uses the configured `browser` path when present; otherwise runs
 * `detectBrowser()` and tries to persist a hit via `saveBrowser` (a failure
 * to save is only logged).
 *
 * @returns {{browserPath: (string|null|undefined), browserSource: string}}
 *   The path (falsy when nothing is configured or detected) and the label
 *   used in the log line that announces it.
 */
function resolveBrowserPath() {
  if (browser) {
    return { browserPath: browser, browserSource: '配置' };
  }

  console.log(` [0/6] 未配置浏览器,正在自动探测...`);
  const detected = detectBrowser();
  if (!detected) {
    return { browserPath: browser, browserSource: '配置' };
  }

  try {
    saveBrowser(detected);
    console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
  } catch (err) {
    // Persisting the path is best-effort; the detected browser is still used.
    console.log(` [0/6] 保存配置失败: ${err.message}`);
  }
  return { browserPath: detected, browserSource: '自动探测' };
}

/**
 * Launch a headless browser, preferring `browserPath` and falling back to
 * Playwright's own Chromium.
 *
 * @param {string|null|undefined} browserPath - Executable to try first.
 * @param {string} browserSource - Label describing where the path came from.
 * @returns {Promise<object>} The launched Playwright browser instance.
 * @throws {Error} When no browser can be launched; the underlying launch
 *   error is attached as `cause`.
 */
async function launchBrowser(browserPath, browserSource) {
  const baseOptions = {
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled',
      '--disable-dev-shm-usage',
    ],
  };

  if (browserPath) {
    console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
    try {
      return await chromium.launch({ ...baseOptions, executablePath: browserPath });
    } catch (err) {
      console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
    }
  }

  try {
    return await chromium.launch(baseOptions);
  } catch (err) {
    throw new Error(
      '无法启动浏览器,请确保已安装 Chrome/Edge 或运行 "npx playwright install chromium"',
      { cause: err },
    );
  }
}

/**
 * Register the anti-detection init script on a browser context.
 *
 * @param {object} context - Playwright browser context.
 * @returns {Promise<void>}
 */
async function installStealthScript(context) {
  await context.addInitScript(() => {
    // Report navigator.webdriver as false.
    Object.defineProperty(navigator, 'webdriver', { get: () => false });

    // Provide a window.chrome object when the browser does not expose one.
    if (!window.chrome) {
      window.chrome = { runtime: {} };
    }

    // Answer the notifications permission query from Notification.permission.
    // The native query must stay bound to the Permissions object, otherwise
    // delegating to it throws "Illegal invocation".
    const permissions = window.navigator.permissions;
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (params) =>
      params.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        : originalQuery(params);

    // Override navigator.languages.
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });

    // Override navigator.plugins with a non-empty list.
    Object.defineProperty(navigator, 'plugins', {
      get: () => [1, 2, 3, 4, 5],
    });
  });
}

/**
 * Read the href of every anchor on the page that looks like a video link.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<string[]>} Matching hrefs, duplicates included.
 */
function collectVideoUrls(page) {
  // The callback runs inside the page, so the pattern has to be inlined.
  return page.$$eval('a', (anchors) =>
    anchors.map((a) => a.href).filter((href) => /\/video\/\d{16,20}/.test(href)),
  );
}

/**
 * Scroll the page (at most 50 steps) until enough video links are present
 * or five consecutive steps bring no new links.
 *
 * @param {object} page - Playwright page.
 * @param {number} count - Requested number of videos; scrolling stops early
 *   once twice as many unique links are visible.
 * @returns {Promise<void>}
 */
async function scrollUntilLoaded(page, count) {
  let lastCount = 0;
  let noNewCount = 0;

  for (let i = 0; i < 50; i++) {
    await page.evaluate(() => {
      window.scrollBy({ top: window.innerHeight * 0.8, behavior: 'smooth' });
    });

    // Randomised pause between scroll steps.
    await sleep(1500 + Math.random() * 1000);

    const uniqueCount = new Set(await collectVideoUrls(page)).size;
    noNewCount = uniqueCount > lastCount ? 0 : noNewCount + 1;
    lastCount = uniqueCount;

    if ((i + 1) % 10 === 0) {
      console.log(` [2/6] 滚动 ${i + 1}/50,当前 ${uniqueCount} 个视频`);
    }

    if (noNewCount >= 5) {
      console.log(` [3/6] 内容加载完成(${uniqueCount} 个视频)`);
      return;
    }

    if (uniqueCount >= count * 2) {
      console.log(` [3/6] 视频数量已充足(${uniqueCount} 个)`);
      return;
    }
  }
}

/**
 * Turn video URLs into `{ user, id, url }` records, de-duplicated by video id.
 *
 * @param {string[]} urls - Candidate video URLs.
 * @param {number} count - Maximum number of records to return.
 * @returns {Array<{user: string, id: string, url: string}>}
 */
function toVideoEntries(urls, count) {
  const results = [];
  const seen = new Set();

  for (const url of urls) {
    if (results.length >= count) break;

    // Accept a trailing slash, query string or fragment after the id so that
    // every link admitted by collectVideoUrls can actually be parsed.
    const videoId = url.match(/\/video\/(\d{16,20})(?:[/?#]|$)/)?.[1];
    if (!videoId || seen.has(videoId)) continue;
    seen.add(videoId);

    const user = url.match(/\/@([^\/]+)/)?.[1];
    if (user) {
      results.push({ user, id: videoId, url });
    }
  }
  return results;
}

/**
 * Collect video links from the TikTok explore page with a headless browser.
 *
 * Launches the configured or auto-detected browser (falling back to
 * Playwright's Chromium), opens EXPLORE_URL, scrolls to load content and
 * extracts the video links found in the page's anchors. The browser is
 * always closed before returning.
 *
 * @param {number} [count=100] - Maximum number of videos to return.
 * @returns {Promise<Array<{user: string, id: string, url: string}>>}
 *   Up to `count` records with unique video ids.
 * @throws {Error} When no browser can be launched, or when a page operation
 *   such as navigation fails.
 */
export async function fetchExplore(count = 100) {
  const { browserPath, browserSource } = resolveBrowserPath();
  const instance = await launchBrowser(browserPath, browserSource);

  try {
    const context = await instance.newContext({
      viewport: { width: 1280, height: 900 },
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      locale: 'en-US',
    });
    await installStealthScript(context);

    const page = await context.newPage();
    await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded', timeout: 30000 });
    console.log(` [1/6] 页面已加载`);

    await sleep(5000);
    await scrollUntilLoaded(page, count);
    await sleep(3000);

    const unique = [...new Set(await collectVideoUrls(page))];
    console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);

    const results = toVideoEntries(unique, count);
    console.log(` [5/6] 去重后 ${results.length} 个`);
    return results;
  } finally {
    await instance.close();
  }
}
@@ -1,36 +1,50 @@
1
- import { fetch, ProxyAgent } from 'undici';
2
- import { DEFAULT_PROXY } from './constants.js';
3
-
4
- const HEADERS = {
5
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
7
- };
8
-
9
- export async function fetchHtml(url, proxyUrl) {
10
- const p = proxyUrl || DEFAULT_PROXY;
11
- const agent = new ProxyAgent(p);
12
- try {
13
- const res = await fetch(url, { headers: HEADERS, dispatcher: agent });
14
- return res.text();
15
- } catch (err) {
16
- throw new Error(`请求 ${url} 失败,代理 ${p} 不可用`);
17
- }
18
- }
19
-
20
- export function makeProfileUrl(handle) {
21
- if (handle.startsWith('http')) return handle;
22
- return `https://www.tiktok.com/${handle}`;
23
- }
24
-
25
- export function isProfileUrl(url) {
26
- return /\/@[\w-]+(?:$|[?#])/.test(url);
27
- }
28
-
29
- export function isVideoUrl(url) {
30
- return /\/video\/\d+/.test(url);
31
- }
32
-
33
- export function extractProfileHandle(url) {
34
- const m = url.match(/https:\/\/www\.tiktok\.com\/(@[\w-]+)/);
35
- return m ? m[1] : null;
36
- }
1
+ import { fetch, ProxyAgent } from 'undici';
2
+ import { DEFAULT_PROXY } from './constants.js';
3
+
4
/** Browser-like request headers sent with every page fetch. */
const HEADERS = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
  'Accept-Encoding': 'gzip, deflate, br',
  'Connection': 'keep-alive',
  'Upgrade-Insecure-Requests': '1',
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'none',
  'Sec-Fetch-User': '?1',
  'Cache-Control': 'max-age=0',
};

/**
 * Download a page through an HTTP proxy and return its body as text.
 *
 * @param {string} url - Address to fetch.
 * @param {string} [proxyUrl] - Proxy to use; falls back to DEFAULT_PROXY
 *   when falsy.
 * @returns {Promise<string>} The response body. The HTTP status is not
 *   inspected, so error pages are returned like any other body.
 * @throws {Error} When the proxy agent cannot be created or the request
 *   fails; the underlying error is attached as `cause`.
 */
export async function fetchHtml(url, proxyUrl) {
  const p = proxyUrl || DEFAULT_PROXY;
  let agent;
  try {
    agent = new ProxyAgent(p);
    const res = await fetch(url, {
      headers: HEADERS,
      dispatcher: agent,
      redirect: 'follow',
    });
    return await res.text();
  } catch (err) {
    throw new Error(`请求 ${url} 失败,代理 ${p} 不可用`, { cause: err });
  } finally {
    // One agent is created per call, so release its connections here.
    // Cleanup is best-effort and must not mask the result or the error above.
    await agent?.close().catch(() => {});
  }
}
33
+
34
/**
 * Build a TikTok profile URL from a handle.
 *
 * @param {string} handle - A handle with or without the leading "@", or an
 *   absolute http(s) URL.
 * @returns {string} The URL unchanged when one was given, otherwise
 *   `https://www.tiktok.com/@<handle>`.
 */
export function makeProfileUrl(handle) {
  // Require a real scheme: a bare "http" prefix also matches handles such as
  // "httpfan", which must still be turned into a profile URL.
  if (/^https?:\/\//i.test(handle)) return handle;

  // Profile paths always start with "@" (isProfileUrl relies on that).
  const name = handle.startsWith('@') ? handle : `@${handle}`;
  return `https://www.tiktok.com/${name}`;
}
38
+
39
/**
 * Check whether a URL points at a profile page, i.e. an "/@handle" path
 * segment followed by nothing but an optional trailing slash, query string
 * or fragment.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} True for profile URLs, false otherwise (including
 *   video URLs that merely contain a handle).
 */
export function isProfileUrl(url) {
  // "." is allowed because handles may contain periods.
  return /\/@[\w.-]+\/?(?:$|[?#])/.test(url);
}
42
+
43
/**
 * Tell whether a URL contains a "/video/<digits>" path segment.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} True when the segment is present anywhere in the URL.
 */
export function isVideoUrl(url) {
  const videoSegment = /\/video\/\d+/;
  return videoSegment.test(url);
}
46
+
47
/**
 * Extract the "@handle" from a https://www.tiktok.com URL.
 *
 * @param {string} url - URL (or text containing one) to search.
 * @returns {string|null} The handle including its leading "@", or null when
 *   the URL is not a www.tiktok.com address with a handle.
 */
export function extractProfileHandle(url) {
  // Periods are allowed inside a handle but not as its last character, so a
  // sentence-ending "." right after the URL is not captured.
  const match = url.match(/https:\/\/www\.tiktok\.com\/(@[\w.-]*[\w-])/);
  return match ? match[1] : null;
}
@@ -0,0 +1,66 @@
1
/**
 * Parse a filter expression such as "isAd=false&locationCreated=DE,ES".
 *
 * Pairs are separated by "&" and split on their first "=". The values
 * "true" and "false" become booleans; every other value becomes an array of
 * trimmed, comma-separated strings. Pairs without "=" or with an empty key
 * are ignored, and a repeated key keeps its last value.
 *
 * @param {string} [filterStr] - Raw filter expression.
 * @returns {Object<string, boolean|string[]>|null} The parsed filter, or
 *   null when the input is empty or contains no usable pair.
 */
export function parseFilter(filterStr) {
  if (!filterStr) return null;

  // Collect into a Map so that a key like "__proto__" is stored as ordinary
  // data instead of replacing the prototype of the result object.
  const entries = new Map();

  for (const pair of filterStr.split('&')) {
    // Split on the first "=" only so values may contain "=" themselves.
    const separatorIndex = pair.indexOf('=');
    if (separatorIndex === -1) continue;

    const key = pair.slice(0, separatorIndex).trim();
    if (!key) continue;

    const value = pair.slice(separatorIndex + 1).trim();
    if (value === 'true') {
      entries.set(key, true);
    } else if (value === 'false') {
      entries.set(key, false);
    } else {
      // Comma-separated alternatives, e.g. locationCreated=DE,ES.
      entries.set(key, value.split(',').map((part) => part.trim()));
    }
  }

  return entries.size > 0 ? Object.fromEntries(entries) : null;
}
27
+
28
/**
 * Keep only the items that satisfy every condition of a parsed filter.
 *
 * An item is dropped when a filtered field is undefined or null. An array
 * condition matches when it includes the field's string form; any other
 * condition must be strictly equal to the field.
 *
 * @param {Array<Object>} results - Items to filter.
 * @param {Object|null} filter - Conditions keyed by field name.
 * @returns {Array<Object>} The input array itself when there is no filter or
 *   no items, otherwise a new array with the matching items.
 */
export function applyFilter(results, filter) {
  if (!filter || results.length === 0) return results;

  const conditions = Object.entries(filter);

  const satisfies = (item, [field, expected]) => {
    const actual = item[field];

    // A missing field can never match.
    if (actual === undefined || actual === null) return false;

    // List of alternatives (e.g. locationCreated=DE,ES) versus exact match.
    return Array.isArray(expected)
      ? expected.includes(String(actual))
      : actual === expected;
  };

  return results.filter((item) =>
    conditions.every((condition) => satisfies(item, condition)),
  );
}
54
+
55
/**
 * Render a parsed filter as human-readable text, e.g.
 * "locationCreated=DE,ES & isAd=false".
 *
 * @param {Object|null} filter - Parsed filter.
 * @returns {string} "key=value" pairs joined by " & ", or an empty string
 *   when no filter is given.
 */
export function formatFilterDescription(filter) {
  if (!filter) return '';

  const segments = [];
  for (const [field, expected] of Object.entries(filter)) {
    // Array conditions are shown as a comma-separated list.
    const shown = Array.isArray(expected) ? expected.join(',') : expected;
    segments.push(`${field}=${shown}`);
  }
  return segments.join(' & ');
}
package/src/lib/io.js CHANGED
@@ -1,13 +1,13 @@
1
- import { writeFileSync, readFileSync } from 'fs';
2
-
3
- export function writeOutput(data, outputFile) {
4
- const output = JSON.stringify(data, null, 2);
5
- const target = outputFile || 'tiktok_data.json';
6
- writeFileSync(target, output, 'utf-8');
7
- console.log(`结果已写入: ${target}`);
8
- }
9
-
10
- export function readUrlFile(filePath) {
11
- const content = readFileSync(filePath, 'utf-8');
12
- return content.split(/\r?\n/).map(l => l.trim()).filter(l => l.startsWith('http'));
13
- }
1
+ import { writeFileSync, readFileSync } from 'fs';
2
+
3
/**
 * Serialise data as pretty-printed JSON (2-space indent), write it to a file
 * and log the destination.
 *
 * @param {*} data - Value to serialise with JSON.stringify.
 * @param {string} [outputFile] - Destination path; 'tiktok_data.json' is
 *   used when falsy.
 * @returns {void}
 */
export function writeOutput(data, outputFile) {
  const serialized = JSON.stringify(data, null, 2);
  const destination = outputFile ? outputFile : 'tiktok_data.json';

  writeFileSync(destination, serialized, 'utf-8');
  console.log(`结果已写入: ${destination}`);
}
9
+
10
/**
 * Read a text file and return the lines that start with "http".
 *
 * Lines are split on LF or CRLF and trimmed before the check, so blank
 * lines and anything not beginning with "http" are skipped.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {string[]} The trimmed URL lines in file order.
 */
export function readUrlFile(filePath) {
  const urls = [];
  for (const rawLine of readFileSync(filePath, 'utf-8').split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.startsWith('http')) {
      urls.push(line);
    }
  }
  return urls;
}