tt-help-cli-ycl 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/args.js CHANGED
@@ -1,8 +1,234 @@
1
1
  import { readFileSync } from 'fs';
2
2
  import { proxy } from './constants.js';
3
3
 
4
const PRESETS = ['fast', 'normal', 'slow', 'stealth'];

/**
 * Parse the arguments that follow the `scrape` subcommand.
 *
 * Recognised options are `-o`/`--output <path>`, `--switch-delay <ms>` and
 * `--comment-delay <ms>`. Every other argument is positional and is read as
 * `<url> [preset] [maxVideos] [maxComments]`; the preset is optional and is
 * matched case-insensitively against PRESETS.
 *
 * Numbers are parsed in base 10. A missing, non-numeric, zero or negative
 * value falls back to the default (20 videos, 999 comments, `null` delays).
 *
 * @param {string[]} args - Arguments after the `scrape` keyword.
 * @returns {object} Scrape settings, plus neutral values for the fields the
 *   default (non-subcommand) parser returns.
 */
function parseScrapeArgs(args) {
  // Positive base-10 integer, or `fallback` when the input is unusable.
  const toPositiveInt = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return parsed > 0 ? parsed : fallback;
  };

  let scrapeSwitchDelay = null;
  let scrapeCommentDelay = null;
  let outputFile = null;
  const positional = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-o' || arg === '--output') {
      // A trailing `-o` without a value stays `null` rather than `undefined`.
      outputFile = args[++i] ?? null;
    } else if (arg === '--switch-delay') {
      scrapeSwitchDelay = toPositiveInt(args[++i], null);
    } else if (arg === '--comment-delay') {
      scrapeCommentDelay = toPositiveInt(args[++i], null);
    } else {
      positional.push(arg);
    }
  }

  const scrapeUrl = positional[0] || null;

  // The second positional is either a preset name or already the video count.
  const presetCandidate = positional[1]?.toLowerCase();
  const hasPreset = PRESETS.includes(presetCandidate);
  const scrapePreset = hasPreset ? presetCandidate : null;
  const countsStart = hasPreset ? 2 : 1;

  const scrapeMaxVideos = toPositiveInt(positional[countsStart], 20);
  const scrapeMaxComments = toPositiveInt(positional[countsStart + 1], 999);

  return {
    subcommand: 'scrape',
    scrapeUrl,
    scrapePreset,
    scrapeMaxVideos,
    scrapeMaxComments,
    scrapeSwitchDelay,
    scrapeCommentDelay,
    outputFile,
    urls: [],
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
64
+
65
/**
 * Parse the arguments that follow the `auto` subcommand.
 *
 * Recognised options are `-o`/`--output <path>`, `--switch-delay <ms>`,
 * `--comment-delay <ms>`, `--watch` and `-p <port>`. Every other argument is
 * positional and is read as
 * `[username...] [preset] [collectMax] [scrapeDepth] [maxComments]`.
 *
 * Changes from the previous behaviour:
 * - `-p <port>` is recognised wherever it appears. Before, a `-p` placed
 *   ahead of `--watch` fell into the positionals, so `-p` became a username
 *   and the port became the collect count.
 * - The three counts are read even when neither a username nor a preset is
 *   given, matching what already happened when only a preset was given.
 * - Only a leading `@` is stripped from usernames.
 * - Numbers are parsed in base 10; unusable values fall back to the defaults
 *   (1 / 50 / 200, port 3000, `null` delays).
 *
 * @param {string[]} args - Arguments after the `auto` keyword.
 * @returns {object} Auto-explore settings, plus neutral values for the fields
 *   the default (non-subcommand) parser returns.
 */
function parseAutoArgs(args) {
  const PRESET_NAMES = ['fast', 'normal', 'slow', 'stealth'];
  const DEFAULT_WATCH_PORT = 3000;

  // Positive base-10 integer, or `fallback` when the input is unusable.
  const toPositiveInt = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return parsed > 0 ? parsed : fallback;
  };
  // Valid TCP port, or the default monitoring port.
  const toPort = (raw) => {
    const port = Number.parseInt(raw, 10);
    return port > 0 && port <= 65535 ? port : DEFAULT_WATCH_PORT;
  };
  const isPreset = (token) => PRESET_NAMES.includes(token.toLowerCase());
  const isNumeric = (token) => !Number.isNaN(Number(token));

  let autoSwitchDelay = null;
  let autoCommentDelay = null;
  let outputFile = null;
  let autoWatch = false;
  let autoWatchPort = DEFAULT_WATCH_PORT;
  const positional = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-o' || arg === '--output') {
      // A trailing `-o` without a value stays `null` rather than `undefined`.
      outputFile = args[++i] ?? null;
    } else if (arg === '--switch-delay') {
      autoSwitchDelay = toPositiveInt(args[++i], null);
    } else if (arg === '--comment-delay') {
      autoCommentDelay = toPositiveInt(args[++i], null);
    } else if (arg === '--watch') {
      autoWatch = true;
    } else if (arg === '-p') {
      // Order-independent: works both before and after `--watch`.
      autoWatchPort = toPort(args[++i]);
    } else {
      positional.push(arg);
    }
  }

  // Collect usernames: anything that is neither a preset nor a number.
  const usernames = [];
  let cursor = 0;
  while (
    cursor < positional.length &&
    !isPreset(positional[cursor]) &&
    !isNumeric(positional[cursor])
  ) {
    usernames.push(positional[cursor].replace(/^@/, ''));
    cursor++;
  }

  // Optional preset; 'fast' when omitted.
  let autoPreset = 'fast';
  if (cursor < positional.length && isPreset(positional[cursor])) {
    autoPreset = positional[cursor].toLowerCase();
    cursor++;
  }

  // Up to three numeric counts follow, each with its own default.
  const autoCollectMax = toPositiveInt(positional[cursor], 1);
  const autoScrapeDepth = toPositiveInt(positional[cursor + 1], 50);
  const autoMaxComments = toPositiveInt(positional[cursor + 2], 200);

  return {
    subcommand: 'auto',
    autoUsernames: usernames,
    autoCollectMax,
    autoScrapeDepth,
    autoMaxComments,
    autoPreset,
    autoSwitchDelay,
    autoCommentDelay,
    outputFile,
    autoWatch,
    autoWatchPort,
    urls: [],
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
145
+
146
/**
 * Parse the arguments that follow the `videos` subcommand.
 *
 * Recognised option: `-o`/`--output <path>`. Every other argument is
 * positional and is read as `<username> [maxVideos]`. Only a leading `@` is
 * removed from the username. The video count is parsed in base 10 and falls
 * back to 5 when it is missing, non-numeric, zero or negative.
 *
 * @param {string[]} args - Arguments after the `videos` keyword.
 * @returns {object} Videos settings, plus neutral values for the fields the
 *   default (non-subcommand) parser returns.
 */
function parseVideosArgs(args) {
  const DEFAULT_MAX_VIDEOS = 5;

  let outputFile = null;
  const positional = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-o' || arg === '--output') {
      // A trailing `-o` without a value stays `null` rather than `undefined`.
      outputFile = args[++i] ?? null;
    } else {
      positional.push(arg);
    }
  }

  const [rawUsername, rawMax] = positional;
  const videosUsername = rawUsername ? rawUsername.replace(/^@/, '') : null;

  const parsedMax = Number.parseInt(rawMax, 10);
  const videosMax = parsedMax > 0 ? parsedMax : DEFAULT_MAX_VIDEOS;

  return {
    subcommand: 'videos',
    videosUsername,
    videosMax,
    outputFile,
    urls: [],
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
182
+
183
/**
 * Parse the arguments that follow the `watch` subcommand.
 *
 * Recognised options are `-o`/`--output <path>` and `-p <port>`; anything
 * else is ignored. The port is parsed in base 10 and falls back to 3000 when
 * it is missing, non-numeric or outside 1-65535.
 *
 * @param {string[]} args - Arguments after the `watch` keyword.
 * @returns {object} Watch settings, plus neutral values for the fields the
 *   default (non-subcommand) parser returns.
 */
function parseWatchArgs(args) {
  const DEFAULT_WATCH_PORT = 3000;

  let outputFile = null;
  let watchPort = DEFAULT_WATCH_PORT;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '-o' || arg === '--output') {
      // A trailing `-o` without a value stays `null` rather than `undefined`.
      outputFile = args[++i] ?? null;
    } else if (arg === '-p') {
      const port = Number.parseInt(args[++i], 10);
      watchPort = port > 0 && port <= 65535 ? port : DEFAULT_WATCH_PORT;
    }
  }

  return {
    subcommand: 'watch',
    outputFile,
    watchPort,
    urls: [],
    outputFormat: 'json',
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };
}
212
+
4
213
  export function parseArgs() {
5
214
  const args = process.argv.slice(2);
215
+
216
+ if (args.length > 0 && args[0] === 'scrape') {
217
+ return parseScrapeArgs(args.slice(1));
218
+ }
219
+
220
+ if (args.length > 0 && args[0] === 'videos') {
221
+ return parseVideosArgs(args.slice(1));
222
+ }
223
+
224
+ if (args.length > 0 && args[0] === 'auto') {
225
+ return parseAutoArgs(args.slice(1));
226
+ }
227
+
228
+ if (args.length > 0 && args[0] === 'watch') {
229
+ return parseWatchArgs(args.slice(1));
230
+ }
231
+
6
232
  const urls = [];
7
233
  let inputFile = null;
8
234
  let outputFile = null;
@@ -59,4 +285,4 @@ export function parseArgs() {
59
285
  }
60
286
 
61
287
  return { urls, outputFile, outputFormat, exploreCount, showConfig, showHelp, customProxy, configAction, configValue, pipeMode, filterStr };
62
- }
288
+ }
@@ -0,0 +1,11 @@
1
+ import { createRequire } from 'module';
2
+
3
// ESM bridge: load the CommonJS implementations through a require() bound to
// this module's URL and re-export the entry points as named ES exports.
const require = createRequire(import.meta.url);

const { runAuto, processUser } = require('../auto-core.cjs');
const {
  ensureBrowserReady,
  ensureTikTokPage,
  closeCommentPanel,
} = require('../scraper/modules/page-helpers.cjs');

export {
  runAuto,
  processUser,
  ensureBrowserReady,
  ensureTikTokPage,
  closeCommentPanel,
};
@@ -40,6 +40,9 @@ function saveBrowser(path) {
40
40
 
41
41
  const HELP_TEXT = [
42
42
  '用法: tt-help [选项]',
43
+ ' tt-help scrape <URL> [预设] [选项]',
44
+ ' tt-help videos <用户名> [最大视频数] [选项]',
45
+ ' tt-help auto <用户名> [preset] [收集数] [切换数] [每视频评论数] [选项]',
43
46
  '',
44
47
  '参数:',
45
48
  ' --explore [count] 从 Explore 页面获取视频列表(默认: 100)',
@@ -52,6 +55,42 @@ const HELP_TEXT = [
52
55
  ' -c, --config 显示当前配置',
53
56
  ' -h, --help 显示帮助',
54
57
  '',
58
+ '浏览器抓取模式 (scrape):',
59
+ ' tt-help scrape <URL> [preset] [最大视频数] [最大评论数] [-o 输出路径]',
60
+ ' tt-help scrape <URL> [最大视频数] [最大评论数] [--switch-delay ms] [--comment-delay ms]',
61
+ ' 预设: fast(600ms/400ms), normal(1.5s/800ms), slow(3s/2s), stealth(5s/3.5s)',
62
+ ' 不指定 -o 则输出到 stdout(可 pipe)',
63
+ '',
64
+ '用户视频模式 (videos):',
65
+ ' tt-help videos <用户名> [最大视频数] [-o 输出路径]',
66
+ ' 获取用户信息和视频列表(浏览器模式)',
67
+ ' 不指定 -o 则输出到 stdout(可 pipe)',
68
+ '',
69
+ '自动探索模式 (auto):',
70
+ ' tt-help auto [用户名...] [preset] [收集数] [切换数] [每视频评论数] [选项]',
71
+ ' 支持多个用户名: tt-help auto @user1 @user2 -o data.json',
72
+ ' 不指定用户名: 从 -o 数据源中读取未处理的用户继续探索',
73
+ ' 收集数: 从种子用户收集的种子视频数(默认 1)',
74
+ ' 切换数: 每个种子作者主页下切换视频的次数(默认 50)',
75
+ ' 每视频评论数: 每个视频抓取评论数(默认 200)',
76
+ ' 预设: fast(默认), normal, slow, stealth',
77
+ ' 选项: -o <路径> 数据源文件(不指定则输出到 stdout)',
78
+ ' --switch-delay <ms> 视频切换延迟(毫秒)',
79
+ ' --comment-delay <ms> 评论滚动延迟(毫秒)',
80
+ ' --watch [-p 3000] 启动 Web 监控页面实时查看数据',
81
+ ' 返回: 纯用户数组,按 uniqueId 去重',
82
+ ' 示例: tt-help auto username -o result.json',
83
+ ' tt-help auto @a @b fast 1 30 100 -o data.json',
84
+ ' tt-help auto -o data.json # 续跑',
85
+ ' tt-help auto username --watch -o data.json # 带监控页面',
86
+ '',
87
+ '实时监控模式 (watch):',
88
+ ' tt-help watch -o <数据文件> [-p 端口]',
89
+ ' 启动 Web 监控页面,实时查看采集数据',
90
+ ' 支持在 auto 模式中通过 --watch 参数同时启动',
91
+ ' 示例: tt-help watch -o data.json',
92
+ ' tt-help watch -o data.json -p 8080',
93
+ '',
55
94
  '过滤示例:',
56
95
  ' --filter "ttSeller=true&verified=false" 过滤卖家且未认证',
57
96
  ' --filter "locationCreated=DE,ES" 过滤指定地区',
@@ -72,6 +111,13 @@ const HELP_TEXT = [
72
111
  ' tt-help --explore -f raw # 仅输出 URL 列表',
73
112
  ' tt-help --explore -f raw -o urls.txt # 保存 URL 到文件',
74
113
  ' tt-help -i urls.txt -o result.json # 再爬取这些 URL',
114
+ ' tt-help scrape <URL> fast 50 999 # 浏览器抓取(快速预设)',
115
+ ' tt-help scrape <URL> slow -o out.json # 慢速预设,输出到文件',
116
+ ' tt-help scrape <URL> 20 999 | jq . # 输出到 stdout 可 pipe',
117
+ ' tt-help videos username 100 # 获取用户 100 个视频',
118
+ ' tt-help videos username 50 -o out.json # 输出到文件',
119
+ ' tt-help auto username -o out.json # 自动探索(默认 fast/1/50/200)',
120
+ ' tt-help auto username slow 1 30 100 -o out.json # 慢速探索',
75
121
  ' tt-help config set http://127.0.0.1:7890',
76
122
  ' tt-help https://www.tiktok.com/@username',
77
123
  ' tt-help https://... --filter "ttSeller=true&locationCreated=DE"',
@@ -8,6 +8,32 @@ function sleep(ms) {
8
8
  return new Promise(r => setTimeout(r, ms));
9
9
  }
10
10
 
11
// Compiled once at module load instead of on every call. No `g`/`y` flag, so
// `.test()` keeps no state between calls.
const RETRYABLE_ERROR_PATTERNS = [
  /interrupted/i,
  /net::/i,
  /econn/i,
  /etimedout/i,
  /enotfound/i,
  /eai_again/i,
  /esocketreset/i,
  /connection.*refused/i,
  /connection.*reset/i,
  /failed.*navigate/i,
  /target.*closed/i,
  /crash/i,
];

/**
 * Decide whether a failure looks transient and is worth retrying.
 *
 * The check is a case-insensitive match of the error text against
 * RETRYABLE_ERROR_PATTERNS. The text is `error.message` when that is a
 * non-empty string, otherwise the string form of the thrown value, so thrown
 * strings and objects with a non-string `message` are handled without
 * throwing.
 *
 * @param {*} error - The caught value (usually an Error).
 * @returns {boolean} `true` when the text matches a retryable pattern.
 */
function isRetryableError(error) {
  if (!error) return false;

  const hasMessage = typeof error.message === 'string' && error.message !== '';
  const text = hasMessage ? error.message : String(error);

  return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(text));
}
17
+
18
/**
 * Call `page.goto(url, options)` and retry with exponential backoff when the
 * failure looks transient.
 *
 * At least one attempt is always made: a negative, fractional or non-numeric
 * `maxRetries` is normalised to a non-negative whole number (before, a
 * negative value skipped the loop and threw `undefined`). After a failed
 * attempt the wait is `baseDelay * 2^attempt` plus up to 500 ms of random
 * jitter. Errors that `isRetryableError` rejects, and the error from the
 * final attempt, are rethrown unchanged.
 *
 * Relies on `sleep` and `isRetryableError` defined elsewhere in this file.
 *
 * @param {object} page - Object exposing an async `goto(url, options)`.
 * @param {string} url - Address to navigate to.
 * @param {object} options - Passed through to `page.goto`.
 * @param {{maxRetries?: number, baseDelay?: number}} [retryOptions] - Number
 *   of retries after the first attempt (default 3) and base delay in ms
 *   (default 3000).
 * @returns {Promise<*>} Whatever `page.goto` resolves with.
 */
async function retryGoto(page, url, options, { maxRetries = 3, baseDelay = 3000 } = {}) {
  // NaN becomes 0; Infinity is kept so callers can still ask for unbounded retries.
  const retryBudget = Math.max(0, Math.floor(Number(maxRetries)) || 0);

  for (let attempt = 0; ; attempt++) {
    try {
      return await page.goto(url, options);
    } catch (error) {
      // Budget check comes first so the last attempt never consults the classifier.
      if (attempt >= retryBudget || !isRetryableError(error)) {
        throw error;
      }
      const jitter = Math.random() * 500;
      const waitTime = baseDelay * 2 ** attempt + jitter;
      console.log(` [重试] ${attempt + 1}/${retryBudget},${Math.round(waitTime)}ms 后重试...`);
      await sleep(waitTime);
    }
  }
}
36
+
11
37
  function detectBrowser() {
12
38
  const isMac = process.platform === 'darwin';
13
39
  const isWin = process.platform === 'win32';
@@ -146,7 +172,7 @@ export async function fetchExplore(count = 100) {
146
172
  });
147
173
 
148
174
  const page = await context.newPage();
149
- await page.goto(EXPLORE_URL, { waitUntil: 'domcontentloaded', timeout: 30000 });
175
+ await retryGoto(page, EXPLORE_URL, { waitUntil: 'load', timeout: 30000 });
150
176
  console.log(` [1/6] 页面已加载`);
151
177
 
152
178
  await sleep(5000);
@@ -18,17 +18,27 @@ const HEADERS = {
18
18
/**
 * Download a page's HTML through an HTTP proxy, making up to three attempts.
 *
 * After a failed attempt (except the last) it waits `2^(attempt-1) * 3000` ms
 * plus up to 2000 ms of random jitter. When every attempt fails, the thrown
 * Error carries the last underlying failure as `cause`, so the real reason
 * (DNS failure, connection refused, timeout and so on) is not lost.
 *
 * @param {string} url - Address to fetch.
 * @param {string} [proxyUrl] - Proxy to use; DEFAULT_PROXY when falsy.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When all attempts fail.
 */
export async function fetchHtml(url, proxyUrl) {
  const MAX_ATTEMPTS = 3;
  const p = proxyUrl || DEFAULT_PROXY;
  const agent = new ProxyAgent(p);
  let lastError;

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    try {
      const res = await fetch(url, {
        headers: HEADERS,
        dispatcher: agent,
        redirect: 'follow',
      });
      return await res.text();
    } catch (err) {
      lastError = err;
      if (attempt < MAX_ATTEMPTS) {
        const waitMs = 2 ** (attempt - 1) * 3000 + Math.random() * 2000;
        await new Promise((resolve) => setTimeout(resolve, waitMs));
      }
    }
  }

  throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`, { cause: lastError });
}
33
43
 
34
44
  export function makeProfileUrl(handle) {
@@ -0,0 +1,6 @@
1
+ import { createRequire } from 'module';
2
+
3
// ESM bridge: load the CommonJS implementation through a require() bound to
// this module's URL and re-export its entry point as a named ES export.
const require = createRequire(import.meta.url);

const { runGetUserVideos } = require('../get-user-videos-core.cjs');

export { runGetUserVideos };
@@ -0,0 +1,6 @@
1
+ import { createRequire } from 'module';
2
+
3
// ESM bridge: load the CommonJS scraper core through a require() bound to
// this module's URL and re-export its entry point as a named ES export.
const require = createRequire(import.meta.url);

const { runScrape } = require('../scraper/core.cjs');

export { runScrape };