tt-help-cli-ycl 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -31,7 +31,7 @@
31
31
  },
32
32
  "repository": {
33
33
  "type": "git",
34
- "url": "https://github.com/jsjhycl/tt-help-cli.git"
34
+ "url": "git+https://github.com/jsjhycl/tt-help-cli.git"
35
35
  },
36
36
  "bugs": {
37
37
  "url": "https://github.com/jsjhycl/tt-help-cli/issues"
@@ -41,4 +41,4 @@
41
41
  "playwright": "^1.59.1",
42
42
  "undici": "^8.1.0"
43
43
  }
44
- }
44
+ }
package/src/lib/args.js CHANGED
@@ -14,6 +14,7 @@ export function parseArgs() {
14
14
  let configAction = null;
15
15
  let configValue = null;
16
16
  let pipeMode = false;
17
+ let filterStr = null;
17
18
 
18
19
  for (let i = 0; i < args.length; i++) {
19
20
  const arg = args[i];
@@ -24,6 +25,8 @@ export function parseArgs() {
24
25
  : 100;
25
26
  } else if (arg === '--proxy') {
26
27
  customProxy = args[++i];
28
+ } else if (arg === '--filter') {
29
+ filterStr = args[++i];
27
30
  } else if (arg === 'config') {
28
31
  configAction = args[i + 1];
29
32
  if (configAction === 'set' || configAction === 'set-proxy' || configAction === 'set-browser') {
@@ -55,5 +58,5 @@ export function parseArgs() {
55
58
  urls.push(...lines);
56
59
  }
57
60
 
58
- return { urls, outputFile, outputFormat, exploreCount, showConfig, showHelp, customProxy, configAction, configValue, pipeMode };
61
+ return { urls, outputFile, outputFormat, exploreCount, showConfig, showHelp, customProxy, configAction, configValue, pipeMode, filterStr };
59
62
  }
@@ -45,12 +45,18 @@ const HELP_TEXT = [
45
45
  ' --explore [count] 从 Explore 页面获取视频列表(默认: 100)',
46
46
  ' --pipe 将 Explore 结果自动传给 URL 爬取',
47
47
  ' --proxy <地址> 临时指定代理地址',
48
+ ' --filter <条件> 过滤结果(格式: key=value&key2=value2)',
48
49
  ' -i, --input <file> 从文件读取 URL 列表(每行一个)',
49
50
  ' -o, --output <file> 指定输出文件(默认: tiktok_data.json)',
50
51
  ' -f, --format <fmt> 输出格式: json(默认), table, raw',
51
52
  ' -c, --config 显示当前配置',
52
53
  ' -h, --help 显示帮助',
53
54
  '',
55
+ '过滤示例:',
56
+ ' --filter "ttSeller=true&verified=false" 过滤卖家且未认证',
57
+ ' --filter "locationCreated=DE,ES" 过滤指定地区',
58
+ ' --filter "ttSeller=true&locationCreated=US" 组合条件',
59
+ '',
54
60
  '配置代理:',
55
61
  ' tt-help config set http://127.0.0.1:7890 设置代理',
56
62
  ' tt-help config show 查看配置',
@@ -68,6 +74,7 @@ const HELP_TEXT = [
68
74
  ' tt-help -i urls.txt -o result.json # 再爬取这些 URL',
69
75
  ' tt-help config set http://127.0.0.1:7890',
70
76
  ' tt-help https://www.tiktok.com/@username',
77
+ ' tt-help https://... --filter "ttSeller=true&locationCreated=DE"',
71
78
  ];
72
79
 
73
80
  const CONFIG_TEXT = [
@@ -2,16 +2,30 @@ import { fetch, ProxyAgent } from 'undici';
2
2
  import { DEFAULT_PROXY } from './constants.js';
3
3
 
4
4
  const HEADERS = {
5
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
5
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
6
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
7
+ 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
8
+ 'Accept-Encoding': 'gzip, deflate, br',
9
+ 'Connection': 'keep-alive',
10
+ 'Upgrade-Insecure-Requests': '1',
11
+ 'Sec-Fetch-Dest': 'document',
12
+ 'Sec-Fetch-Mode': 'navigate',
13
+ 'Sec-Fetch-Site': 'none',
14
+ 'Sec-Fetch-User': '?1',
15
+ 'Cache-Control': 'max-age=0',
7
16
  };
8
17
 
9
18
  export async function fetchHtml(url, proxyUrl) {
10
19
  const p = proxyUrl || DEFAULT_PROXY;
11
20
  const agent = new ProxyAgent(p);
12
21
  try {
13
- const res = await fetch(url, { headers: HEADERS, dispatcher: agent });
14
- return res.text();
22
+ const res = await fetch(url, {
23
+ headers: HEADERS,
24
+ dispatcher: agent,
25
+ redirect: 'follow',
26
+ });
27
+ const html = await res.text();
28
+ return html;
15
29
  } catch (err) {
16
30
  throw new Error(`请求 ${url} 失败,代理 ${p} 不可用`);
17
31
  }
@@ -0,0 +1,66 @@
1
/**
 * Parse a `--filter` expression such as `ttSeller=true&locationCreated=DE,ES`
 * into a filter object.
 *
 * Conditions are separated by `&`; each condition is `key=value`. The literal
 * values `true` and `false` become booleans. Any other value is split on `,`
 * into an array of trimmed, non-empty strings. Pairs without `=`, with an
 * empty key, or with no usable value are skipped. When a key is repeated,
 * the last usable occurrence wins.
 *
 * @param {string|null|undefined} filterStr - Raw filter expression.
 * @returns {Object<string, boolean|string[]>|null} The parsed conditions, or
 *   `null` when the input is empty or yields no usable condition.
 */
export function parseFilter(filterStr) {
  if (!filterStr) return null;

  // Collect into a Map so that a key such as "__proto__" is stored as a
  // normal entry instead of replacing the prototype of a plain object.
  const conditions = new Map();

  for (const pair of filterStr.split('&')) {
    // Split on the first "=" only, so values that contain "=" stay intact.
    const separatorIndex = pair.indexOf('=');
    if (separatorIndex === -1) continue;

    // Validate the key after trimming so a whitespace-only key is rejected.
    const key = pair.slice(0, separatorIndex).trim();
    if (!key) continue;

    const rawValue = pair.slice(separatorIndex + 1).trim();

    // Boolean literals
    if (rawValue === 'true') {
      conditions.set(key, true);
    } else if (rawValue === 'false') {
      conditions.set(key, false);
    } else {
      // Comma-separated alternatives (e.g. locationCreated=DE,ES). Empty
      // entries left by stray commas are dropped so they cannot match
      // records whose field is an empty string.
      const values = rawValue
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');
      if (values.length > 0) {
        conditions.set(key, values);
      }
    }
  }

  return conditions.size > 0 ? Object.fromEntries(conditions) : null;
}
27
+
28
/**
 * Keep only the records that satisfy every condition in `filter`.
 *
 * For each condition the record's field is read by key:
 * - a field that is `undefined` or `null` rejects the record;
 * - an array condition passes when it includes the field's string form;
 * - any other condition passes only on strict equality with the field.
 *
 * @param {Array<Object>} results - Records to filter.
 * @param {Object|null} filter - Conditions keyed by field name.
 * @returns {Array<Object>} `results` itself when there is no filter or no
 *   records; otherwise a new array of the matching records.
 */
export function applyFilter(results, filter) {
  const nothingToDo = !filter || results.length === 0;
  if (nothingToDo) {
    return results;
  }

  const conditions = Object.entries(filter);

  const satisfiesAll = (record) =>
    conditions.every(([field, wanted]) => {
      const found = record[field];

      // A missing field never matches.
      if (found === undefined || found === null) {
        return false;
      }

      // List condition (e.g. locationCreated=DE,ES) compares string forms;
      // everything else (booleans included) must be strictly equal.
      return Array.isArray(wanted)
        ? wanted.includes(String(found))
        : found === wanted;
    });

  return results.filter((record) => satisfiesAll(record));
}
54
+
55
/**
 * Render a filter object as a single human-readable string.
 *
 * Each condition becomes `key=value`; array values are joined with `,`.
 * Conditions are joined with ` & `.
 *
 * @param {Object|null} filter - Conditions keyed by field name.
 * @returns {string} The description, or an empty string when `filter` is falsy.
 */
export function formatFilterDescription(filter) {
  if (!filter) {
    return '';
  }

  const pieces = [];
  for (const [name, expected] of Object.entries(filter)) {
    const shown = Array.isArray(expected) ? expected.join(',') : expected;
    pieces.push(`${name}=${shown}`);
  }

  return pieces.join(' & ');
}
package/src/main.mjs CHANGED
@@ -3,6 +3,7 @@ import { HELP_TEXT, CONFIG_TEXT, proxy, configFile, configPath, DEFAULT_PROXY, s
3
3
  import { fetchExplore } from './lib/explore.js';
4
4
  import { processUrl } from './lib/scrape.js';
5
5
  import { deduplicate, formatOutput } from './lib/output.js';
6
+ import { parseFilter, applyFilter, formatFilterDescription } from './lib/filter.js';
6
7
  import { writeFileSync, readFileSync, existsSync } from 'fs';
7
8
 
8
9
  function showConfig(urls, outputFile) {
@@ -79,7 +80,7 @@ function cleanError(msg) {
79
80
  .trim();
80
81
  }
81
82
 
82
- async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, isPipe) {
83
+ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, isPipe, filter) {
83
84
  console.log(`\n代理: ${proxyUrl}`);
84
85
  console.log(`Explore 数量: ${exploreCount}`);
85
86
  if (urls.length > 0) {
@@ -97,7 +98,7 @@ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat
97
98
  if (isPipe) {
98
99
  const videoUrls = exploreResults.map(r => r.url).filter(Boolean);
99
100
  if (videoUrls.length > 0) {
100
- await runScrape(videoUrls, proxyUrl, outputFile, outputFormat);
101
+ await runScrape(videoUrls, proxyUrl, outputFile, outputFormat, filter);
101
102
  return;
102
103
  }
103
104
  }
@@ -142,8 +143,9 @@ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat
142
143
  }
143
144
 
144
145
  const uniqueResults = deduplicate(allResults);
146
+ const filteredResults = applyFilter(uniqueResults, filter);
145
147
 
146
- if (uniqueResults.length === 0) {
148
+ if (filteredResults.length === 0) {
147
149
  console.log('\n未获取到数据');
148
150
  if (outputFile) {
149
151
  writeFileSync(outputFile, '[]', 'utf-8');
@@ -151,7 +153,7 @@ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat
151
153
  return;
152
154
  }
153
155
 
154
- const output = formatOutput(uniqueResults, outputFormat);
156
+ const output = formatOutput(filteredResults, outputFormat);
155
157
 
156
158
  if (outputFile) {
157
159
  writeFileSync(outputFile, output, 'utf-8');
@@ -159,10 +161,15 @@ async function runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat
159
161
  } else {
160
162
  console.log(output);
161
163
  }
162
- console.log(`\n共 ${uniqueResults.length} 个数据`);
164
+
165
+ if (filter) {
166
+ console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
167
+ } else {
168
+ console.log(`\n共 ${filteredResults.length} 个数据`);
169
+ }
163
170
  }
164
171
 
165
- async function runScrape(urls, proxyUrl, outputFile, outputFormat) {
172
+ async function runScrape(urls, proxyUrl, outputFile, outputFormat, filter) {
166
173
  const allResults = [];
167
174
  const errors = [];
168
175
 
@@ -179,9 +186,10 @@ async function runScrape(urls, proxyUrl, outputFile, outputFormat) {
179
186
  console.log();
180
187
 
181
188
  const uniqueResults = deduplicate(allResults);
189
+ const filteredResults = applyFilter(uniqueResults, filter);
182
190
 
183
191
  if (errors.length > 0) {
184
- if (uniqueResults.length === 0) {
192
+ if (filteredResults.length === 0) {
185
193
  const msg = errors[0].message;
186
194
  if (msg.includes('不可用') || msg.includes('连接被拒绝') || msg.includes('连接中断') ||
187
195
  msg.includes('超时') || msg.includes('无法解析')) {
@@ -218,7 +226,7 @@ async function runScrape(urls, proxyUrl, outputFile, outputFormat) {
218
226
  }
219
227
  }
220
228
 
221
- const output = formatOutput(uniqueResults, outputFormat);
229
+ const output = formatOutput(filteredResults, outputFormat);
222
230
 
223
231
  if (outputFile) {
224
232
  writeFileSync(outputFile, output, 'utf-8');
@@ -226,12 +234,18 @@ async function runScrape(urls, proxyUrl, outputFile, outputFormat) {
226
234
  } else {
227
235
  console.log(output);
228
236
  }
229
- console.log(`\n共 ${uniqueResults.length} 个用户的数据`);
237
+
238
+ if (filter) {
239
+ console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
240
+ } else {
241
+ console.log(`\n共 ${filteredResults.length} 个用户的数据`);
242
+ }
230
243
  }
231
244
 
232
245
  async function main() {
233
- const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, customProxy, configAction, configValue, pipeMode } = parseArgs();
246
+ const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, customProxy, configAction, configValue, pipeMode, filterStr } = parseArgs();
234
247
  const proxyUrl = customProxy || proxy;
248
+ const filter = parseFilter(filterStr);
235
249
 
236
250
  if (showHelp) {
237
251
  showUsage();
@@ -254,9 +268,9 @@ async function main() {
254
268
  }
255
269
 
256
270
  if (exploreCount > 0) {
257
- await runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode);
271
+ await runExplore(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter);
258
272
  } else {
259
- await runScrape(urls, proxyUrl, outputFile, outputFormat);
273
+ await runScrape(urls, proxyUrl, outputFile, outputFormat, filter);
260
274
  }
261
275
  }
262
276