tt-help-cli-ycl 1.3.12 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -45
- package/scripts/run-explore.bat +68 -68
- package/scripts/run-explore.ps1 +81 -81
- package/scripts/run-explore.sh +73 -73
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/src/cli/auto.js +186 -157
- package/src/cli/explore.js +227 -193
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +216 -0
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +456 -402
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +52 -10
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +146 -87
- package/src/lib/constants.js +119 -115
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +40 -40
- package/src/lib/url.js +52 -52
- package/src/main.js +2 -0
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.js +203 -194
- package/src/scraper/core.js +211 -190
- package/src/scraper/explore-core.js +180 -171
- package/src/scraper/modules/captcha-handler.js +114 -114
- package/src/scraper/modules/comment-extractor.js +74 -69
- package/src/scraper/modules/follow-extractor.js +121 -121
- package/src/scraper/modules/guess-extractor.js +51 -51
- package/src/scraper/modules/page-helpers.js +48 -48
- package/src/scraper/refresh-core.js +179 -0
- package/src/videos/core.js +126 -126
- package/src/watch/data-store.js +431 -302
- package/src/watch/public/index.html +721 -701
- package/src/watch/server.js +483 -359
package/src/lib/fetcher.js
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
|
-
import { fetch, ProxyAgent } from 'undici';
|
|
2
|
-
import { DEFAULT_PROXY } from './constants.js';
|
|
3
|
-
import { isProfileUrl } from './url.js';
|
|
4
|
-
|
|
5
|
-
const HEADERS = {
|
|
6
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
7
|
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
8
|
-
'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
|
|
9
|
-
'Accept-Encoding': 'gzip, deflate, br',
|
|
10
|
-
'Connection': 'keep-alive',
|
|
11
|
-
'Upgrade-Insecure-Requests': '1',
|
|
12
|
-
'Sec-Fetch-Dest': 'document',
|
|
13
|
-
'Sec-Fetch-Mode': 'navigate',
|
|
14
|
-
'Sec-Fetch-Site': 'none',
|
|
15
|
-
'Sec-Fetch-User': '?1',
|
|
16
|
-
'Cache-Control': 'max-age=0',
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
export { isProfileUrl } from './url.js';
|
|
20
|
-
|
|
21
|
-
export async function fetchHtml(url, proxyUrl) {
|
|
22
|
-
const p = proxyUrl || DEFAULT_PROXY;
|
|
23
|
-
const agent = new ProxyAgent(p);
|
|
24
|
-
let lastError;
|
|
25
|
-
|
|
26
|
-
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
27
|
-
try {
|
|
28
|
-
const res = await fetch(url, {
|
|
29
|
-
headers: HEADERS,
|
|
30
|
-
dispatcher: agent,
|
|
31
|
-
redirect: 'follow',
|
|
32
|
-
});
|
|
33
|
-
const html = await res.text();
|
|
34
|
-
return html;
|
|
35
|
-
} catch (err) {
|
|
36
|
-
lastError = err;
|
|
37
|
-
if (attempt < 3) {
|
|
38
|
-
const waitMs = Math.pow(2, attempt - 1) * 3000 + Math.random() * 2000;
|
|
39
|
-
await new Promise(r => setTimeout(r, waitMs));
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
|
|
45
|
-
}
|
|
1
|
+
import { fetch, ProxyAgent } from 'undici';
|
|
2
|
+
import { DEFAULT_PROXY } from './constants.js';
|
|
3
|
+
import { isProfileUrl } from './url.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Browser-like request headers passed as the `headers` option of the fetch
 * call in this module.
 *
 * Frozen so that no code path can mutate the shared table between requests.
 */
const HEADERS = Object.freeze({
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
  'Accept-Encoding': 'gzip, deflate, br',
  'Connection': 'keep-alive',
  'Upgrade-Insecure-Requests': '1',
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'none',
  'Sec-Fetch-User': '?1',
  'Cache-Control': 'max-age=0',
});
|
|
18
|
+
|
|
19
|
+
export { isProfileUrl } from './url.js';
|
|
20
|
+
|
|
21
|
+
/**
 * Download a URL through an HTTP proxy and return the response body as text.
 *
 * Makes up to three attempts. After a failed attempt it waits 3 s, then 6 s,
 * each with up to 2 s of random jitter, before trying again. The HTTP status
 * code is not inspected: whatever body the server returns is handed back.
 *
 * @param {string} url - Address to fetch.
 * @param {string} [proxyUrl] - Proxy to route through; falls back to
 *   `DEFAULT_PROXY` when falsy.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When all three attempts fail. The last underlying error is
 *   attached as `cause` so the real failure reason is not lost.
 */
export async function fetchHtml(url, proxyUrl) {
  const p = proxyUrl || DEFAULT_PROXY;
  const agent = new ProxyAgent(p);
  const maxAttempts = 3;
  let lastError;

  try {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        const res = await fetch(url, {
          headers: HEADERS,
          dispatcher: agent,
          redirect: 'follow',
        });
        return await res.text();
      } catch (err) {
        lastError = err;
        if (attempt < maxAttempts) {
          // Exponential backoff (3 s, 6 s) plus jitter.
          const waitMs = 2 ** (attempt - 1) * 3000 + Math.random() * 2000;
          await new Promise((resolve) => setTimeout(resolve, waitMs));
        }
      }
    }
  } finally {
    // The agent is created per call, so release its sockets when done.
    // Best-effort and deliberately not awaited: a cleanup failure must not
    // mask the fetch result or delay the caller.
    void agent.close().catch(() => {});
  }

  throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`, { cause: lastError });
}
|
package/src/lib/filter.js
CHANGED
|
@@ -1,66 +1,66 @@
|
|
|
1
|
-
export function parseFilter(filterStr) {
|
|
2
|
-
if (!filterStr) return null;
|
|
3
|
-
|
|
4
|
-
const filter = {};
|
|
5
|
-
const pairs = filterStr.split('&');
|
|
6
|
-
|
|
7
|
-
for (const pair of pairs) {
|
|
8
|
-
const [key, value] = pair.split('=');
|
|
9
|
-
if (!key || value === undefined) continue;
|
|
10
|
-
|
|
11
|
-
const trimmedKey = key.trim();
|
|
12
|
-
const trimmedValue = value.trim();
|
|
13
|
-
|
|
14
|
-
// 处理布尔值
|
|
15
|
-
if (trimmedValue === 'true') {
|
|
16
|
-
filter[trimmedKey] = true;
|
|
17
|
-
} else if (trimmedValue === 'false') {
|
|
18
|
-
filter[trimmedKey] = false;
|
|
19
|
-
} else {
|
|
20
|
-
// 支持逗号分隔的多个值(如 locationCreated=DE,ES)
|
|
21
|
-
filter[trimmedKey] = trimmedValue.split(',').map(v => v.trim());
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
return Object.keys(filter).length > 0 ? filter : null;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export function applyFilter(results, filter) {
|
|
29
|
-
if (!filter || results.length === 0) return results;
|
|
30
|
-
|
|
31
|
-
return results.filter(item => {
|
|
32
|
-
for (const [key, expectedValue] of Object.entries(filter)) {
|
|
33
|
-
const actualValue = item[key];
|
|
34
|
-
|
|
35
|
-
// 如果字段不存在,过滤掉
|
|
36
|
-
if (actualValue === undefined || actualValue === null) {
|
|
37
|
-
return false;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// 数组值匹配(如 locationCreated=DE,ES)
|
|
41
|
-
if (Array.isArray(expectedValue)) {
|
|
42
|
-
if (!expectedValue.includes(String(actualValue))) {
|
|
43
|
-
return false;
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
// 布尔值或精确匹配
|
|
47
|
-
else if (actualValue !== expectedValue) {
|
|
48
|
-
return false;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
return true;
|
|
52
|
-
});
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
export function formatFilterDescription(filter) {
|
|
56
|
-
if (!filter) return '';
|
|
57
|
-
|
|
58
|
-
const parts = Object.entries(filter).map(([key, value]) => {
|
|
59
|
-
if (Array.isArray(value)) {
|
|
60
|
-
return `${key}=${value.join(',')}`;
|
|
61
|
-
}
|
|
62
|
-
return `${key}=${value}`;
|
|
63
|
-
});
|
|
64
|
-
|
|
65
|
-
return parts.join(' & ');
|
|
66
|
-
}
|
|
1
|
+
/**
 * Parse a filter expression such as `verified=true&locationCreated=DE,ES`.
 *
 * Pairs are separated by `&`. Each pair is split at its FIRST `=`, so a value
 * may itself contain `=`. Keys and values are trimmed. The values `true` and
 * `false` become booleans; every other value becomes an array of trimmed,
 * comma-separated strings. Pairs without `=` or with an empty key are skipped.
 *
 * @param {string} filterStr - Raw filter expression.
 * @returns {Object<string, boolean|string[]>|null} Parsed filter, or `null`
 *   when the input is empty or contains no usable pair.
 */
export function parseFilter(filterStr) {
  if (!filterStr) return null;

  const filter = {};

  for (const pair of filterStr.split('&')) {
    const eq = pair.indexOf('=');
    if (eq < 0) continue;

    // Validate the key after trimming so " =x" cannot create an empty field.
    const key = pair.slice(0, eq).trim();
    if (!key) continue;

    const value = pair.slice(eq + 1).trim();

    if (value === 'true') {
      filter[key] = true;
    } else if (value === 'false') {
      filter[key] = false;
    } else {
      // Comma-separated alternatives (e.g. locationCreated=DE,ES).
      filter[key] = value.split(',').map((v) => v.trim());
    }
  }

  return Object.keys(filter).length > 0 ? filter : null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Keep only the items that satisfy every field of a parsed filter.
 *
 * For each filter field the item must have a non-null, non-undefined value.
 * An array expectation matches when it includes the item's value converted
 * to a string; any other expectation must be strictly equal to the value.
 *
 * @param {Array<Object>} results - Items to filter.
 * @param {Object|null} filter - Parsed filter; falsy means "no filtering".
 * @returns {Array<Object>} `results` itself when there is nothing to do,
 *   otherwise a new filtered array.
 */
export function applyFilter(results, filter) {
  if (!filter || results.length === 0) return results;

  const criteria = Object.entries(filter);

  const satisfiesAll = (item) =>
    criteria.every(([field, wanted]) => {
      const actual = item[field];

      // A missing field never matches.
      if (actual === undefined || actual === null) return false;

      // List of alternatives (e.g. locationCreated=DE,ES) vs. exact match.
      return Array.isArray(wanted)
        ? wanted.includes(String(actual))
        : actual === wanted;
    });

  return results.filter(satisfiesAll);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Render a parsed filter as human-readable text, e.g.
 * `verified=true & locationCreated=DE,ES`.
 *
 * @param {Object|null} filter - Parsed filter.
 * @returns {string} `key=value` pairs joined by ` & `; array values are
 *   comma-joined. Empty string when `filter` is falsy.
 */
export function formatFilterDescription(filter) {
  if (!filter) return '';

  const parts = [];
  for (const [key, value] of Object.entries(filter)) {
    const shown = Array.isArray(value) ? value.join(',') : value;
    parts.push(`${key}=${shown}`);
  }

  return parts.join(' & ');
}
|
package/src/lib/io.js
CHANGED
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
import { extractDisplayPath } from './url.js';
|
|
2
|
-
|
|
3
|
-
let lastBarCount = 0;
|
|
4
|
-
|
|
5
|
-
export function createProgressBar(current, total, maxWidth = 30) {
|
|
6
|
-
const filled = Math.round((current / total) * maxWidth);
|
|
7
|
-
return '█'.repeat(filled).padEnd(maxWidth);
|
|
8
|
-
}
|
|
9
|
-
|
|
10
|
-
export function calculateConcurrency(total) {
|
|
11
|
-
return Math.min(5, Math.max(1, Math.floor(total / 10)), total);
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export function createMultiProgressBars(count) {
|
|
15
|
-
return Array.from({ length: count }, () => ({
|
|
16
|
-
current: 0,
|
|
17
|
-
total: 0,
|
|
18
|
-
status: 'pending',
|
|
19
|
-
url: '',
|
|
20
|
-
}));
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export function renderMultiProgressBars(bars, maxWidth = 30) {
|
|
24
|
-
const activeBars = bars.filter(bar => bar.total > 0);
|
|
25
|
-
|
|
26
|
-
if (activeBars.length === 0) return;
|
|
27
|
-
|
|
28
|
-
const lines = activeBars.map((bar) => {
|
|
29
|
-
const prog = createProgressBar(bar.current, bar.total, maxWidth);
|
|
30
|
-
const icon = bar.status === 'done' ? '✓' :
|
|
31
|
-
bar.status === 'error' ? '' : '⟳';
|
|
32
|
-
const urlDisplay = bar.url ? extractDisplayPath(bar.url) : '';
|
|
33
|
-
return ` [${prog}] ${bar.current}/${bar.total} ${icon} ${urlDisplay}`;
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
const output = lines.join('\n');
|
|
37
|
-
|
|
38
|
-
if (lastBarCount > 0) {
|
|
39
|
-
process.stdout.write(`\x1b[${lastBarCount}A`);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
process.stdout.write('\x1b[0J');
|
|
43
|
-
process.stdout.write(output + '\n');
|
|
44
|
-
|
|
45
|
-
lastBarCount = activeBars.length;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
export function clearProgressBars() {
|
|
49
|
-
if (lastBarCount > 0) {
|
|
50
|
-
process.stdout.write(`\x1b[${lastBarCount}A`);
|
|
51
|
-
process.stdout.write('\x1b[0J');
|
|
52
|
-
lastBarCount = 0;
|
|
53
|
-
}
|
|
54
|
-
}
|
|
1
|
+
import { extractDisplayPath } from './url.js';
|
|
2
|
+
|
|
3
|
+
// Number of progress-bar lines written by the most recent render. The render
// and clear functions use it to move the cursor back up over those lines.
let lastBarCount = 0;
|
|
4
|
+
|
|
5
|
+
/**
 * Build a fixed-width text progress bar.
 *
 * The completed fraction `current / total` is clamped to [0, 1], so a zero or
 * negative `total`, a negative `current`, or `current > total` can neither
 * throw nor make the bar wider than `maxWidth`.
 *
 * @param {number} current - Units completed.
 * @param {number} total - Units expected.
 * @param {number} [maxWidth=30] - Bar width in characters.
 * @returns {string} `█` for the completed part, padded with spaces to
 *   `maxWidth`.
 */
export function createProgressBar(current, total, maxWidth = 30) {
  const ratio = total > 0 ? current / total : 0;
  const clamped = Number.isFinite(ratio) ? Math.min(1, Math.max(0, ratio)) : 0;
  // Guard against a negative maxWidth as well: repeat() rejects negatives.
  const filled = Math.max(0, Math.round(clamped * maxWidth));
  return '█'.repeat(filled).padEnd(maxWidth);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Derive a concurrency level from the number of work items.
 *
 * The result is one per ten items, raised to at least 1, then capped at both
 * 5 and `total` itself (so `total = 0` yields 0).
 *
 * @param {number} total - Number of work items.
 * @returns {number} Concurrency level.
 */
export function calculateConcurrency(total) {
  const onePerTen = Math.floor(total / 10);
  const atLeastOne = Math.max(1, onePerTen);
  return Math.min(5, atLeastOne, total);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Create the initial state for a group of progress bars.
 *
 * @param {number} count - Number of bars.
 * @returns {Array<{current: number, total: number, status: string, url: string}>}
 *   `count` independent bar objects, each starting at 0/0 with status
 *   `'pending'` and an empty URL.
 */
export function createMultiProgressBars(count) {
  const makeBlankBar = () => ({
    current: 0,
    total: 0,
    status: 'pending',
    url: '',
  });

  return Array.from({ length: count }, makeBlankBar);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Redraw all active progress bars on stdout, one line per bar.
 *
 * Bars whose `total` is not greater than 0 are skipped; if none remain,
 * nothing is written. Before drawing, the cursor is moved up over the lines
 * written by the previous render, and everything below the cursor is erased.
 *
 * @param {Array<{current: number, total: number, status: string, url: string}>} bars
 *   Bar states to render.
 * @param {number} [maxWidth=30] - Width of each bar in characters.
 * @returns {void}
 */
export function renderMultiProgressBars(bars, maxWidth = 30) {
  const visible = bars.filter((bar) => bar.total > 0);
  if (visible.length === 0) return;

  const lines = [];
  for (const { current, total, status, url } of visible) {
    const prog = createProgressBar(current, total, maxWidth);

    let icon;
    if (status === 'done') {
      icon = '✓';
    } else if (status === 'error') {
      // NOTE(review): the error icon is an empty string here — possibly a
      // glyph lost upstream; confirm the intended character.
      icon = '';
    } else {
      icon = '⟳';
    }

    const urlDisplay = url ? extractDisplayPath(url) : '';
    lines.push(` [${prog}] ${current}/${total} ${icon} ${urlDisplay}`);
  }

  // Move the cursor up over the previous frame, if there was one.
  if (lastBarCount > 0) {
    process.stdout.write(`\x1b[${lastBarCount}A`);
  }

  // Erase from the cursor to the end of the screen, then draw the new frame.
  process.stdout.write('\x1b[0J');
  process.stdout.write(lines.join('\n') + '\n');

  lastBarCount = visible.length;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Erase the progress bars written by the last render, if any, and reset the
 * remembered line count to 0.
 *
 * @returns {void}
 */
export function clearProgressBars() {
  // Nothing was drawn, so there is nothing to erase.
  if (!(lastBarCount > 0)) return;

  // Move the cursor up over the drawn lines, then erase to the screen's end.
  process.stdout.write(`\x1b[${lastBarCount}A`);
  process.stdout.write('\x1b[0J');
  lastBarCount = 0;
}
|
package/src/lib/output.js
CHANGED
|
@@ -1,80 +1,80 @@
|
|
|
1
|
-
export function deduplicate(results) {
|
|
2
|
-
const seen = new Set();
|
|
3
|
-
return results.filter(r => {
|
|
4
|
-
if (r.id) {
|
|
5
|
-
const key = r.id;
|
|
6
|
-
if (seen.has(key)) return false;
|
|
7
|
-
seen.add(key);
|
|
8
|
-
return true;
|
|
9
|
-
}
|
|
10
|
-
const key = r.secUid || r.uniqueId;
|
|
11
|
-
if (seen.has(key)) return false;
|
|
12
|
-
seen.add(key);
|
|
13
|
-
return true;
|
|
14
|
-
});
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export function formatTable(data) {
|
|
18
|
-
if (data.length === 0) return '';
|
|
19
|
-
|
|
20
|
-
if (data.length === 1) {
|
|
21
|
-
const lines = [];
|
|
22
|
-
for (const [key, val] of Object.entries(data[0])) {
|
|
23
|
-
if (typeof val === 'string' && val.length > 80) {
|
|
24
|
-
lines.push(` ${key}: ${val.substring(0, 80)}...`);
|
|
25
|
-
} else {
|
|
26
|
-
lines.push(` ${key}: ${val}`);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
return lines.join('\n');
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
const cols = [
|
|
33
|
-
{ key: 'uniqueId', label: '用户名', width: 20 },
|
|
34
|
-
{ key: 'locationCreated', label: '地区', width: 6 },
|
|
35
|
-
{ key: 'nickname', label: '昵称', width: 20 },
|
|
36
|
-
{ key: 'ttSeller', label: 'TT卖家', width: 8 },
|
|
37
|
-
{ key: 'verified', label: '已认证', width: 8 },
|
|
38
|
-
{ key: 'followerCount', label: '粉丝', width: 10 },
|
|
39
|
-
{ key: 'videoCount', label: '视频', width: 8 },
|
|
40
|
-
];
|
|
41
|
-
|
|
42
|
-
for (const row of data) {
|
|
43
|
-
for (const col of cols) {
|
|
44
|
-
const val = String(row[col.key] ?? '-');
|
|
45
|
-
col.width = Math.max(col.width, val.length, col.label.length);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
const sep = (w) => '-'.repeat(w);
|
|
50
|
-
const pad = (s, w) => s.padEnd(w);
|
|
51
|
-
|
|
52
|
-
const header = cols.map(c => pad(c.label, c.width)).join(' │ ');
|
|
53
|
-
const divider = cols.map(c => sep(c.width)).join('-+-');
|
|
54
|
-
const rows = data.map(r =>
|
|
55
|
-
cols.map(c => pad(String(r[c.key] ?? '-'), c.width)).join(' │ ')
|
|
56
|
-
);
|
|
57
|
-
|
|
58
|
-
return [header, divider, ...rows].join('\n');
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
export function formatOutput(data, format) {
|
|
62
|
-
if (format === 'table') return formatTable(data);
|
|
63
|
-
|
|
64
|
-
if (format === 'raw') {
|
|
65
|
-
if (Array.isArray(data) && data.length > 0 && 'url' in data[0]) {
|
|
66
|
-
return data.map(d => d.url).join('\n');
|
|
67
|
-
}
|
|
68
|
-
if (Array.isArray(data) && data.length > 0 && 'uniqueId' in data[0]) {
|
|
69
|
-
return data.map(d => `https://www.tiktok.com/@${d.uniqueId}`).join('\n');
|
|
70
|
-
}
|
|
71
|
-
return JSON.stringify(data, null, 2);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
// Default JSON output, but for explore results (url-only) output pure text
|
|
75
|
-
if (Array.isArray(data) && data.length > 0 && 'url' in data[0]) {
|
|
76
|
-
return data.map(d => d.url).join('\n');
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
return JSON.stringify(data, null, 2);
|
|
80
|
-
}
|
|
1
|
+
/**
 * Remove duplicate records, keeping the first occurrence of each.
 *
 * A record's identity is its `id` if truthy, otherwise `secUid`, otherwise
 * `uniqueId`. Records with no usable identifier are always kept: they cannot
 * be shown to duplicate anything, so they must not collapse into one entry.
 *
 * @param {Array<Object>} results - Records to de-duplicate.
 * @returns {Array<Object>} New array in the original order.
 */
export function deduplicate(results) {
  const seen = new Set();

  return results.filter((r) => {
    const key = r.id || r.secUid || r.uniqueId;

    // No identifier at all: keep rather than treat as a repeat of `undefined`.
    if (!key) return true;

    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Format records as plain text.
 *
 * - No records: empty string.
 * - Exactly one record: one ` key: value` line per property, with string
 *   values longer than 80 characters cut to 80 and suffixed with `...`.
 * - Several records: a table with a fixed set of columns. Each column is
 *   widened to fit its longest cell, and missing values are shown as `-`.
 *
 * @param {Array<Object>} data - Records to format.
 * @returns {string} Formatted text.
 */
export function formatTable(data) {
  if (data.length === 0) return '';

  if (data.length === 1) {
    return Object.entries(data[0])
      .map(([key, val]) => {
        const tooLong = typeof val === 'string' && val.length > 80;
        return tooLong
          ? ` ${key}: ${val.substring(0, 80)}...`
          : ` ${key}: ${val}`;
      })
      .join('\n');
  }

  const cols = [
    { key: 'uniqueId', label: '用户名', width: 20 },
    { key: 'locationCreated', label: '地区', width: 6 },
    { key: 'nickname', label: '昵称', width: 20 },
    { key: 'ttSeller', label: 'TT卖家', width: 8 },
    { key: 'verified', label: '已认证', width: 8 },
    { key: 'followerCount', label: '粉丝', width: 10 },
    { key: 'videoCount', label: '视频', width: 8 },
  ];

  // Text shown for one cell; null/undefined render as "-".
  const cellText = (row, col) => String(row[col.key] ?? '-');

  // Grow each column to fit its label and its widest cell.
  for (const col of cols) {
    for (const row of data) {
      col.width = Math.max(col.width, cellText(row, col).length, col.label.length);
    }
  }

  const header = cols.map((col) => col.label.padEnd(col.width)).join(' │ ');
  const divider = cols.map((col) => '-'.repeat(col.width)).join('-+-');
  const body = data.map((row) =>
    cols.map((col) => cellText(row, col).padEnd(col.width)).join(' │ ')
  );

  return [header, divider, ...body].join('\n');
}
|
|
60
|
+
|
|
61
|
+
/**
 * Serialize results in the requested output format.
 *
 * - `'table'`: delegated to `formatTable`.
 * - Any other format: a non-empty array whose first element has a `url`
 *   property is printed as one URL per line.
 * - `'raw'` only: a non-empty array whose first element has a `uniqueId`
 *   property is printed as one `https://www.tiktok.com/@<uniqueId>` line per
 *   element.
 * - Otherwise: pretty-printed JSON with 2-space indentation.
 *
 * @param {*} data - Results to serialize.
 * @param {string} format - Requested format.
 * @returns {string} Serialized text.
 */
export function formatOutput(data, format) {
  if (format === 'table') return formatTable(data);

  const isNonEmptyList = Array.isArray(data) && data.length > 0;

  // URL-only results are emitted as plain text in every non-table format.
  if (isNonEmptyList && 'url' in data[0]) {
    return data.map((entry) => entry.url).join('\n');
  }

  if (format === 'raw' && isNonEmptyList && 'uniqueId' in data[0]) {
    return data
      .map((entry) => `https://www.tiktok.com/@${entry.uniqueId}`)
      .join('\n');
  }

  return JSON.stringify(data, null, 2);
}
|
package/src/lib/parser.js
CHANGED
|
@@ -1,47 +1,47 @@
|
|
|
1
|
-
export const USER_SECTION_SIZE = 12000;
|
|
2
|
-
|
|
3
|
-
export function extractUserSection(html) {
|
|
4
|
-
const idx = html.indexOf('"uniqueId"');
|
|
5
|
-
if (idx < 0) return null;
|
|
6
|
-
return html.substring(idx, idx + USER_SECTION_SIZE);
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
export function parseUserSection(section) {
|
|
10
|
-
const data = {};
|
|
11
|
-
|
|
12
|
-
for (const key of ['uniqueId', 'uid', 'secUid']) {
|
|
13
|
-
const m = section.match(new RegExp(`"${key}":"([^"]*)`));
|
|
14
|
-
if (m) data[key] = m[1];
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
for (const key of ['nickname', 'signature']) {
|
|
18
|
-
const m = section.match(new RegExp(`"${key}":"((?:[^"\\\\]|\\\\.)*)"`, 'g'));
|
|
19
|
-
if (m) {
|
|
20
|
-
const raw = m[0].replace(`"${key}":"`, '').replace(/"$/, '');
|
|
21
|
-
data[key] = raw.replace(/\\n/g, '\n').replace(/\\\\/g, '\\');
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
for (const key of ['ttSeller', 'verified']) {
|
|
26
|
-
const m = section.match(new RegExp(`"${key}":\\s*(true|false)`));
|
|
27
|
-
data[key] = m ? m[1] === 'true' : undefined;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
for (const key of ['followerCount', 'followingCount', 'heartCount', 'videoCount', 'diggCount']) {
|
|
31
|
-
const m = section.match(new RegExp(`"${key}":(\\d+)`));
|
|
32
|
-
if (m) data[key] = parseInt(m[1], 10);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
const mt = section.match(/"createTime":(\d+)/);
|
|
36
|
-
if (mt) data.createTime = parseInt(mt[1], 10);
|
|
37
|
-
|
|
38
|
-
const ma = section.match(/"avatarLarger":"([^"]*)/);
|
|
39
|
-
if (ma) data.avatarLarger = ma[1].replace(/\\u002F/g, '/');
|
|
40
|
-
|
|
41
|
-
return data;
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
export function extractLocationCreated(html) {
|
|
45
|
-
const m = html.match(/"locationCreated":"([^"]*)/);
|
|
46
|
-
return m ? m[1] : null;
|
|
47
|
-
}
|
|
1
|
+
/** Maximum number of characters taken from the page for the user section. */
export const USER_SECTION_SIZE = 12000;

/**
 * Cut out the part of the page that starts at the first `"uniqueId"` token.
 *
 * @param {string} html - Page source.
 * @returns {string|null} At most `USER_SECTION_SIZE` characters beginning at
 *   the token, or `null` when the token does not occur.
 */
export function extractUserSection(html) {
  const start = html.indexOf('"uniqueId"');
  const found = start >= 0;
  return found ? html.substring(start, start + USER_SECTION_SIZE) : null;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Extract user fields from a fragment of embedded JSON text using regular
 * expressions (the fragment is not required to be a complete JSON document).
 *
 * Fields read:
 * - `uniqueId`, `uid`, `secUid`: raw text up to the next `"`.
 * - `nickname`, `signature`: JSON string values, fully unescaped.
 * - `ttSeller`, `verified`: booleans; the key is always set, and is
 *   `undefined` when the field is absent.
 * - `followerCount`, `followingCount`, `heartCount`, `videoCount`,
 *   `diggCount`, `createTime`: base-10 integers.
 * - `avatarLarger`: raw text with `\u002F` replaced by `/`.
 *
 * @param {string} section - Text fragment to scan.
 * @returns {Object} The fields that were found.
 */
export function parseUserSection(section) {
  const data = {};

  for (const key of ['uniqueId', 'uid', 'secUid']) {
    const m = section.match(new RegExp(`"${key}":"([^"]*)`));
    if (m) data[key] = m[1];
  }

  for (const key of ['nickname', 'signature']) {
    // The capture group is the string body, with escaped characters intact.
    const m = section.match(new RegExp(`"${key}":"((?:[^"\\\\]|\\\\.)*)"`));
    if (m) data[key] = decodeJsonStringBody(m[1]);
  }

  for (const key of ['ttSeller', 'verified']) {
    const m = section.match(new RegExp(`"${key}":\\s*(true|false)`));
    data[key] = m ? m[1] === 'true' : undefined;
  }

  for (const key of ['followerCount', 'followingCount', 'heartCount', 'videoCount', 'diggCount']) {
    const m = section.match(new RegExp(`"${key}":(\\d+)`));
    if (m) data[key] = parseInt(m[1], 10);
  }

  const mt = section.match(/"createTime":(\d+)/);
  if (mt) data.createTime = parseInt(mt[1], 10);

  const ma = section.match(/"avatarLarger":"([^"]*)/);
  if (ma) data.avatarLarger = ma[1].replace(/\\u002F/g, '/');

  return data;
}

/**
 * Decode the body of a JSON string literal (the text between the quotes).
 *
 * Uses the JSON parser so that every escape (`\"`, `\\`, `\n`, `\uXXXX`, …)
 * is handled in one correct pass. If the text is not a valid JSON string
 * body, falls back to replacing only `\n` and `\\`.
 */
function decodeJsonStringBody(raw) {
  try {
    return JSON.parse(`"${raw}"`);
  } catch {
    return raw.replace(/\\n/g, '\n').replace(/\\\\/g, '\\');
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Read the first `"locationCreated":"…"` value found in the page source.
 *
 * @param {string} html - Page source.
 * @returns {string|null} The text up to the next `"`, or `null` when the
 *   field does not occur.
 */
export function extractLocationCreated(html) {
  const found = html.match(/"locationCreated":"([^"]*)/);
  if (!found) return null;
  return found[1];
}
|