tt-help-cli-ycl 1.3.48 → 1.3.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -33
- package/cli.js +9 -9
- package/package.json +52 -52
- package/scripts/run-explore copy.bat +101 -101
- package/scripts/run-explore.bat +134 -134
- package/scripts/run-explore.ps1 +159 -159
- package/scripts/run-explore.sh +121 -121
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/scripts/test-watch-db-smoke.mjs +246 -0
- package/src/cli/attach.js +331 -331
- package/src/cli/auto.js +265 -265
- package/src/cli/comments.js +620 -620
- package/src/cli/config.js +170 -170
- package/src/cli/db-import.js +51 -51
- package/src/cli/explore.js +555 -555
- package/src/cli/open.js +109 -111
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +288 -288
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/videostats.js +196 -196
- package/src/cli/watch.js +30 -30
- package/src/lib/api-interceptor.js +161 -161
- package/src/lib/args.js +809 -809
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +261 -261
- package/src/lib/browser/health-checker.js +114 -114
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +184 -184
- package/src/lib/constants.js +297 -297
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/lib/page-error-detector.js +109 -109
- package/src/lib/parse-ssr.mjs +69 -69
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +90 -90
- package/src/lib/target-locations.js +61 -61
- package/src/lib/tiktok-scraper.mjs +98 -61
- package/src/lib/url.js +52 -52
- package/src/main.js +73 -73
- package/src/npm-main.js +70 -70
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.js +203 -203
- package/src/scraper/core.js +255 -255
- package/src/scraper/explore-core.js +208 -208
- package/src/scraper/modules/captcha-handler.js +114 -114
- package/src/scraper/modules/follow-extractor.js +250 -250
- package/src/scraper/modules/guess-extractor.js +51 -51
- package/src/scraper/modules/page-helpers.js +48 -48
- package/src/scraper/refresh-core.js +213 -213
- package/src/videos/core.js +143 -143
- package/src/watch/data-store.js +2980 -2980
- package/src/watch/public/index.html +2355 -2355
- package/src/watch/server.js +727 -727
package/src/lib/io.js
CHANGED
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
import { extractDisplayPath } from './url.js';
|
|
2
|
-
|
|
3
|
-
let lastBarCount = 0;
|
|
4
|
-
|
|
5
|
-
export function createProgressBar(current, total, maxWidth = 30) {
|
|
6
|
-
const filled = Math.round((current / total) * maxWidth);
|
|
7
|
-
return '█'.repeat(filled).padEnd(maxWidth);
|
|
8
|
-
}
|
|
9
|
-
|
|
10
|
-
export function calculateConcurrency(total) {
|
|
11
|
-
return Math.min(5, Math.max(1, Math.floor(total / 10)), total);
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export function createMultiProgressBars(count) {
|
|
15
|
-
return Array.from({ length: count }, () => ({
|
|
16
|
-
current: 0,
|
|
17
|
-
total: 0,
|
|
18
|
-
status: 'pending',
|
|
19
|
-
url: '',
|
|
20
|
-
}));
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export function renderMultiProgressBars(bars, maxWidth = 30) {
|
|
24
|
-
const activeBars = bars.filter(bar => bar.total > 0);
|
|
25
|
-
|
|
26
|
-
if (activeBars.length === 0) return;
|
|
27
|
-
|
|
28
|
-
const lines = activeBars.map((bar) => {
|
|
29
|
-
const prog = createProgressBar(bar.current, bar.total, maxWidth);
|
|
30
|
-
const icon = bar.status === 'done' ? '✓' :
|
|
31
|
-
bar.status === 'error' ? '' : '⟳';
|
|
32
|
-
const urlDisplay = bar.url ? extractDisplayPath(bar.url) : '';
|
|
33
|
-
return ` [${prog}] ${bar.current}/${bar.total} ${icon} ${urlDisplay}`;
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
const output = lines.join('\n');
|
|
37
|
-
|
|
38
|
-
if (lastBarCount > 0) {
|
|
39
|
-
process.stdout.write(`\x1b[${lastBarCount}A`);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
process.stdout.write('\x1b[0J');
|
|
43
|
-
process.stdout.write(output + '\n');
|
|
44
|
-
|
|
45
|
-
lastBarCount = activeBars.length;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
export function clearProgressBars() {
|
|
49
|
-
if (lastBarCount > 0) {
|
|
50
|
-
process.stdout.write(`\x1b[${lastBarCount}A`);
|
|
51
|
-
process.stdout.write('\x1b[0J');
|
|
52
|
-
lastBarCount = 0;
|
|
53
|
-
}
|
|
54
|
-
}
|
|
1
|
+
import { extractDisplayPath } from './url.js';
|
|
2
|
+
|
|
3
|
+
let lastBarCount = 0;
|
|
4
|
+
|
|
5
|
+
/**
 * Build a fixed-width text progress bar.
 *
 * The completed fraction (`current / total`) is clamped to the range 0..1 so
 * that out-of-range inputs never overflow the bar or make
 * `String.prototype.repeat` throw: a non-positive `total`, a negative
 * `current`, or a NaN ratio yields an empty bar, and `current > total`
 * yields a full bar.
 *
 * @param {number} current - Units completed so far.
 * @param {number} total - Units expected in total.
 * @param {number} [maxWidth=30] - Width of the bar in characters.
 * @returns {string} `maxWidth` characters: filled cells followed by spaces.
 */
export function createProgressBar(current, total, maxWidth = 30) {
  const ratio = total > 0 ? current / total : 0;
  const fraction = Number.isNaN(ratio) ? 0 : Math.min(1, Math.max(0, ratio));
  // Math.max guards against a negative maxWidth producing a negative count.
  const filled = Math.max(0, Math.round(fraction * maxWidth));
  return '█'.repeat(filled).padEnd(maxWidth);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Pick a concurrency level for `total` items: one slot per ten items,
 * at least one, at most five, and never more than `total` itself.
 *
 * @param {number} total - Number of items to process.
 * @returns {number} The chosen concurrency level.
 */
export function calculateConcurrency(total) {
  const perTenItems = Math.floor(total / 10);
  const atLeastOne = Math.max(1, perTenItems);
  return Math.min(5, atLeastOne, total);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Create the state objects for `count` progress bars.
 *
 * Every bar starts as `{ current: 0, total: 0, status: 'pending', url: '' }`
 * and is a distinct object, so bars can be updated independently.
 *
 * @param {number} count - Number of bars to create.
 * @returns {Array<{current: number, total: number, status: string, url: string}>}
 */
export function createMultiProgressBars(count) {
  const blankBar = () => ({ current: 0, total: 0, status: 'pending', url: '' });
  return Array.from({ length: count }, blankBar);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Redraw the multi-line progress display on stdout.
 *
 * Only bars whose `total` is greater than zero are drawn, one line each.
 * The previous frame is overwritten in place: the cursor is moved up by the
 * number of lines written last time (kept in the module-level
 * `lastBarCount`), everything from the cursor down is erased, and the new
 * frame is written. Nothing is written when no bar has a positive `total`.
 *
 * @param {Array<{current: number, total: number, status: string, url: string}>} bars
 * @param {number} [maxWidth=30] - Bar width passed to createProgressBar.
 * @returns {void}
 */
export function renderMultiProgressBars(bars, maxWidth = 30) {
  const started = bars.filter(({ total }) => total > 0);
  if (started.length === 0) {
    return;
  }

  const iconFor = (status) => {
    if (status === 'done') return '✓';
    if (status === 'error') return '';
    return '⟳';
  };

  const frame = started
    .map((bar) => {
      const gauge = createProgressBar(bar.current, bar.total, maxWidth);
      const mark = iconFor(bar.status);
      const label = bar.url ? extractDisplayPath(bar.url) : '';
      return ` [${gauge}] ${bar.current}/${bar.total} ${mark} ${label}`;
    })
    .join('\n');

  // Move back to the first line of the previous frame, if there was one.
  if (lastBarCount > 0) {
    process.stdout.write(`\x1b[${lastBarCount}A`);
  }

  // Erase from the cursor to the end of the screen, then draw the new frame.
  process.stdout.write('\x1b[0J');
  process.stdout.write(frame + '\n');

  lastBarCount = started.length;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Erase the progress display drawn by the last render, if any.
 *
 * Moves the cursor up by `lastBarCount` lines, erases from there to the end
 * of the screen and resets `lastBarCount` to 0. Does nothing when no frame
 * is currently recorded.
 *
 * @returns {void}
 */
export function clearProgressBars() {
  if (lastBarCount <= 0) {
    return;
  }

  process.stdout.write(`\x1b[${lastBarCount}A`);
  process.stdout.write('\x1b[0J');
  lastBarCount = 0;
}
|
package/src/lib/output.js
CHANGED
|
@@ -1,80 +1,80 @@
|
|
|
1
|
-
export function deduplicate(results) {
|
|
2
|
-
const seen = new Set();
|
|
3
|
-
return results.filter(r => {
|
|
4
|
-
if (r.id) {
|
|
5
|
-
const key = r.id;
|
|
6
|
-
if (seen.has(key)) return false;
|
|
7
|
-
seen.add(key);
|
|
8
|
-
return true;
|
|
9
|
-
}
|
|
10
|
-
const key = r.secUid || r.uniqueId;
|
|
11
|
-
if (seen.has(key)) return false;
|
|
12
|
-
seen.add(key);
|
|
13
|
-
return true;
|
|
14
|
-
});
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export function formatTable(data) {
|
|
18
|
-
if (data.length === 0) return '';
|
|
19
|
-
|
|
20
|
-
if (data.length === 1) {
|
|
21
|
-
const lines = [];
|
|
22
|
-
for (const [key, val] of Object.entries(data[0])) {
|
|
23
|
-
if (typeof val === 'string' && val.length > 80) {
|
|
24
|
-
lines.push(` ${key}: ${val.substring(0, 80)}...`);
|
|
25
|
-
} else {
|
|
26
|
-
lines.push(` ${key}: ${val}`);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
return lines.join('\n');
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
const cols = [
|
|
33
|
-
{ key: 'uniqueId', label: '用户名', width: 20 },
|
|
34
|
-
{ key: 'locationCreated', label: '地区', width: 6 },
|
|
35
|
-
{ key: 'nickname', label: '昵称', width: 20 },
|
|
36
|
-
{ key: 'ttSeller', label: 'TT卖家', width: 8 },
|
|
37
|
-
{ key: 'verified', label: '已认证', width: 8 },
|
|
38
|
-
{ key: 'followerCount', label: '粉丝', width: 10 },
|
|
39
|
-
{ key: 'videoCount', label: '视频', width: 8 },
|
|
40
|
-
];
|
|
41
|
-
|
|
42
|
-
for (const row of data) {
|
|
43
|
-
for (const col of cols) {
|
|
44
|
-
const val = String(row[col.key] ?? '-');
|
|
45
|
-
col.width = Math.max(col.width, val.length, col.label.length);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
const sep = (w) => '-'.repeat(w);
|
|
50
|
-
const pad = (s, w) => s.padEnd(w);
|
|
51
|
-
|
|
52
|
-
const header = cols.map(c => pad(c.label, c.width)).join(' │ ');
|
|
53
|
-
const divider = cols.map(c => sep(c.width)).join('-+-');
|
|
54
|
-
const rows = data.map(r =>
|
|
55
|
-
cols.map(c => pad(String(r[c.key] ?? '-'), c.width)).join(' │ ')
|
|
56
|
-
);
|
|
57
|
-
|
|
58
|
-
return [header, divider, ...rows].join('\n');
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
export function formatOutput(data, format) {
|
|
62
|
-
if (format === 'table') return formatTable(data);
|
|
63
|
-
|
|
64
|
-
if (format === 'raw') {
|
|
65
|
-
if (Array.isArray(data) && data.length > 0 && 'url' in data[0]) {
|
|
66
|
-
return data.map(d => d.url).join('\n');
|
|
67
|
-
}
|
|
68
|
-
if (Array.isArray(data) && data.length > 0 && 'uniqueId' in data[0]) {
|
|
69
|
-
return data.map(d => `https://www.tiktok.com/@${d.uniqueId}`).join('\n');
|
|
70
|
-
}
|
|
71
|
-
return JSON.stringify(data, null, 2);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
// Default JSON output, but for explore results (url-only) output pure text
|
|
75
|
-
if (Array.isArray(data) && data.length > 0 && 'url' in data[0]) {
|
|
76
|
-
return data.map(d => d.url).join('\n');
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
return JSON.stringify(data, null, 2);
|
|
80
|
-
}
|
|
1
|
+
/**
 * Remove duplicate records, keeping the first occurrence of each key.
 *
 * The key of a record is its `id` when truthy, otherwise `secUid`,
 * otherwise `uniqueId`. All keys share a single lookup set.
 *
 * Records that carry none of these identifiers cannot be compared, so they
 * are always kept. Previously they all shared the key `undefined`, which
 * silently dropped every such record after the first.
 *
 * @param {Array<{id?: *, secUid?: *, uniqueId?: *}>} results - Records to filter.
 * @returns {Array<object>} A new array in the original order, duplicates removed.
 */
export function deduplicate(results) {
  const seen = new Set();
  return results.filter((r) => {
    const key = r.id || r.secUid || r.uniqueId;
    if (!key) return true; // no identifier: nothing to deduplicate against
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Render records as plain text.
 *
 * - Empty input gives an empty string.
 * - A single record is rendered as one `key: value` line per property;
 *   string values longer than 80 characters are cut to 80 and suffixed
 *   with `...`.
 * - Two or more records are rendered as a table with a fixed column set.
 *   Each column is as wide as its minimum width, its label, or its longest
 *   cell, whichever is largest. Missing (null/undefined) cells show `-`.
 *
 * @param {Array<object>} data - Records to render.
 * @returns {string} The formatted text, lines joined with `\n`.
 */
export function formatTable(data) {
  if (data.length === 0) return '';

  if (data.length === 1) {
    return Object.entries(data[0])
      .map(([field, value]) => {
        const shown =
          typeof value === 'string' && value.length > 80
            ? `${value.substring(0, 80)}...`
            : value;
        return ` ${field}: ${shown}`;
      })
      .join('\n');
  }

  // [record key, header label, minimum width]
  const columns = [
    ['uniqueId', '用户名', 20],
    ['locationCreated', '地区', 6],
    ['nickname', '昵称', 20],
    ['ttSeller', 'TT卖家', 8],
    ['verified', '已认证', 8],
    ['followerCount', '粉丝', 10],
    ['videoCount', '视频', 8],
  ].map(([key, label, width]) => ({ key, label, width }));

  // Stringify every cell once; the same text drives both sizing and output.
  const cellRows = [];
  for (const row of data) {
    cellRows.push(columns.map(({ key }) => String(row[key] ?? '-')));
  }

  columns.forEach((column, index) => {
    for (const cells of cellRows) {
      column.width = Math.max(column.width, cells[index].length, column.label.length);
    }
  });

  const joinCells = (texts) =>
    texts.map((text, index) => text.padEnd(columns[index].width)).join(' │ ');

  const header = joinCells(columns.map(({ label }) => label));
  const divider = columns.map(({ width }) => '-'.repeat(width)).join('-+-');
  const body = cellRows.map((cells) => joinCells(cells));

  return [header, divider, ...body].join('\n');
}
|
|
60
|
+
|
|
61
|
+
/**
 * Serialize results for output.
 *
 * - `format === 'table'`: delegates to formatTable.
 * - Any other format: a non-empty array whose first element has a `url`
 *   property is printed as one URL per line.
 * - `format === 'raw'`: a non-empty array whose first element has a
 *   `uniqueId` property is printed as one TikTok profile URL per line.
 * - Everything else is pretty-printed JSON (2-space indent).
 *
 * The first element is only probed when it is a non-null object, so arrays
 * of primitives (or a leading `null`) fall through to JSON instead of
 * making the `in` operator throw a TypeError.
 *
 * @param {*} data - Results to serialize.
 * @param {string} [format] - 'table', 'raw', or anything else for the default.
 * @returns {string} The serialized output.
 */
export function formatOutput(data, format) {
  if (format === 'table') return formatTable(data);

  const firstRowHas = (field) =>
    Array.isArray(data) &&
    data.length > 0 &&
    data[0] !== null &&
    typeof data[0] === 'object' &&
    field in data[0];

  // Explore results (url-only records) are plain text in every non-table format.
  if (firstRowHas('url')) {
    return data.map((d) => d.url).join('\n');
  }

  if (format === 'raw' && firstRowHas('uniqueId')) {
    return data.map((d) => `https://www.tiktok.com/@${d.uniqueId}`).join('\n');
  }

  return JSON.stringify(data, null, 2);
}
|
package/src/lib/page-error-detector.js
CHANGED
|
@@ -1,109 +1,109 @@
|
|
|
1
|
-
const PATTERNS = {
|
|
2
|
-
login_required: [
|
|
3
|
-
"登录 TikTok",
|
|
4
|
-
"登录后查看",
|
|
5
|
-
"查看需登录",
|
|
6
|
-
"Log in to TikTok",
|
|
7
|
-
"Login to TikTok",
|
|
8
|
-
"观众管理功能",
|
|
9
|
-
"Viewer management",
|
|
10
|
-
"私密账号",
|
|
11
|
-
"私密状态",
|
|
12
|
-
],
|
|
13
|
-
captcha: [
|
|
14
|
-
"captcha",
|
|
15
|
-
"verify",
|
|
16
|
-
"验证码",
|
|
17
|
-
"点击下一步",
|
|
18
|
-
"Press and hold",
|
|
19
|
-
"slide to verify",
|
|
20
|
-
"滑动验证",
|
|
21
|
-
"人机验证",
|
|
22
|
-
"安全验证",
|
|
23
|
-
],
|
|
24
|
-
rate_limited: [
|
|
25
|
-
"访问过于频繁",
|
|
26
|
-
"操作过于频繁",
|
|
27
|
-
"too many requests",
|
|
28
|
-
"rate limit",
|
|
29
|
-
"稍后再试",
|
|
30
|
-
"try again later",
|
|
31
|
-
"请稍后再来",
|
|
32
|
-
],
|
|
33
|
-
region_blocked: [
|
|
34
|
-
"地区限制",
|
|
35
|
-
"not available in your",
|
|
36
|
-
"此内容不可用",
|
|
37
|
-
"content not available",
|
|
38
|
-
"currently unavailable",
|
|
39
|
-
"抱歉,此内容",
|
|
40
|
-
"此页面不可用",
|
|
41
|
-
],
|
|
42
|
-
not_found: [
|
|
43
|
-
"页面不存在",
|
|
44
|
-
"page not found",
|
|
45
|
-
"找不到",
|
|
46
|
-
"Couldn't find this",
|
|
47
|
-
"nothing here",
|
|
48
|
-
"此页面不存在",
|
|
49
|
-
"没有内容",
|
|
50
|
-
"发起对话",
|
|
51
|
-
"0 条评论",
|
|
52
|
-
"找不到此账号",
|
|
53
|
-
],
|
|
54
|
-
service_error: ["出错了", "很抱歉"],
|
|
55
|
-
};
|
|
56
|
-
|
|
57
|
-
export async function detectPageError(page) {
|
|
58
|
-
return page.evaluate((patterns) => {
|
|
59
|
-
const body = document.body;
|
|
60
|
-
if (!body) return null;
|
|
61
|
-
const bodyText = body.innerText;
|
|
62
|
-
const lower = bodyText.toLowerCase();
|
|
63
|
-
|
|
64
|
-
for (const [type, phrases] of Object.entries(patterns)) {
|
|
65
|
-
for (const phrase of phrases) {
|
|
66
|
-
if (lower.includes(phrase.toLowerCase())) {
|
|
67
|
-
return type;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
return null;
|
|
73
|
-
}, PATTERNS);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
/**
|
|
77
|
-
* 等待页面错误信息出现(轮询检测,最多等待 timeout ms)
|
|
78
|
-
* @param {import('playwright').Page} page
|
|
79
|
-
* @param {number} timeout - 超时时间(毫秒),默认 8000
|
|
80
|
-
* @returns {Promise<string|null>} 错误类型或 null
|
|
81
|
-
*/
|
|
82
|
-
export async function detectPageErrorWithWait(page, timeout = 8000) {
|
|
83
|
-
try {
|
|
84
|
-
const handle = await page.waitForFunction(
|
|
85
|
-
(patterns) => {
|
|
86
|
-
const body = document.body;
|
|
87
|
-
if (!body) return null;
|
|
88
|
-
const bodyText = body.innerText;
|
|
89
|
-
const lower = bodyText.toLowerCase();
|
|
90
|
-
|
|
91
|
-
for (const [type, phrases] of Object.entries(patterns)) {
|
|
92
|
-
for (const phrase of phrases) {
|
|
93
|
-
if (lower.includes(phrase.toLowerCase())) {
|
|
94
|
-
return type;
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
return null;
|
|
100
|
-
},
|
|
101
|
-
PATTERNS,
|
|
102
|
-
{ timeout },
|
|
103
|
-
);
|
|
104
|
-
return await handle.jsonValue();
|
|
105
|
-
} catch {
|
|
106
|
-
// 超时或未检测到错误
|
|
107
|
-
return null;
|
|
108
|
-
}
|
|
109
|
-
}
|
|
1
|
+
// Phrase lists keyed by error type. The detectors in this file lower-case
// both the page text and each phrase and report the first type (in the key
// order below) for which any phrase occurs as a substring.
const PATTERNS = {
  login_required: [
    '登录 TikTok', '登录后查看', '查看需登录',
    'Log in to TikTok', 'Login to TikTok',
    '观众管理功能', 'Viewer management',
    '私密账号', '私密状态',
  ],
  captcha: [
    'captcha', 'verify',
    '验证码', '点击下一步',
    'Press and hold', 'slide to verify',
    '滑动验证', '人机验证', '安全验证',
  ],
  rate_limited: [
    '访问过于频繁', '操作过于频繁',
    'too many requests', 'rate limit',
    '稍后再试', 'try again later', '请稍后再来',
  ],
  region_blocked: [
    '地区限制', 'not available in your',
    '此内容不可用', 'content not available', 'currently unavailable',
    '抱歉,此内容', '此页面不可用',
  ],
  not_found: [
    '页面不存在', 'page not found',
    '找不到', "Couldn't find this", 'nothing here',
    '此页面不存在', '没有内容', '发起对话', '0 条评论', '找不到此账号',
  ],
  service_error: ['出错了', '很抱歉'],
};
|
|
56
|
+
|
|
57
|
+
/**
 * Classify the page's current visible text against PATTERNS, once.
 *
 * The classifier is handed to `page.evaluate` together with PATTERNS. It
 * lower-cases `document.body.innerText` and returns the first error type
 * whose phrase list contains a case-insensitive substring match.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`; presumably a
 *   Playwright Page, as in detectPageErrorWithWait (confirm against callers).
 * @returns {Promise<string|null>} The matched error type, or null when the
 *   page has no body or nothing matches.
 */
export async function detectPageError(page) {
  const classify = (patterns) => {
    const body = document.body;
    if (!body) return null;

    const haystack = body.innerText.toLowerCase();
    const hit = Object.entries(patterns).find(([, phrases]) =>
      phrases.some((phrase) => haystack.includes(phrase.toLowerCase())),
    );
    return hit ? hit[0] : null;
  };

  return page.evaluate(classify, PATTERNS);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Wait for an error message to appear on the page (polling check that
 * waits at most `timeout` ms).
 *
 * The predicate given to `page.waitForFunction` returns the first error
 * type from PATTERNS whose phrases match the lower-cased body text, or
 * null to keep waiting. Any failure while waiting, including the timeout,
 * is treated as "no error detected".
 *
 * @param {import('playwright').Page} page
 * @param {number} timeout - Timeout in milliseconds, default 8000
 * @returns {Promise<string|null>} Error type, or null
 */
export async function detectPageErrorWithWait(page, timeout = 8000) {
  const classify = (patterns) => {
    const body = document.body;
    if (!body) return null;

    const haystack = body.innerText.toLowerCase();
    const hit = Object.entries(patterns).find(([, phrases]) =>
      phrases.some((phrase) => haystack.includes(phrase.toLowerCase())),
    );
    return hit ? hit[0] : null;
  };

  try {
    const handle = await page.waitForFunction(classify, PATTERNS, { timeout });
    return await handle.jsonValue();
  } catch {
    // Timed out, or no error was detected.
    return null;
  }
}
|
package/src/lib/parse-ssr.mjs
CHANGED
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
function parseSSR(rawHtml) {
|
|
2
|
-
if (!rawHtml.includes('__UNIVERSAL_DATA_FOR_REHYDRATION__')) return null;
|
|
3
|
-
const dataStart = rawHtml.indexOf('__UNIVERSAL_DATA_FOR_REHYDRATION__');
|
|
4
|
-
// 从该字符串后面找 <script 标签的 >,确保找到的是正确行的 >
|
|
5
|
-
const scriptStart = rawHtml.lastIndexOf('<script', dataStart);
|
|
6
|
-
const sIdx = (scriptStart >= 0 ? rawHtml.indexOf('>', scriptStart) : rawHtml.indexOf('>', dataStart)) + 1;
|
|
7
|
-
const eIdx = rawHtml.indexOf('</script>', sIdx);
|
|
8
|
-
if (sIdx < 0 || eIdx < 0) return null;
|
|
9
|
-
const jsonStr = rawHtml.substring(sIdx, eIdx);
|
|
10
|
-
return JSON.parse(jsonStr);
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
export function parseUserInfo(rawHtml) {
|
|
14
|
-
const data = parseSSR(rawHtml);
|
|
15
|
-
if (!data) return null;
|
|
16
|
-
const ud = data.__DEFAULT_SCOPE__['webapp.user-detail'];
|
|
17
|
-
if (!ud || !ud.userInfo) return null;
|
|
18
|
-
const u = ud.userInfo.user;
|
|
19
|
-
const s = ud.userInfo.stats;
|
|
20
|
-
return {
|
|
21
|
-
uniqueId: u.uniqueId,
|
|
22
|
-
nickname: u.nickname,
|
|
23
|
-
id: u.id,
|
|
24
|
-
verified: u.verified,
|
|
25
|
-
privateAccount: u.privateAccount,
|
|
26
|
-
language: u.language,
|
|
27
|
-
bio: u.signature || '',
|
|
28
|
-
avatar: u.avatarLarger || u.avatarMedium || u.avatarThumb || '',
|
|
29
|
-
followerCount: s.followerCount,
|
|
30
|
-
followingCount: s.followingCount,
|
|
31
|
-
heartCount: s.heartCount,
|
|
32
|
-
videoCount: s.videoCount,
|
|
33
|
-
diggCount: s.diggCount,
|
|
34
|
-
friendCount: s.friendCount,
|
|
35
|
-
secUid: u.secUid,
|
|
36
|
-
ttSeller: u.ttSeller || false,
|
|
37
|
-
locationCreated: u.locationCreated || null,
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export function parseVideoInfo(rawHtml) {
|
|
42
|
-
const data = parseSSR(rawHtml);
|
|
43
|
-
if (!data) return null;
|
|
44
|
-
const vd = data.__DEFAULT_SCOPE__['webapp.video-detail'];
|
|
45
|
-
if (!vd || !vd.itemInfo || !vd.itemInfo.itemStruct) return null;
|
|
46
|
-
const item = vd.itemInfo.itemStruct;
|
|
47
|
-
const author = item.author || {};
|
|
48
|
-
const stats = item.stats || {};
|
|
49
|
-
return {
|
|
50
|
-
id: item.id,
|
|
51
|
-
desc: item.desc || '',
|
|
52
|
-
createTime: item.createTime || null,
|
|
53
|
-
locationCreated: item.locationCreated || null,
|
|
54
|
-
author: {
|
|
55
|
-
uniqueId: author.uniqueId,
|
|
56
|
-
nickname: author.nickname,
|
|
57
|
-
id: author.id,
|
|
58
|
-
verified: author.verified,
|
|
59
|
-
secUid: author.secUid,
|
|
60
|
-
},
|
|
61
|
-
stats: {
|
|
62
|
-
playCount: stats.playCount,
|
|
63
|
-
diggCount: stats.diggCount,
|
|
64
|
-
commentCount: stats.commentCount,
|
|
65
|
-
shareCount: stats.shareCount,
|
|
66
|
-
collectCount: stats.collectCount,
|
|
67
|
-
},
|
|
68
|
-
};
|
|
69
|
-
}
|
|
1
|
+
/**
 * Extract and parse the JSON embedded in the script tag that carries the
 * `__UNIVERSAL_DATA_FOR_REHYDRATION__` marker.
 *
 * The payload is the text between the `>` that closes the opening
 * `<script ...>` tag around the marker and the next `</script>`.
 *
 * @param {string} rawHtml - Full HTML source of the page.
 * @returns {*} The parsed JSON value, or null when the input is not a
 *   string, the marker is absent, or the surrounding tag is incomplete.
 * @throws {SyntaxError} When the extracted payload is not valid JSON.
 */
function parseSSR(rawHtml) {
  if (typeof rawHtml !== 'string') return null;

  const markerIdx = rawHtml.indexOf('__UNIVERSAL_DATA_FOR_REHYDRATION__');
  if (markerIdx < 0) return null;

  // Search for '>' from the start of the enclosing <script tag so that the
  // match is the end of that tag; fall back to the marker position when no
  // opening tag precedes it.
  const tagStart = rawHtml.lastIndexOf('<script', markerIdx);
  const tagEnd = rawHtml.indexOf('>', tagStart >= 0 ? tagStart : markerIdx);
  // Check before adding 1: indexOf(...) + 1 is never negative, which made
  // the original "not found" guard unreachable.
  if (tagEnd < 0) return null;

  const jsonStart = tagEnd + 1;
  const jsonEnd = rawHtml.indexOf('</script>', jsonStart);
  if (jsonEnd < 0) return null;

  return JSON.parse(rawHtml.substring(jsonStart, jsonEnd));
}
|
|
12
|
+
|
|
13
|
+
/**
 * Extract a flat user profile from a page's embedded SSR data.
 *
 * Reads `__DEFAULT_SCOPE__['webapp.user-detail'].userInfo` from the value
 * returned by parseSSR. A missing level anywhere on that path, or a missing
 * `user` object, yields null instead of a TypeError. A missing `stats`
 * object is treated as empty (counts come back undefined), matching how
 * parseVideoInfo treats its optional sub-objects.
 *
 * @param {string} rawHtml - Full HTML source of a user page.
 * @returns {object|null} The profile fields, or null when no user data is found.
 * @throws {SyntaxError} Propagated from parseSSR when the payload is not valid JSON.
 */
export function parseUserInfo(rawHtml) {
  const data = parseSSR(rawHtml);
  const userInfo = data?.__DEFAULT_SCOPE__?.['webapp.user-detail']?.userInfo;
  const u = userInfo?.user;
  if (!u) return null;
  const s = userInfo.stats || {};

  return {
    uniqueId: u.uniqueId,
    nickname: u.nickname,
    id: u.id,
    verified: u.verified,
    privateAccount: u.privateAccount,
    language: u.language,
    bio: u.signature || '',
    // Prefer the largest avatar variant that is present.
    avatar: u.avatarLarger || u.avatarMedium || u.avatarThumb || '',
    followerCount: s.followerCount,
    followingCount: s.followingCount,
    heartCount: s.heartCount,
    videoCount: s.videoCount,
    diggCount: s.diggCount,
    friendCount: s.friendCount,
    secUid: u.secUid,
    ttSeller: u.ttSeller || false,
    locationCreated: u.locationCreated || null,
  };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Extract video details from a page's embedded SSR data.
 *
 * Reads `__DEFAULT_SCOPE__['webapp.video-detail'].itemInfo.itemStruct` from
 * the value returned by parseSSR. A missing level anywhere on that path
 * (including `__DEFAULT_SCOPE__` itself) yields null instead of a
 * TypeError. Missing `author` or `stats` objects are treated as empty.
 *
 * @param {string} rawHtml - Full HTML source of a video page.
 * @returns {object|null} `{ id, desc, createTime, locationCreated, author, stats }`,
 *   or null when no video data is found.
 * @throws {SyntaxError} Propagated from parseSSR when the payload is not valid JSON.
 */
export function parseVideoInfo(rawHtml) {
  const data = parseSSR(rawHtml);
  const item = data?.__DEFAULT_SCOPE__?.['webapp.video-detail']?.itemInfo?.itemStruct;
  if (!item) return null;

  const author = item.author || {};
  const stats = item.stats || {};

  return {
    id: item.id,
    desc: item.desc || '',
    createTime: item.createTime || null,
    locationCreated: item.locationCreated || null,
    author: {
      uniqueId: author.uniqueId,
      nickname: author.nickname,
      id: author.id,
      verified: author.verified,
      secUid: author.secUid,
    },
    stats: {
      playCount: stats.playCount,
      diggCount: stats.diggCount,
      commentCount: stats.commentCount,
      shareCount: stats.shareCount,
      collectCount: stats.collectCount,
    },
  };
}
|