tt-help-cli-ycl 1.3.11 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -46
- package/{bat → scripts}/run-explore.bat +68 -68
- package/{bat → scripts}/run-explore.ps1 +81 -81
- package/{bat → scripts}/run-explore.sh +73 -73
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/src/cli/auto.js +186 -157
- package/src/cli/config.js +116 -0
- package/src/cli/explore-default.js +83 -0
- package/src/cli/explore.js +227 -181
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +216 -0
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +456 -391
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +194 -142
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +146 -87
- package/src/lib/constants.js +119 -119
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +40 -40
- package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
- package/src/lib/url.js +52 -52
- package/src/main.js +48 -0
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/{auto-core.mjs → auto-core.js} +203 -194
- package/src/scraper/{core.mjs → core.js} +211 -190
- package/src/scraper/{explore-core.mjs → explore-core.js} +180 -171
- package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +114 -114
- package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +74 -69
- package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +121 -121
- package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +51 -51
- package/src/scraper/modules/page-error-detector.js +1 -0
- package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +48 -48
- package/src/scraper/modules/scroll-collector.js +8 -0
- package/src/scraper/refresh-core.js +179 -0
- package/src/videos/{core.mjs → core.js} +126 -126
- package/src/watch/data-store.js +431 -0
- package/src/watch/public/index.html +721 -690
- package/src/watch/{server.mjs → server.js} +484 -349
- package/src/main.mjs +0 -234
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
- package/src/watch/data-store.mjs +0 -274
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { HELP_TEXT, configPath, saveBrowser, saveUserId, getConfigText } from '../lib/constants.js';
|
|
2
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { dirname, join } from 'path';
|
|
5
|
+
|
|
6
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
const pkgPath = join(__dirname, '..', '..', 'package.json');
|
|
8
|
+
const { version } = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
9
|
+
|
|
10
|
+
/**
 * Print the current configuration to stderr, followed by optional run details.
 *
 * @param {string[]} urls - URLs for this run; a count line is added when non-empty.
 * @param {string} [outputFile] - Output path; an extra line is added when truthy.
 */
function showConfig(urls, outputFile) {
  const lines = getConfigText();
  const hasUrls = urls.length > 0;

  if (hasUrls) lines.push(`\n URL数量: ${urls.length}`);
  if (outputFile) lines.push(` 输出文件: ${outputFile}`);

  const text = lines.join('\n');
  console.error(text);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Print the help text to stderr, then terminate the process with exit code 0.
 */
function showUsage() {
  const helpText = HELP_TEXT.join('\n');
  console.error(helpText);
  process.exit(0);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Handle the `config` sub-command.
 *
 * @param {string} action - One of `show`, `set` or `reset`; anything else prints usage.
 * @param {string} [key] - Setting to change when action is `set`
 *   (`proxy`, `server`, `browser` or `userId`).
 * @param {string} [value] - New value for the setting when action is `set`.
 */
function handleConfig(action, key, value) {
  if (action === 'show') {
    console.error(getConfigText().join('\n'));
    return;
  }

  if (action === 'reset') {
    if (!existsSync(configPath)) {
      console.error('配置文件不存在或已是默认状态');
      return;
    }
    writeFileSync(configPath, '{}', 'utf-8');
    console.error('配置已重置为默认');
    return;
  }

  if (action !== 'set') {
    console.error(`未知配置命令: ${action}`);
    console.error('用法: tt-help config [show|set|reset]');
    return;
  }

  if (!key) {
    console.error('用法: tt-help config set <key> <value>');
    console.error(' 可用 key: proxy, server, browser, userId');
    return;
  }

  // One entry per supported key: the lines printed when the value is missing,
  // the function that persists the value, and the confirmation message.
  const settings = new Map([
    ['proxy', {
      missing: ['请提供 proxy 的值', '用法: tt-help config set proxy <代理地址>'],
      save: saveProxy,
      done: (v) => `代理已更新: ${v}`,
    }],
    ['server', {
      missing: ['请提供 server 的值'],
      save: saveServer,
      done: (v) => `服务器已更新: ${v}`,
    }],
    ['browser', {
      missing: [
        '请提供 browser 的值',
        '用法: tt-help config set browser <浏览器路径>',
        ' 或: tt-help config set-browser <浏览器路径>',
      ],
      save: saveBrowser,
      done: (v) => `浏览器路径已更新: ${v}`,
    }],
    ['userId', {
      missing: ['请提供 userId 的值'],
      save: saveUserId,
      done: (v) => `用户号已更新: ${v}`,
    }],
  ]);

  const setting = settings.get(key);
  if (!setting) {
    console.error(`未知配置项: ${key}`);
    console.error(' 可用 key: proxy, server, browser, userId');
    return;
  }

  if (!value) {
    for (const line of setting.missing) console.error(line);
    return;
  }

  setting.save(value);
  console.error(setting.done(value));
}
|
|
103
|
+
|
|
104
|
+
/**
 * Store the proxy address in the config file, keeping any other saved keys.
 *
 * @param {string} newProxy - Proxy address to write under the `proxy` key.
 */
function saveProxy(newProxy) {
  let settings = {};
  if (existsSync(configPath)) {
    const raw = readFileSync(configPath, 'utf-8');
    settings = JSON.parse(raw);
  }
  settings.proxy = newProxy;

  const serialized = JSON.stringify(settings, null, 2);
  writeFileSync(configPath, serialized, 'utf-8');
}
|
|
109
|
+
|
|
110
|
+
/**
 * Store the server address in the config file, keeping any other saved keys.
 *
 * @param {string} newServer - Server address to write under the `server` key.
 */
function saveServer(newServer) {
  let settings = {};
  if (existsSync(configPath)) {
    const raw = readFileSync(configPath, 'utf-8');
    settings = JSON.parse(raw);
  }
  settings.server = newServer;

  const serialized = JSON.stringify(settings, null, 2);
  writeFileSync(configPath, serialized, 'utf-8');
}
|
|
115
|
+
|
|
116
|
+
export { handleConfig, showConfig, showUsage, version };
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { parseFilter, applyFilter, formatFilterDescription } from '../lib/filter.js';
|
|
2
|
+
import { writeFileSync } from 'fs';
|
|
3
|
+
import { processUrlsWithProgress } from './progress.js';
|
|
4
|
+
import { cleanError } from './utils.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Default explore flow: optionally fetch explore results, optionally scrape the
 * given URLs, then de-duplicate, filter and emit whatever was collected.
 *
 * @param {number} exploreCount - Number of explore items to fetch; skipped when not > 0.
 * @param {string[]} urls - URLs to scrape; when non-empty they are processed and the function returns.
 * @param {string} proxyUrl - Proxy address passed to the scraper and shown in the failure hint.
 * @param {string} [outputFile] - File to write results to; stdout is used when falsy.
 * @param {string} outputFormat - Format passed to `formatOutput` / the scraper.
 * @param {boolean} pipeMode - When truthy, explore result URLs are fed straight into `runScrapeDefault`.
 * @param {*} filter - Filter passed to `applyFilter`; falsy means no filter summary is printed.
 * @returns {Promise<void>}
 */
async function runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter) {
  const collected = [];

  if (exploreCount > 0) {
    try {
      const exploreModule = await import('../lib/explore-fetch.js');
      const fetched = await exploreModule.fetchExplore(exploreCount);

      console.log(` 获取到 ${fetched.length} 个视频\n`);

      if (pipeMode) {
        const pipedUrls = [];
        for (const item of fetched) {
          if (item.url) pipedUrls.push(item.url);
        }
        if (pipedUrls.length > 0) {
          // Stays inside the try so scrape failures are reported by the catch below.
          await runScrapeDefault(pipedUrls, proxyUrl, outputFile, outputFormat, filter);
          return;
        }
      }

      collected.push(...fetched);
    } catch (err) {
      console.error(` Explore 获取失败: ${cleanError(err.message)}\n`);
      console.error(` 请确保代理 ${proxyUrl} 正常运行\n`);
    }
  }

  if (urls.length > 0) {
    const scrapeModule = await import('../lib/scrape.js');
    await processUrlsWithProgress({
      urls,
      proxyUrl,
      outputFile,
      outputFormat,
      filter,
      processFn: (url, px) => scrapeModule.processUrl(url, px),
      label: '数据',
      log: console.log,
    });
    return;
  }

  const { deduplicate, formatOutput } = await import('../lib/output.js');
  const uniqueResults = deduplicate(collected);
  const keptResults = applyFilter(uniqueResults, filter);

  if (keptResults.length === 0) {
    console.log('\n未获取到数据');
    if (outputFile) {
      writeFileSync(outputFile, '[]', 'utf-8');
    }
    return;
  }

  const rendered = formatOutput(keptResults, outputFormat);
  if (!outputFile) {
    console.log(rendered);
  } else {
    writeFileSync(outputFile, rendered, 'utf-8');
    console.log(`\n结果已写入: ${outputFile}`);
  }

  const summary = filter
    ? `\n共 ${uniqueResults.length} 个数据,过滤后 ${keptResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`
    : `\n共 ${keptResults.length} 个数据`;
  console.log(summary);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Scrape the given URLs through the shared progress runner.
 *
 * @param {string[]} urls - URLs to process.
 * @param {string} proxyUrl - Proxy address forwarded to the runner.
 * @param {string} [outputFile] - Output file forwarded to the runner.
 * @param {string} outputFormat - Output format forwarded to the runner.
 * @param {*} filter - Filter forwarded to the runner.
 * @returns {Promise<void>}
 */
async function runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter) {
  const scrapeModule = await import('../lib/scrape.js');
  const scrapeOne = (url, px) => scrapeModule.processUrl(url, px);

  const runnerOptions = {
    urls,
    proxyUrl,
    outputFile,
    outputFormat,
    filter,
    processFn: scrapeOne,
    label: '用户的数据',
    log: console.log,
  };
  await processUrlsWithProgress(runnerOptions);
}
|
|
82
|
+
|
|
83
|
+
export { runExploreDefault, runScrapeDefault };
|
package/src/cli/explore.js
CHANGED
|
@@ -1,181 +1,227 @@
|
|
|
1
|
-
import { getOrCreatePage } from '../lib/browser/page.js';
|
|
2
|
-
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.
|
|
3
|
-
import { userId as configuredUserId, saveUserId } from '../lib/constants.js';
|
|
4
|
-
import { getMacOrUuid } from '../lib/mac-or-uuid.js';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
await
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
})
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
const
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
if (result.
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
1
|
+
import { getOrCreatePage, isBrowserClosedError, relaunchBrowser } from '../lib/browser/page.js';
|
|
2
|
+
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.js';
|
|
3
|
+
import { userId as configuredUserId, saveUserId } from '../lib/constants.js';
|
|
4
|
+
import { getMacOrUuid } from '../lib/mac-or-uuid.js';
|
|
5
|
+
import { ensureBrowserReady as ensureBrowserReadyCDP } from '../lib/browser/cdp.js';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import os from 'os';
|
|
8
|
+
|
|
9
|
+
// Upper bound for the delay between two retry attempts (5 minutes, in ms).
const MAX_RETRY_WAIT = 5 * 60 * 1000;

/**
 * Call `fn` until it resolves, waiting between failed attempts.
 *
 * The wait starts at 1 second and doubles after every failure, but is clamped
 * to MAX_RETRY_WAIT so it never exceeds the configured maximum. There is no
 * attempt limit: the returned promise only settles once `fn` succeeds.
 *
 * @template T
 * @param {string} label - Short description of the operation, used in the log line.
 * @param {() => Promise<T>} fn - Operation to attempt.
 * @returns {Promise<T>} The value `fn` eventually resolves with.
 */
async function withRetry(label, fn) {
  let backoff = 1000;
  while (true) {
    try {
      return await fn();
    } catch (err) {
      // A rejected value is not guaranteed to be an Error (it may even be
      // null/undefined); never let the log line itself throw.
      const reason = err?.message ?? String(err);
      console.error(`[连接] ${label} 失败: ${reason},${backoff / 1000}秒后重试...`);
      await new Promise((resolve) => setTimeout(resolve, backoff));
      // Clamp after doubling; plain doubling would overshoot the cap
      // (256s -> 512s) and then stay above it.
      backoff = Math.min(backoff * 2, MAX_RETRY_WAIT);
    }
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * POST `body` as JSON to `url` and return the parsed JSON response.
 * The whole request is run through `withRetry`, so a failed attempt is retried.
 *
 * @param {string} url - Endpoint to post to.
 * @param {*} body - Value serialized with JSON.stringify as the request body.
 * @returns {Promise<*>} Parsed JSON response body.
 */
async function apiPost(url, body) {
  const sendOnce = async () => {
    const requestInit = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    };
    const response = await fetch(url, requestInit);
    return response.json();
  };

  return withRetry(`POST ${url}`, sendOnce);
}
|
|
34
|
+
|
|
35
|
+
/**
 * GET `url` and return the parsed JSON response.
 * The whole request is run through `withRetry`, so a failed attempt is retried.
 *
 * @param {string} url - Endpoint to fetch.
 * @returns {Promise<*>} Parsed JSON response body.
 */
async function apiGet(url) {
  const fetchOnce = async () => {
    const response = await fetch(url);
    return response.json();
  };

  return withRetry(`GET ${url}`, fetchOnce);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Run the server-driven explore loop.
 *
 * Resolves a user id (option, then configured value, then a generated one that
 * is saved), optionally registers seed usernames with the server, connects to a
 * browser over CDP and then repeatedly asks the server for a job, runs
 * `processExplore` on that user and posts the outcome back. The loop ends when
 * the server reports no job or when `exploreMaxUsers` users have been handled.
 * The browser is closed on the way out, including when the loop throws.
 *
 * @param {object} options
 * @param {string[]} [options.exploreUsernames] - Seed usernames posted to `/api/users`.
 * @param {*} options.explorePreset - Passed to `setDelayConfig`.
 * @param {number} options.exploreMaxComments - Forwarded as `maxComments`.
 * @param {number} options.exploreMaxGuess - Forwarded as `maxGuess`.
 * @param {boolean} options.exploreEnableFollow - Forwarded as `enableFollow`.
 * @param {number} options.exploreMaxFollowing - Forwarded as `maxFollowing`.
 * @param {number} options.exploreMaxFollowers - Forwarded as `maxFollowers`.
 * @param {string} options.exploreLocation - Forwarded as `location`.
 * @param {number} options.exploreMaxUsers - Stop after this many users when > 0.
 * @param {string} options.serverUrl - Base URL of the job server.
 * @param {number} [options.explorePort] - CDP port; 9222 is used for display/relaunch when falsy.
 * @param {string} [options.exploreProfile] - Profile suffix used to build `userDataDir`.
 * @param {string} [options.exploreUserId] - Overrides the configured user id.
 * @returns {Promise<void>}
 */
export async function handleExplore(options) {
  const {
    exploreUsernames, explorePreset, exploreMaxComments, exploreMaxGuess,
    exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
    exploreLocation, exploreMaxUsers, serverUrl,
    explorePort, exploreProfile, exploreUserId,
  } = options;

  let userId = exploreUserId || configuredUserId;
  if (!userId) {
    userId = await getMacOrUuid();
    saveUserId(userId);
    console.error(`[初始化] 未检测到本地用户编号,已生成并使用: ${userId}`);
  }

  setDelayConfig(explorePreset);

  // apiGet retries until it succeeds, so this waits until the server answers.
  await apiGet(`${serverUrl}/api/stats`);

  if (exploreUsernames && exploreUsernames.length > 0) {
    const { added, skipped } = await apiPost(`${serverUrl}/api/users`, { usernames: exploreUsernames });
    console.error(`种子用户: ${added} 个新增, ${skipped} 个已存在`);
  }

  console.error(`\n国家筛选: ${exploreLocation}`);
  console.error(`评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
  console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
  console.error(`服务器: ${serverUrl}(断开会自动重连)`);
  if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
  console.error(`CDP 端口: ${explorePort || 9222}, 用户编号: ${userId}`);
  if (exploreProfile) console.error(`浏览器配置: ${exploreProfile}`);

  const cdpOptions = {};
  if (explorePort) cdpOptions.port = explorePort;
  if (exploreProfile) {
    // NOTE(review): this is a macOS-style profile location; confirm how other
    // platforms are expected to pass a profile.
    cdpOptions.userDataDir = path.join(os.homedir(), 'Library', 'Application Support', `Microsoft Edge For Testing_${exploreProfile}`);
  }

  let browser = await ensureBrowserReadyCDP(cdpOptions);

  let processedCount = 0;
  let errorCount = 0;
  let consecutiveNetworkErrors = 0;

  // Built on every call so it always carries the current `browser`.
  const buildExploreOptions = () => ({
    maxComments: exploreMaxComments,
    maxGuess: exploreMaxGuess,
    enableFollow: exploreEnableFollow,
    maxFollowing: exploreMaxFollowing,
    maxFollowers: exploreMaxFollowers,
    location: exploreLocation,
    browser,
  });

  // The username is a path segment, so encode it like the userId query value.
  const jobResultUrl = (username) => `${serverUrl}/api/job/${encodeURIComponent(username)}`;

  // True (after logging) once the configured user limit has been reached.
  const reachedLimit = () => {
    if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
      console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
      return true;
    }
    return false;
  };

  // Best-effort error report; a failure here must not stop the loop.
  const sendReport = (label, report) =>
    withRetry(label, () => apiPost(`${serverUrl}/api/error-report`, report)).catch(() => {});

  try {
    const { processExplore } = await import('../scraper/explore-core.js');

    let page = await getOrCreatePage(browser);

    while (true) {
      const job = await apiGet(`${serverUrl}/api/job?userId=${encodeURIComponent(userId)}`);
      if (!job?.hasJob) break;

      const username = job.user.uniqueId;
      processedCount++;

      if (consecutiveNetworkErrors > 0) {
        // Up to 2 consecutive errors: no wait; 3-5: 30s; more: 5 minutes.
        const waitTime = consecutiveNetworkErrors <= 2
          ? 0
          : consecutiveNetworkErrors <= 5
            ? 30000
            : 300000;
        if (waitTime > 0) {
          console.error(` [网络] 连续 ${consecutiveNetworkErrors} 次网络异常,等待 ${waitTime / 1000}s 后重试...`);
          await new Promise((resolve) => setTimeout(resolve, waitTime));
        }
      }

      console.error(`\n[${processedCount}] 探索 @${username}...`);

      const { switchMax } = getDelayConfig();
      await delay(switchMax, switchMax * 3);

      let result = await processExplore(page, username, buildExploreOptions(), console.error);

      // Browser-closed detection: processExplore catches exceptions internally,
      // so the condition has to be derived from result.error.
      if (result.error && isBrowserClosedError(new Error(result.error))) {
        browser = await relaunchBrowser(cdpOptions, explorePort || 9222);
        // Rebind to the new page. Object.assign onto the old page object would
        // copy only own enumerable properties and leave a handle tied to the
        // closed browser.
        page = await getOrCreatePage(browser);
        // Retry the current user.
        result = await processExplore(page, username, buildExploreOptions(), console.error);
      }

      if (result.restricted) {
        consecutiveNetworkErrors = 0;
        await apiPost(jobResultUrl(username), { restricted: true, userInfo: result.userInfo || {} });
        if (reachedLimit()) break;
        continue;
      }

      if (result.error) {
        consecutiveNetworkErrors++;
        errorCount++;
        await apiPost(jobResultUrl(username), { error: result.error });
        const errorType = consecutiveNetworkErrors > 1 ? 'network' : 'other';
        await sendReport('report error', {
          userId,
          username,
          errorType,
          errorMessage: result.error,
          stage: 'process',
          errorStack: result.errorStack || '',
        });
        if (reachedLimit()) break;
        continue;
      }

      if (result.captchaDetected) {
        await sendReport('report captcha', {
          userId,
          username,
          errorType: 'captcha',
          errorMessage: result.captchaMessage || '页面出现验证码',
          stage: result.captchaStage || 'video-page',
          errorStack: '',
        });
      }

      consecutiveNetworkErrors = 0;

      const guessedLocation = result.locationCreated || null;

      // Follow entries may be either a plain handle or a [handle, displayName] pair.
      const toFollowEntry = (f) => ({
        handle: Array.isArray(f) ? f[0] : f,
        displayName: Array.isArray(f) ? f[1] : null,
        guessedLocation,
      });
      const toAuthorEntry = (author) => ({ author, guessedLocation });

      const payload = {
        userInfo: result.userInfo || {},
        discoveredVideoAuthors: (result.discoveredVideoAuthors || []).map((item) =>
          typeof item === 'object' ? { ...item, guessedLocation } : item
        ),
        discoveredCommentAuthors: (result.discoveredCommentAuthors || []).map(toAuthorEntry),
        discoveredGuessAuthors: (result.discoveredGuessAuthors || []).map(toAuthorEntry),
        discoveredFollowing: (result.discoveredFollowing || []).map(toFollowEntry),
        discoveredFollowers: (result.discoveredFollowers || []).map(toFollowEntry),
        processed: result.processed,
        hasFollowData: result.hasFollowData,
        keepFollow: result.keepFollow,
        locationCreated: result.locationCreated,
        noVideo: result.noVideo,
      };
      await apiPost(jobResultUrl(username), payload);
      console.error(' 已提交');

      if (reachedLimit()) break;
    }

    const stats = await apiGet(`${serverUrl}/api/stats`);
    console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
    console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
  } finally {
    // Always release the browser, even when the loop above throws.
    await browser.close().catch(() => {});
  }
}
|