tt-help-cli-ycl 1.3.12 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -45
- package/scripts/run-explore.bat +68 -68
- package/scripts/run-explore.ps1 +81 -81
- package/scripts/run-explore.sh +73 -73
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/src/cli/auto.js +186 -157
- package/src/cli/explore.js +227 -193
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +216 -0
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +456 -402
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +52 -10
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +146 -87
- package/src/lib/constants.js +119 -115
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +40 -40
- package/src/lib/url.js +52 -52
- package/src/main.js +2 -0
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.js +203 -194
- package/src/scraper/core.js +211 -190
- package/src/scraper/explore-core.js +180 -171
- package/src/scraper/modules/captcha-handler.js +114 -114
- package/src/scraper/modules/comment-extractor.js +74 -69
- package/src/scraper/modules/follow-extractor.js +121 -121
- package/src/scraper/modules/guess-extractor.js +51 -51
- package/src/scraper/modules/page-helpers.js +48 -48
- package/src/scraper/refresh-core.js +179 -0
- package/src/videos/core.js +126 -126
- package/src/watch/data-store.js +431 -302
- package/src/watch/public/index.html +721 -701
- package/src/watch/server.js +483 -359
package/src/cli/explore.js
CHANGED
|
@@ -1,193 +1,227 @@
|
|
|
1
|
-
import { getOrCreatePage } from '../lib/browser/page.js';
|
|
2
|
-
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.js';
|
|
3
|
-
import { userId as configuredUserId, saveUserId } from '../lib/constants.js';
|
|
4
|
-
import { getMacOrUuid } from '../lib/mac-or-uuid.js';
|
|
5
|
-
import { ensureBrowserReady as ensureBrowserReadyCDP } from '../lib/browser/cdp.js';
|
|
6
|
-
import path from 'path';
|
|
7
|
-
import os from 'os';
|
|
8
|
-
|
|
9
|
-
const MAX_RETRY_WAIT = 5 * 60 * 1000;

/**
 * Run `fn` until it succeeds, sleeping between failed attempts.
 *
 * The wait starts at 1 second and doubles after every failure, but is
 * clamped to MAX_RETRY_WAIT. There is no attempt limit: the returned
 * promise settles only once `fn` succeeds.
 *
 * @param {string} label - Text included in the log line of every failure.
 * @param {() => any} fn - Operation to attempt; may return a promise.
 * @returns {Promise<any>} Whatever the first successful call of `fn` yields.
 */
async function withRetry(label, fn) {
  let backoff = 1000;
  while (true) {
    try {
      return await fn();
    } catch (err) {
      // Rejections are not guaranteed to be Error objects; avoid logging "undefined".
      const reason = err && err.message !== undefined ? err.message : String(err);
      console.error(`[连接] ${label} 失败: ${reason},${backoff / 1000}秒后重试...`);
      await new Promise((resolve) => setTimeout(resolve, backoff));
      // Clamp instead of "double while below the cap": plain doubling jumped
      // from 256 s to 512 s, well past the 300 s limit.
      backoff = Math.min(backoff * 2, MAX_RETRY_WAIT);
    }
  }
}
|
|
23
|
-
|
|
24
|
-
/**
 * POST `body` as JSON to `url` and resolve with the parsed JSON response.
 *
 * The request runs inside withRetry, so any thrown failure (network error,
 * unparsable body, 5xx status) is retried.
 *
 * @param {string} url - Endpoint to post to.
 * @param {any} body - Value serialised with JSON.stringify as the request body.
 * @returns {Promise<any>} Parsed JSON body of the response.
 */
async function apiPost(url, body) {
  return withRetry(`POST ${url}`, async () => {
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    // A 5xx answer is a server-side failure, not a result. Throw so the retry
    // wrapper tries again rather than handing an error body to the caller as
    // data. Every other status is passed through exactly as before.
    if (res.status >= 500) {
      throw new Error(`HTTP ${res.status}`);
    }
    return res.json();
  });
}
|
|
34
|
-
|
|
35
|
-
/**
 * GET `url` and resolve with the parsed JSON response.
 *
 * The request runs inside withRetry, so any thrown failure (network error,
 * unparsable body, 5xx status) is retried.
 *
 * @param {string} url - Endpoint to fetch.
 * @returns {Promise<any>} Parsed JSON body of the response.
 */
async function apiGet(url) {
  return withRetry(`GET ${url}`, async () => {
    const res = await fetch(url);
    // A 5xx answer is a server-side failure, not a result. Throw so the retry
    // wrapper tries again rather than handing an error body to the caller as
    // data. Every other status is passed through exactly as before.
    if (res.status >= 500) {
      throw new Error(`HTTP ${res.status}`);
    }
    return res.json();
  });
}
|
|
41
|
-
|
|
42
|
-
export async function handleExplore(options) {
|
|
43
|
-
const {
|
|
44
|
-
exploreUsernames, explorePreset, exploreMaxComments, exploreMaxGuess,
|
|
45
|
-
exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
|
|
46
|
-
exploreLocation, exploreMaxUsers, serverUrl,
|
|
47
|
-
explorePort, exploreProfile, exploreUserId,
|
|
48
|
-
} = options;
|
|
49
|
-
|
|
50
|
-
let userId = exploreUserId || configuredUserId;
|
|
51
|
-
if (!userId) {
|
|
52
|
-
userId = await getMacOrUuid();
|
|
53
|
-
saveUserId(userId);
|
|
54
|
-
console.error(`[初始化] 未检测到本地用户编号,已生成并使用: ${userId}`);
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
setDelayConfig(explorePreset);
|
|
58
|
-
|
|
59
|
-
await apiGet(`${serverUrl}/api/stats`);
|
|
60
|
-
|
|
61
|
-
if (exploreUsernames && exploreUsernames.length > 0) {
|
|
62
|
-
const { added, skipped } = await apiPost(`${serverUrl}/api/users`, { usernames: exploreUsernames });
|
|
63
|
-
console.error(`种子用户: ${added} 个新增, ${skipped} 个已存在`);
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
console.error(`\n国家筛选: ${exploreLocation}`);
|
|
67
|
-
console.error(`评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
|
|
68
|
-
console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
|
|
69
|
-
console.error(`服务器: ${serverUrl}(断开会自动重连)`);
|
|
70
|
-
if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
|
|
71
|
-
console.error(`CDP 端口: ${explorePort || 9222}, 用户编号: ${userId}`);
|
|
72
|
-
if (exploreProfile) console.error(`浏览器配置: ${exploreProfile}`);
|
|
73
|
-
|
|
74
|
-
const cdpOptions = {};
|
|
75
|
-
if (explorePort) cdpOptions.port = explorePort;
|
|
76
|
-
if (exploreProfile) {
|
|
77
|
-
cdpOptions.userDataDir = path.join(os.homedir(), 'Library', 'Application Support', `Microsoft Edge For Testing_${exploreProfile}`);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
const { processExplore } = await import('../scraper/explore-core.js');
|
|
82
|
-
|
|
83
|
-
const page = await getOrCreatePage(browser);
|
|
84
|
-
|
|
85
|
-
let processedCount = 0;
|
|
86
|
-
let errorCount = 0;
|
|
87
|
-
let consecutiveNetworkErrors = 0;
|
|
88
|
-
|
|
89
|
-
while (true) {
|
|
90
|
-
const job = await apiGet(`${serverUrl}/api/job?userId=${encodeURIComponent(userId)}`);
|
|
91
|
-
if (!job.hasJob) break;
|
|
92
|
-
|
|
93
|
-
const username = job.user.uniqueId;
|
|
94
|
-
processedCount++;
|
|
95
|
-
|
|
96
|
-
if (consecutiveNetworkErrors > 0) {
|
|
97
|
-
const waitTime = consecutiveNetworkErrors <= 2
|
|
98
|
-
? 0
|
|
99
|
-
: consecutiveNetworkErrors <= 5
|
|
100
|
-
? 30000
|
|
101
|
-
: 300000;
|
|
102
|
-
if (waitTime > 0) {
|
|
103
|
-
console.error(` [网络] 连续 ${consecutiveNetworkErrors} 次网络异常,等待 ${waitTime / 1000}s 后重试...`);
|
|
104
|
-
await new Promise(r => setTimeout(r, waitTime));
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
console.error(`\n[${processedCount}] 探索 @${username}...`);
|
|
109
|
-
|
|
110
|
-
const { switchMax } = getDelayConfig();
|
|
111
|
-
await delay(switchMax, switchMax * 3);
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
maxComments: exploreMaxComments,
|
|
115
|
-
maxGuess: exploreMaxGuess,
|
|
116
|
-
enableFollow: exploreEnableFollow,
|
|
117
|
-
maxFollowing: exploreMaxFollowing,
|
|
118
|
-
maxFollowers: exploreMaxFollowers,
|
|
119
|
-
location: exploreLocation,
|
|
120
|
-
browser,
|
|
121
|
-
}, console.error);
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
await
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
}
|
|
1
|
+
import { getOrCreatePage, isBrowserClosedError, relaunchBrowser } from '../lib/browser/page.js';
|
|
2
|
+
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.js';
|
|
3
|
+
import { userId as configuredUserId, saveUserId } from '../lib/constants.js';
|
|
4
|
+
import { getMacOrUuid } from '../lib/mac-or-uuid.js';
|
|
5
|
+
import { ensureBrowserReady as ensureBrowserReadyCDP } from '../lib/browser/cdp.js';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import os from 'os';
|
|
8
|
+
|
|
9
|
+
const MAX_RETRY_WAIT = 5 * 60 * 1000;

/**
 * Run `fn` until it succeeds, sleeping between failed attempts.
 *
 * The wait starts at 1 second and doubles after every failure, but is
 * clamped to MAX_RETRY_WAIT. There is no attempt limit: the returned
 * promise settles only once `fn` succeeds.
 *
 * @param {string} label - Text included in the log line of every failure.
 * @param {() => any} fn - Operation to attempt; may return a promise.
 * @returns {Promise<any>} Whatever the first successful call of `fn` yields.
 */
async function withRetry(label, fn) {
  let backoff = 1000;
  while (true) {
    try {
      return await fn();
    } catch (err) {
      // Rejections are not guaranteed to be Error objects; avoid logging "undefined".
      const reason = err && err.message !== undefined ? err.message : String(err);
      console.error(`[连接] ${label} 失败: ${reason},${backoff / 1000}秒后重试...`);
      await new Promise((resolve) => setTimeout(resolve, backoff));
      // Clamp instead of "double while below the cap": plain doubling jumped
      // from 256 s to 512 s, well past the 300 s limit.
      backoff = Math.min(backoff * 2, MAX_RETRY_WAIT);
    }
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * POST `body` as JSON to `url` and resolve with the parsed JSON response.
 *
 * The request runs inside withRetry, so any thrown failure (network error,
 * unparsable body, 5xx status) is retried.
 *
 * @param {string} url - Endpoint to post to.
 * @param {any} body - Value serialised with JSON.stringify as the request body.
 * @returns {Promise<any>} Parsed JSON body of the response.
 */
async function apiPost(url, body) {
  return withRetry(`POST ${url}`, async () => {
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    // A 5xx answer is a server-side failure, not a result. Throw so the retry
    // wrapper tries again rather than handing an error body to the caller as
    // data. Every other status is passed through exactly as before.
    if (res.status >= 500) {
      throw new Error(`HTTP ${res.status}`);
    }
    return res.json();
  });
}
|
|
34
|
+
|
|
35
|
+
/**
 * GET `url` and resolve with the parsed JSON response.
 *
 * The request runs inside withRetry, so any thrown failure (network error,
 * unparsable body, 5xx status) is retried.
 *
 * @param {string} url - Endpoint to fetch.
 * @returns {Promise<any>} Parsed JSON body of the response.
 */
async function apiGet(url) {
  return withRetry(`GET ${url}`, async () => {
    const res = await fetch(url);
    // A 5xx answer is a server-side failure, not a result. Throw so the retry
    // wrapper tries again rather than handing an error body to the caller as
    // data. Every other status is passed through exactly as before.
    if (res.status >= 500) {
      throw new Error(`HTTP ${res.status}`);
    }
    return res.json();
  });
}
|
|
41
|
+
|
|
42
|
+
/**
 * Shape the result of one processExplore run into the body that is posted
 * to the job endpoint for a successfully explored user.
 *
 * Every discovered account is tagged with `guessedLocation`, which is
 * `result.locationCreated` or null when that value is falsy.
 */
function buildJobPayload(result) {
  const guessedLocation = result.locationCreated || null;
  const tagAuthor = (author) => ({ author, guessedLocation });
  // Follow entries arrive either as [handle, displayName] pairs or as a bare handle.
  const toFollowEntry = (entry) => ({
    handle: Array.isArray(entry) ? entry[0] : entry,
    displayName: Array.isArray(entry) ? entry[1] : null,
    guessedLocation,
  });

  return {
    userInfo: result.userInfo || {},
    discoveredVideoAuthors: (result.discoveredVideoAuthors || []).map((item) =>
      typeof item === 'object' ? { ...item, guessedLocation } : item
    ),
    discoveredCommentAuthors: (result.discoveredCommentAuthors || []).map(tagAuthor),
    discoveredGuessAuthors: (result.discoveredGuessAuthors || []).map(tagAuthor),
    discoveredFollowing: (result.discoveredFollowing || []).map(toFollowEntry),
    discoveredFollowers: (result.discoveredFollowers || []).map(toFollowEntry),
    processed: result.processed,
    hasFollowData: result.hasFollowData,
    keepFollow: result.keepFollow,
    locationCreated: result.locationCreated,
    noVideo: result.noVideo,
  };
}

/**
 * Worker loop of the explore command.
 *
 * Resolves the user id (generating and saving one when none is configured),
 * optionally seeds usernames on the server, then repeatedly pulls a job from
 * `${serverUrl}/api/job`, runs processExplore for that user in the browser
 * and posts the outcome back. The loop ends when the server reports no job
 * or when `exploreMaxUsers` users have been handled. The browser is closed
 * on the way out, including when an exception escapes the loop.
 *
 * @param {object} options - Parsed CLI options; the fields read here are
 *   exploreUsernames, explorePreset, exploreMaxComments, exploreMaxGuess,
 *   exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
 *   exploreLocation, exploreMaxUsers, serverUrl, explorePort,
 *   exploreProfile and exploreUserId.
 * @returns {Promise<void>}
 */
export async function handleExplore(options) {
  const {
    exploreUsernames, explorePreset, exploreMaxComments, exploreMaxGuess,
    exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
    exploreLocation, exploreMaxUsers, serverUrl,
    explorePort, exploreProfile, exploreUserId,
  } = options;

  let userId = exploreUserId || configuredUserId;
  if (!userId) {
    userId = await getMacOrUuid();
    saveUserId(userId);
    console.error(`[初始化] 未检测到本地用户编号,已生成并使用: ${userId}`);
  }

  setDelayConfig(explorePreset);

  // The response is not used; apiGet retries until the server answers, so
  // this call returns only once the server is reachable.
  await apiGet(`${serverUrl}/api/stats`);

  if (exploreUsernames && exploreUsernames.length > 0) {
    const { added, skipped } = await apiPost(`${serverUrl}/api/users`, { usernames: exploreUsernames });
    console.error(`种子用户: ${added} 个新增, ${skipped} 个已存在`);
  }

  console.error(`\n国家筛选: ${exploreLocation}`);
  console.error(`评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
  console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
  console.error(`服务器: ${serverUrl}(断开会自动重连)`);
  if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
  console.error(`CDP 端口: ${explorePort || 9222}, 用户编号: ${userId}`);
  if (exploreProfile) console.error(`浏览器配置: ${exploreProfile}`);

  const cdpOptions = {};
  if (explorePort) cdpOptions.port = explorePort;
  if (exploreProfile) {
    // NOTE(review): "Library/Application Support" looks like the macOS
    // profile location; presumably other platforms need a different base
    // directory. Confirm before relying on exploreProfile elsewhere.
    cdpOptions.userDataDir = path.join(os.homedir(), 'Library', 'Application Support', `Microsoft Edge For Testing_${exploreProfile}`);
  }

  let browser = await ensureBrowserReadyCDP(cdpOptions);

  let processedCount = 0;
  let errorCount = 0;
  let consecutiveNetworkErrors = 0;

  // Built on demand so that `browser` is always the current instance,
  // including after a relaunch.
  const exploreOptions = () => ({
    maxComments: exploreMaxComments,
    maxGuess: exploreMaxGuess,
    enableFollow: exploreEnableFollow,
    maxFollowing: exploreMaxFollowing,
    maxFollowers: exploreMaxFollowers,
    location: exploreLocation,
    browser,
  });

  // Logs and returns true once the configured user limit has been reached.
  const limitReached = () => {
    if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
      console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
      return true;
    }
    return false;
  };

  // apiPost already retries until the server answers and never rejects, so
  // an additional retry wrapper or catch handler around it would be dead code.
  const sendErrorReport = (report) => apiPost(`${serverUrl}/api/error-report`, report);

  try {
    const { processExplore } = await import('../scraper/explore-core.js');

    let page = await getOrCreatePage(browser);

    while (true) {
      const job = await apiGet(`${serverUrl}/api/job?userId=${encodeURIComponent(userId)}`);
      if (!job.hasJob) break;

      const username = job.user.uniqueId;
      // Encode the path segment the same way the userId query value is
      // encoded; names made of letters, digits, "_" and "." are unchanged.
      const jobUrl = `${serverUrl}/api/job/${encodeURIComponent(username)}`;
      processedCount++;

      // Throttle after repeated failures: none up to 2, 30 s up to 5, then 5 min.
      let waitTime = 0;
      if (consecutiveNetworkErrors > 5) {
        waitTime = 300000;
      } else if (consecutiveNetworkErrors > 2) {
        waitTime = 30000;
      }
      if (waitTime > 0) {
        console.error(` [网络] 连续 ${consecutiveNetworkErrors} 次网络异常,等待 ${waitTime / 1000}s 后重试...`);
        await new Promise((resolve) => setTimeout(resolve, waitTime));
      }

      console.error(`\n[${processedCount}] 探索 @${username}...`);

      const { switchMax } = getDelayConfig();
      await delay(switchMax, switchMax * 3);

      let result = await processExplore(page, username, exploreOptions(), console.error);

      // Browser-closed detection: processExplore catches exceptions
      // internally, so the condition has to be derived from result.error.
      if (result.error && isBrowserClosedError(new Error(result.error))) {
        browser = await relaunchBrowser(cdpOptions, explorePort || 9222);
        // Rebind to the page of the new browser. Copying the new page's
        // properties onto the old object (Object.assign) only transfers own
        // enumerable properties and leaves the rest of the old page's state
        // in place.
        page = await getOrCreatePage(browser);
        // Retry the current user once with the fresh browser.
        result = await processExplore(page, username, exploreOptions(), console.error);
      }

      if (result.restricted) {
        consecutiveNetworkErrors = 0;
        await apiPost(jobUrl, { restricted: true, userInfo: result.userInfo || {} });
        if (limitReached()) break;
        continue;
      }

      if (result.error) {
        consecutiveNetworkErrors++;
        errorCount++;
        await apiPost(jobUrl, { error: result.error });
        // Only the second and later consecutive failures are reported as 'network'.
        const errorType = consecutiveNetworkErrors > 1 ? 'network' : 'other';
        await sendErrorReport({
          userId,
          username,
          errorType,
          errorMessage: result.error,
          stage: 'process',
          errorStack: result.errorStack || '',
        });
        if (limitReached()) break;
        continue;
      }

      if (result.captchaDetected) {
        await sendErrorReport({
          userId,
          username,
          errorType: 'captcha',
          errorMessage: result.captchaMessage || '页面出现验证码',
          stage: result.captchaStage || 'video-page',
          errorStack: '',
        });
      }

      consecutiveNetworkErrors = 0;

      await apiPost(jobUrl, buildJobPayload(result));
      console.error(' 已提交');

      if (limitReached()) break;
    }

    const stats = await apiGet(`${serverUrl}/api/stats`);
    console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
    console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
  } finally {
    // Best effort: release the browser even when the loop above throws.
    await browser.close().catch(() => {});
  }
}
|
package/src/cli/progress.js
CHANGED
|
@@ -1,111 +1,111 @@
|
|
|
1
|
-
import { writeFileSync } from 'fs';
|
|
2
|
-
import { formatOutput } from '../lib/output.js';
|
|
3
|
-
import { deduplicate } from '../lib/output.js';
|
|
4
|
-
import { applyFilter, formatFilterDescription } from '../lib/filter.js';
|
|
5
|
-
import { calculateConcurrency, createMultiProgressBars, renderMultiProgressBars, clearProgressBars } from '../lib/io.js';
|
|
6
|
-
import { randomDelay } from '../lib/delay.js';
|
|
7
|
-
|
|
8
|
-
/**
 * Turn whatever a failed processFn call rejected with into a printable string.
 * Rejections are not guaranteed to be Error objects.
 */
function describeFailure(err) {
  if (err && typeof err.message === 'string') return err.message;
  return String(err);
}

/**
 * Process every URL with `processFn`, showing one progress bar per worker,
 * then de-duplicate, filter, format and emit the collected results.
 *
 * URLs are dealt round-robin into `calculateConcurrency(urls.length)` slots;
 * each slot is worked through sequentially with a random delay before every
 * URL. A failing URL is recorded and does not stop the other URLs.
 *
 * @param {object} params
 * @param {string[]} params.urls - URLs to process.
 * @param {string} [params.proxyUrl] - Passed to `processFn` and shown in proxy error hints.
 * @param {string} [params.outputFile] - When set, output is written here instead of stdout.
 * @param {string} [params.outputFormat] - Passed through to formatOutput.
 * @param {*} [params.filter] - Passed through to applyFilter / formatFilterDescription.
 * @param {(url: string, proxyUrl?: string) => any} params.processFn - Returns
 *   (or resolves to) an iterable of results for one URL.
 * @param {string} [params.label='数据'] - Noun used in the summary line.
 * @returns {Promise<void>}
 */
export async function processUrlsWithProgress({
  urls,
  proxyUrl,
  outputFile,
  outputFormat,
  filter,
  processFn,
  label = '数据',
}) {
  const allResults = [];
  const errors = [];

  if (urls.length === 0) {
    console.error('\n未获取到数据');
    if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
    return;
  }

  const concurrency = calculateConcurrency(urls.length);
  const bars = createMultiProgressBars(concurrency);

  // Deal the URLs round-robin so every worker gets a similar share.
  const slots = Array.from({ length: concurrency }, () => []);
  urls.forEach((url, i) => slots[i % concurrency].push(url));

  bars.forEach((bar, i) => {
    bar.total = slots[i].length;
    bar.status = slots[i].length > 0 ? 'running' : 'done';
  });

  renderMultiProgressBars(bars);

  const workers = slots.map(async (slotUrls, slotIndex) => {
    const bar = bars[slotIndex];
    for (const url of slotUrls) {
      bar.url = url;
      renderMultiProgressBars(bars);

      await randomDelay();

      try {
        const results = await processFn(url, proxyUrl);
        // Append one by one: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const item of results) allResults.push(item);
        bar.current++;
        bar.status = 'running';
      } catch (err) {
        // Always store a string so the keyword check below cannot throw
        // when the rejection value is not an Error.
        errors.push({ url, message: describeFailure(err) });
        bar.current++;
        bar.status = 'error';
      }

      renderMultiProgressBars(bars);
    }
    bar.status = bar.current === bar.total ? 'done' : 'error';
    renderMultiProgressBars(bars);
  });

  await Promise.all(workers);
  clearProgressBars();

  const uniqueResults = deduplicate(allResults);
  const filteredResults = applyFilter(uniqueResults, filter);

  if (errors.length > 0) {
    // Only the first failure is inspected to decide whether to show the proxy hint.
    const firstMsg = errors[0].message;
    const isProxyError = ['不可用', '连接被拒绝', '连接中断', '超时', '无法解析']
      .some((kw) => firstMsg.includes(kw));
    const shown = errors.slice(0, 5);

    if (filteredResults.length === 0) {
      if (isProxyError) {
        console.error(` 所有请求失败,请检查代理: ${proxyUrl}\n`);
      } else {
        for (const e of shown) console.error(` ✗ ${e.url}: ${e.message}\n`);
        if (errors.length > 5) console.error(` ... 还有 ${errors.length - 5} 个失败\n`);
      }
      console.error('未获取到数据');
      if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
      return;
    }

    if (isProxyError) {
      console.error(` ${errors.length} 个请求失败,请检查代理: ${proxyUrl}\n`);
    } else {
      console.error(` ${errors.length} 个失败:`);
      for (const e of shown) console.error(` ✗ ${e.url}: ${e.message}`);
      if (errors.length > 5) console.error(` ... 还有 ${errors.length - 5} 个`);
    }
  }

  const output = formatOutput(filteredResults, outputFormat);

  if (outputFile) {
    writeFileSync(outputFile, output, 'utf-8');
    console.log(`\n结果已写入: ${outputFile}`);
  } else {
    process.stdout.write(output + '\n');
  }

  if (filter) {
    console.log(`\n共 ${uniqueResults.length} 个${label},过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
  } else {
    console.log(`\n共 ${filteredResults.length} 个${label}`);
  }
}
|
|
1
|
+
import { writeFileSync } from 'fs';
|
|
2
|
+
import { formatOutput } from '../lib/output.js';
|
|
3
|
+
import { deduplicate } from '../lib/output.js';
|
|
4
|
+
import { applyFilter, formatFilterDescription } from '../lib/filter.js';
|
|
5
|
+
import { calculateConcurrency, createMultiProgressBars, renderMultiProgressBars, clearProgressBars } from '../lib/io.js';
|
|
6
|
+
import { randomDelay } from '../lib/delay.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Turn whatever a failed processFn call rejected with into a printable string.
 * Rejections are not guaranteed to be Error objects.
 */
function describeFailure(err) {
  if (err && typeof err.message === 'string') return err.message;
  return String(err);
}

/**
 * Process every URL with `processFn`, showing one progress bar per worker,
 * then de-duplicate, filter, format and emit the collected results.
 *
 * URLs are dealt round-robin into `calculateConcurrency(urls.length)` slots;
 * each slot is worked through sequentially with a random delay before every
 * URL. A failing URL is recorded and does not stop the other URLs.
 *
 * @param {object} params
 * @param {string[]} params.urls - URLs to process.
 * @param {string} [params.proxyUrl] - Passed to `processFn` and shown in proxy error hints.
 * @param {string} [params.outputFile] - When set, output is written here instead of stdout.
 * @param {string} [params.outputFormat] - Passed through to formatOutput.
 * @param {*} [params.filter] - Passed through to applyFilter / formatFilterDescription.
 * @param {(url: string, proxyUrl?: string) => any} params.processFn - Returns
 *   (or resolves to) an iterable of results for one URL.
 * @param {string} [params.label='数据'] - Noun used in the summary line.
 * @returns {Promise<void>}
 */
export async function processUrlsWithProgress({
  urls,
  proxyUrl,
  outputFile,
  outputFormat,
  filter,
  processFn,
  label = '数据',
}) {
  const allResults = [];
  const errors = [];

  if (urls.length === 0) {
    console.error('\n未获取到数据');
    if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
    return;
  }

  const concurrency = calculateConcurrency(urls.length);
  const bars = createMultiProgressBars(concurrency);

  // Deal the URLs round-robin so every worker gets a similar share.
  const slots = Array.from({ length: concurrency }, () => []);
  urls.forEach((url, i) => slots[i % concurrency].push(url));

  bars.forEach((bar, i) => {
    bar.total = slots[i].length;
    bar.status = slots[i].length > 0 ? 'running' : 'done';
  });

  renderMultiProgressBars(bars);

  const workers = slots.map(async (slotUrls, slotIndex) => {
    const bar = bars[slotIndex];
    for (const url of slotUrls) {
      bar.url = url;
      renderMultiProgressBars(bars);

      await randomDelay();

      try {
        const results = await processFn(url, proxyUrl);
        // Append one by one: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const item of results) allResults.push(item);
        bar.current++;
        bar.status = 'running';
      } catch (err) {
        // Always store a string so the keyword check below cannot throw
        // when the rejection value is not an Error.
        errors.push({ url, message: describeFailure(err) });
        bar.current++;
        bar.status = 'error';
      }

      renderMultiProgressBars(bars);
    }
    bar.status = bar.current === bar.total ? 'done' : 'error';
    renderMultiProgressBars(bars);
  });

  await Promise.all(workers);
  clearProgressBars();

  const uniqueResults = deduplicate(allResults);
  const filteredResults = applyFilter(uniqueResults, filter);

  if (errors.length > 0) {
    // Only the first failure is inspected to decide whether to show the proxy hint.
    const firstMsg = errors[0].message;
    const isProxyError = ['不可用', '连接被拒绝', '连接中断', '超时', '无法解析']
      .some((kw) => firstMsg.includes(kw));
    const shown = errors.slice(0, 5);

    if (filteredResults.length === 0) {
      if (isProxyError) {
        console.error(` 所有请求失败,请检查代理: ${proxyUrl}\n`);
      } else {
        for (const e of shown) console.error(` ✗ ${e.url}: ${e.message}\n`);
        if (errors.length > 5) console.error(` ... 还有 ${errors.length - 5} 个失败\n`);
      }
      console.error('未获取到数据');
      if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
      return;
    }

    if (isProxyError) {
      console.error(` ${errors.length} 个请求失败,请检查代理: ${proxyUrl}\n`);
    } else {
      console.error(` ${errors.length} 个失败:`);
      for (const e of shown) console.error(` ✗ ${e.url}: ${e.message}`);
      if (errors.length > 5) console.error(` ... 还有 ${errors.length - 5} 个`);
    }
  }

  const output = formatOutput(filteredResults, outputFormat);

  if (outputFile) {
    writeFileSync(outputFile, output, 'utf-8');
    console.log(`\n结果已写入: ${outputFile}`);
  } else {
    process.stdout.write(output + '\n');
  }

  if (filter) {
    console.log(`\n共 ${uniqueResults.length} 个${label},过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
  } else {
    console.log(`\n共 ${filteredResults.length} 个${label}`);
  }
}
|