tt-help-cli-ycl 1.3.10 → 1.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +1 -1
- package/package.json +4 -5
- package/src/cli/auto.js +1 -1
- package/src/cli/config.js +116 -0
- package/src/cli/explore-default.js +83 -0
- package/src/cli/explore.js +16 -4
- package/src/cli/scrape.js +1 -1
- package/src/cli/videos.js +1 -1
- package/src/cli/watch.js +4 -4
- package/src/lib/args.js +12 -1
- package/src/lib/browser/cdp.js +152 -142
- package/src/lib/constants.js +0 -4
- package/src/lib/explore-fetch.js +1 -1
- package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
- package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
- package/src/main.js +46 -0
- package/src/scraper/{auto-core.mjs → auto-core.js} +5 -5
- package/src/scraper/{core.mjs → core.js} +3 -3
- package/src/scraper/{explore-core.mjs → explore-core.js} +7 -7
- package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +3 -3
- package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +2 -2
- package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +2 -2
- package/src/scraper/modules/page-error-detector.js +1 -0
- package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +1 -1
- package/src/scraper/modules/scroll-collector.js +8 -0
- package/src/videos/{core.mjs → core.js} +2 -2
- package/src/watch/{data-store.mjs → data-store.js} +38 -10
- package/src/watch/public/index.html +13 -2
- package/src/watch/{server.mjs → server.js} +21 -6
- package/src/main.mjs +0 -234
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
- /package/{bat → scripts}/run-explore.bat +0 -0
- /package/{bat → scripts}/run-explore.ps1 +0 -0
- /package/{bat → scripts}/run-explore.sh +0 -0
- /package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +0 -0
|
@@ -111,10 +111,20 @@ export function createStore(filePath) {
|
|
|
111
111
|
}
|
|
112
112
|
|
|
113
113
|
function claimNextJob(userId, expireMs = 5 * 60 * 1000) {
|
|
114
|
+
const now = Date.now();
|
|
115
|
+
|
|
116
|
+
// 0. 该客户端有未过期的任务,续期返回
|
|
117
|
+
const ongoing = data.find(u =>
|
|
118
|
+
u.status === 'processing' && u.claimedBy === userId && u.claimedAt && (now - u.claimedAt) < expireMs
|
|
119
|
+
);
|
|
120
|
+
if (ongoing) {
|
|
121
|
+
ongoing.claimedAt = now;
|
|
122
|
+
return { uniqueId: ongoing.uniqueId, nickname: ongoing.nickname, claimedAt: ongoing.claimedAt, claimedBy: userId };
|
|
123
|
+
}
|
|
124
|
+
|
|
114
125
|
let next = data.find(u => u.status === 'pending' && u.pinned);
|
|
115
126
|
|
|
116
127
|
if (!next) {
|
|
117
|
-
const now = Date.now();
|
|
118
128
|
const expired = data.find(u =>
|
|
119
129
|
u.status === 'processing' && u.claimedAt && (now - u.claimedAt) > expireMs
|
|
120
130
|
);
|
|
@@ -129,13 +139,17 @@ export function createStore(filePath) {
|
|
|
129
139
|
next = data.find(u => u.status === 'pending' && u.sources && u.sources.includes('seed'));
|
|
130
140
|
}
|
|
131
141
|
|
|
142
|
+
if (!next) {
|
|
143
|
+
next = data.find(u => u.status === 'pending' && u.sources && (u.sources.includes('following') || u.sources.includes('follower')));
|
|
144
|
+
}
|
|
145
|
+
|
|
132
146
|
if (!next) {
|
|
133
147
|
next = data.find(u => u.status === 'pending');
|
|
134
148
|
}
|
|
135
149
|
|
|
136
150
|
if (next) {
|
|
137
151
|
next.status = 'processing';
|
|
138
|
-
next.claimedAt =
|
|
152
|
+
next.claimedAt = now;
|
|
139
153
|
next.claimedBy = userId;
|
|
140
154
|
return { uniqueId: next.uniqueId, nickname: next.nickname, claimedAt: next.claimedAt, claimedBy: userId };
|
|
141
155
|
}
|
|
@@ -250,13 +264,27 @@ export function createStore(filePath) {
|
|
|
250
264
|
}
|
|
251
265
|
|
|
252
266
|
function reportClientError(userId, errorType, errorMessage, username) {
|
|
253
|
-
clientErrors.
|
|
254
|
-
|
|
255
|
-
errorType
|
|
256
|
-
errorMessage
|
|
257
|
-
username
|
|
258
|
-
timestamp
|
|
259
|
-
|
|
267
|
+
const existing = clientErrors.get(userId);
|
|
268
|
+
if (existing) {
|
|
269
|
+
existing.errorType = errorType;
|
|
270
|
+
existing.errorMessage = errorMessage;
|
|
271
|
+
existing.username = username;
|
|
272
|
+
existing.timestamp = Date.now();
|
|
273
|
+
existing.reportCount = (existing.reportCount || 1) + 1;
|
|
274
|
+
} else {
|
|
275
|
+
clientErrors.set(userId, {
|
|
276
|
+
userId,
|
|
277
|
+
errorType,
|
|
278
|
+
errorMessage,
|
|
279
|
+
username,
|
|
280
|
+
timestamp: Date.now(),
|
|
281
|
+
reportCount: 1,
|
|
282
|
+
});
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
function deleteClientError(userId) {
|
|
287
|
+
clientErrors.delete(userId);
|
|
260
288
|
}
|
|
261
289
|
|
|
262
290
|
function getClientErrors() {
|
|
@@ -267,7 +295,7 @@ export function createStore(filePath) {
|
|
|
267
295
|
save, getUser, hasUser, addUser,
|
|
268
296
|
getPendingUsers, getProcessedUsers, getAllUsers,
|
|
269
297
|
claimNextJob, commitJob, resetJob, togglePin,
|
|
270
|
-
reportClientError, getClientErrors,
|
|
298
|
+
reportClientError, deleteClientError, getClientErrors,
|
|
271
299
|
stopBackup,
|
|
272
300
|
data,
|
|
273
301
|
};
|
|
@@ -144,8 +144,8 @@
|
|
|
144
144
|
<h1>TikTok 采集监控</h1>
|
|
145
145
|
<div class="meta" id="fileMeta">加载中...</div>
|
|
146
146
|
<div style="display:flex;gap:8px;align-items:center">
|
|
147
|
-
<a href="/scripts/run-explore.sh" class="script-link" download>mac</a>
|
|
148
|
-
<a href="/scripts/run-explore.bat" class="script-link" download>windows</a>
|
|
147
|
+
<a href="/scripts/run-explore.sh" class="script-link" download="run-explore.sh">mac</a>
|
|
148
|
+
<a href="/scripts/run-explore.bat" class="script-link" download="run-explore.bat">windows</a>
|
|
149
149
|
<span class="status" id="lastUpdate">--</span>
|
|
150
150
|
</div>
|
|
151
151
|
</div>
|
|
@@ -168,8 +168,10 @@
|
|
|
168
168
|
<tr>
|
|
169
169
|
<th>客户端</th>
|
|
170
170
|
<th>错误类型</th>
|
|
171
|
+
<th>汇报次数</th>
|
|
171
172
|
<th>当时处理的 TikTok 用户</th>
|
|
172
173
|
<th>时间</th>
|
|
174
|
+
<th>操作</th>
|
|
173
175
|
</tr>
|
|
174
176
|
</thead>
|
|
175
177
|
<tbody id="clientErrorsBody"></tbody>
|
|
@@ -279,13 +281,22 @@ async function fetchClientErrors() {
|
|
|
279
281
|
return `<tr>
|
|
280
282
|
<td style="font-family:monospace;font-weight:600;color:#60a5fa">${escapeHtml(c.userId)}</td>
|
|
281
283
|
<td class="${typeClass}">${typeText}</td>
|
|
284
|
+
<td style="color:#f87171;font-weight:600">${c.reportCount || 1}</td>
|
|
282
285
|
<td style="color:#60a5fa">@${escapeHtml(c.username || '-')}</td>
|
|
283
286
|
<td style="color:#888;font-size:12px">${new Date(c.timestamp).toLocaleTimeString()}</td>
|
|
287
|
+
<td><button class="btn-delete" onclick="deleteClientError('${escapeHtml(c.userId)}')" style="background:#991b1b;color:#fff;border:none;padding:3px 10px;border-radius:4px;cursor:pointer;font-size:12px">删除</button></td>
|
|
284
288
|
</tr>`;
|
|
285
289
|
}).join('');
|
|
286
290
|
} catch (e) {}
|
|
287
291
|
}
|
|
288
292
|
|
|
293
|
+
async function deleteClientError(userId) {
|
|
294
|
+
try {
|
|
295
|
+
await fetch(`/api/client-error/${encodeURIComponent(userId)}`, { method: 'DELETE' });
|
|
296
|
+
fetchClientErrors();
|
|
297
|
+
} catch (e) {}
|
|
298
|
+
}
|
|
299
|
+
|
|
289
300
|
function flashEl(id, value) {
|
|
290
301
|
const el = document.getElementById(id);
|
|
291
302
|
if (!el) return;
|
|
@@ -5,7 +5,7 @@ import { readFileSync, existsSync } from 'fs';
|
|
|
5
5
|
import { join, dirname } from 'path';
|
|
6
6
|
import { fileURLToPath } from 'url';
|
|
7
7
|
import { spawn } from 'child_process';
|
|
8
|
-
import { createStore } from './data-store.
|
|
8
|
+
import { createStore } from './data-store.js';
|
|
9
9
|
|
|
10
10
|
const __filename = fileURLToPath(import.meta.url);
|
|
11
11
|
|
|
@@ -235,6 +235,17 @@ export function startWatchServer(outputFile, port = 3000, existingStore) {
|
|
|
235
235
|
return;
|
|
236
236
|
}
|
|
237
237
|
|
|
238
|
+
if (req.method === 'DELETE' && routePath.startsWith('/api/client-error/')) {
|
|
239
|
+
const userId = routePath.replace('/api/client-error/', '');
|
|
240
|
+
if (userId) {
|
|
241
|
+
store.deleteClientError(userId);
|
|
242
|
+
sendJSON(res, 200, { ok: true });
|
|
243
|
+
} else {
|
|
244
|
+
sendJSON(res, 400, { error: 'missing userId' });
|
|
245
|
+
}
|
|
246
|
+
return;
|
|
247
|
+
}
|
|
248
|
+
|
|
238
249
|
if (req.method === 'POST' && routePath === '/api/error-report') {
|
|
239
250
|
const body = await readBody(req);
|
|
240
251
|
if (body && body.userId) {
|
|
@@ -303,13 +314,17 @@ export function startWatchServer(outputFile, port = 3000, existingStore) {
|
|
|
303
314
|
|
|
304
315
|
const scriptMatch = routePath.match(/^\/scripts\/(.+)$/);
|
|
305
316
|
if (req.method === 'GET' && scriptMatch) {
|
|
306
|
-
const
|
|
307
|
-
const scriptFile = join(
|
|
317
|
+
const scriptsDir = join(__dirname, '../../scripts');
|
|
318
|
+
const scriptFile = join(scriptsDir, scriptMatch[1]);
|
|
308
319
|
if (existsSync(scriptFile)) {
|
|
309
320
|
const content = readFileSync(scriptFile);
|
|
310
|
-
const
|
|
311
|
-
|
|
312
|
-
|
|
321
|
+
const fileName = scriptMatch[1];
|
|
322
|
+
const ext = fileName.split('.').pop();
|
|
323
|
+
const mime = ext === 'sh' ? 'text/x-shellscript' : ext === 'bat' ? 'text/x-msdos-batch' : ext === 'ps1' ? 'text/x-powershell' : 'text/plain';
|
|
324
|
+
res.writeHead(200, {
|
|
325
|
+
'Content-Type': `${mime}; charset=utf-8`,
|
|
326
|
+
'Content-Disposition': `attachment; filename="${fileName}"`,
|
|
327
|
+
});
|
|
313
328
|
res.end(content);
|
|
314
329
|
return;
|
|
315
330
|
}
|
package/src/main.mjs
DELETED
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
import { parseArgs } from './lib/args.js';
|
|
2
|
-
import { HELP_TEXT, proxy, configFile, configPath, DEFAULT_PROXY, saveBrowser, saveUserId, getConfigText } from './lib/constants.js';
|
|
3
|
-
import { parseFilter, applyFilter, formatFilterDescription } from './lib/filter.js';
|
|
4
|
-
import { writeFileSync, readFileSync, existsSync } from 'fs';
|
|
5
|
-
import { handleScrape } from './cli/scrape.js';
|
|
6
|
-
import { handleVideos } from './cli/videos.js';
|
|
7
|
-
import { handleAuto } from './cli/auto.js';
|
|
8
|
-
import { handleExplore } from './cli/explore.js';
|
|
9
|
-
import { handleWatch } from './cli/watch.js';
|
|
10
|
-
import { processUrlsWithProgress } from './cli/progress.js';
|
|
11
|
-
import { cleanError } from './cli/utils.js';
|
|
12
|
-
import { fileURLToPath } from 'url';
|
|
13
|
-
import { dirname, join } from 'path';
|
|
14
|
-
|
|
15
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
-
const pkgPath = join(__dirname, '..', 'package.json');
|
|
17
|
-
const { version } = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
18
|
-
|
|
19
|
-
function showConfig(urls, outputFile) {
|
|
20
|
-
const lines = getConfigText();
|
|
21
|
-
if (outputFile) lines.push(` 输出文件: ${outputFile}`);
|
|
22
|
-
if (urls.length > 0) lines.push(` 待处理URL: ${urls.length}`);
|
|
23
|
-
lines.push('', '参数:', ' -c, --config 显示当前配置', ' -h, --help 显示帮助');
|
|
24
|
-
console.log(lines.join('\n'));
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
function showUsage() {
|
|
28
|
-
console.log(HELP_TEXT.join('\n'));
|
|
29
|
-
process.exit(0);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function handleConfig(action, key, value) {
|
|
33
|
-
if (action === 'show' || action === null) {
|
|
34
|
-
showConfig([], null);
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
if (action === 'set' || action === 'set-proxy') {
|
|
38
|
-
if (!key) {
|
|
39
|
-
console.error('用法: tt-help config set <key> <value>');
|
|
40
|
-
console.error(' 可用 key: proxy, server, browser, userId');
|
|
41
|
-
process.exit(1);
|
|
42
|
-
}
|
|
43
|
-
if (!value && key.startsWith('http://')) {
|
|
44
|
-
// 兼容旧用法: config set <代理地址>(key 实际是 value)
|
|
45
|
-
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
46
|
-
cfg.proxy = key;
|
|
47
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
48
|
-
console.log(`代理已设置为: ${key}`);
|
|
49
|
-
console.log(`配置文件: ${configPath}`);
|
|
50
|
-
return;
|
|
51
|
-
}
|
|
52
|
-
if (!value) {
|
|
53
|
-
console.error(`请提供 ${key} 的值`);
|
|
54
|
-
process.exit(1);
|
|
55
|
-
}
|
|
56
|
-
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
57
|
-
if (key === 'proxy') cfg.proxy = value;
|
|
58
|
-
else if (key === 'server') cfg.server = value;
|
|
59
|
-
else if (key === 'browser') cfg.browser = value;
|
|
60
|
-
else if (key === 'userId') {
|
|
61
|
-
saveUserId(value);
|
|
62
|
-
console.log(`userId 已设置为: ${value}`);
|
|
63
|
-
console.log(`配置文件: ${configPath}`);
|
|
64
|
-
return;
|
|
65
|
-
}
|
|
66
|
-
else {
|
|
67
|
-
console.error(`未知配置项: ${key}`);
|
|
68
|
-
console.error(' 可用 key: proxy, server, browser, userId');
|
|
69
|
-
process.exit(1);
|
|
70
|
-
}
|
|
71
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
72
|
-
console.log(`${key} 已设置为: ${value}`);
|
|
73
|
-
console.log(`配置文件: ${configPath}`);
|
|
74
|
-
return;
|
|
75
|
-
}
|
|
76
|
-
if (action === 'set-browser') {
|
|
77
|
-
if (!key) {
|
|
78
|
-
console.error('用法: tt-help config set-browser <浏览器路径 或 auto>');
|
|
79
|
-
process.exit(1);
|
|
80
|
-
}
|
|
81
|
-
if (key === 'auto') {
|
|
82
|
-
if (existsSync(configPath)) {
|
|
83
|
-
const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
|
|
84
|
-
delete cfg.browser;
|
|
85
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
86
|
-
}
|
|
87
|
-
console.log('已切换为自动探测浏览器模式');
|
|
88
|
-
} else {
|
|
89
|
-
saveBrowser(key);
|
|
90
|
-
console.log(`浏览器已设置为: ${key}`);
|
|
91
|
-
}
|
|
92
|
-
console.log(`配置文件: ${configPath}`);
|
|
93
|
-
return;
|
|
94
|
-
}
|
|
95
|
-
if (action === 'reset') {
|
|
96
|
-
if (existsSync(configPath)) {
|
|
97
|
-
const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
|
|
98
|
-
cfg.proxy = DEFAULT_PROXY;
|
|
99
|
-
cfg.server = 'http://127.0.0.1:3001';
|
|
100
|
-
delete cfg.browser;
|
|
101
|
-
delete cfg.userId;
|
|
102
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
103
|
-
console.log('已重置所有配置:');
|
|
104
|
-
console.log(` 代理: ${DEFAULT_PROXY}`);
|
|
105
|
-
console.log(` 服务端: http://127.0.0.1:3001`);
|
|
106
|
-
console.log(' 浏览器: 已清空(自动探测)');
|
|
107
|
-
console.log(' 用户号: 已清空(下次运行 auto 自动创建)');
|
|
108
|
-
console.log(`配置文件: ${configPath}`);
|
|
109
|
-
} else {
|
|
110
|
-
console.log('当前使用默认配置,无需重置');
|
|
111
|
-
}
|
|
112
|
-
return;
|
|
113
|
-
}
|
|
114
|
-
console.error(`未知配置命令: ${action}`);
|
|
115
|
-
console.error('用法: tt-help config [show|set|reset]');
|
|
116
|
-
process.exit(1);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
async function runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter) {
|
|
120
|
-
console.log(`\n代理: ${proxyUrl}`);
|
|
121
|
-
console.log(`Explore 数量: ${exploreCount}`);
|
|
122
|
-
if (urls.length > 0) console.log(`额外 URL: ${urls.length}\n`);
|
|
123
|
-
else console.log('');
|
|
124
|
-
|
|
125
|
-
const allResults = [];
|
|
126
|
-
|
|
127
|
-
if (exploreCount > 0) {
|
|
128
|
-
try {
|
|
129
|
-
const { fetchExplore } = await import('./lib/explore-fetch.js');
|
|
130
|
-
const exploreResults = await fetchExplore(exploreCount);
|
|
131
|
-
console.log(` 获取到 ${exploreResults.length} 个视频\n`);
|
|
132
|
-
if (pipeMode) {
|
|
133
|
-
const videoUrls = exploreResults.map(r => r.url).filter(Boolean);
|
|
134
|
-
if (videoUrls.length > 0) {
|
|
135
|
-
await runScrapeDefault(videoUrls, proxyUrl, outputFile, outputFormat, filter);
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
allResults.push(...exploreResults);
|
|
140
|
-
} catch (err) {
|
|
141
|
-
console.error(` Explore 获取失败: ${cleanError(err.message)}\n`);
|
|
142
|
-
console.error(` 请确保代理 ${proxyUrl} 正常运行\n`);
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
if (urls.length > 0) {
|
|
147
|
-
const { processUrl } = await import('./lib/scrape.js');
|
|
148
|
-
await processUrlsWithProgress({
|
|
149
|
-
urls,
|
|
150
|
-
proxyUrl,
|
|
151
|
-
outputFile,
|
|
152
|
-
outputFormat,
|
|
153
|
-
filter,
|
|
154
|
-
processFn: (url, px) => processUrl(url, px),
|
|
155
|
-
label: '数据',
|
|
156
|
-
log: console.log,
|
|
157
|
-
});
|
|
158
|
-
return;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
const { deduplicate, formatOutput } = await import('./lib/output.js');
|
|
162
|
-
const uniqueResults = deduplicate(allResults);
|
|
163
|
-
const filteredResults = applyFilter(uniqueResults, filter);
|
|
164
|
-
|
|
165
|
-
if (filteredResults.length === 0) {
|
|
166
|
-
console.log('\n未获取到数据');
|
|
167
|
-
if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
|
|
168
|
-
return;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
const output = formatOutput(filteredResults, outputFormat);
|
|
172
|
-
if (outputFile) {
|
|
173
|
-
writeFileSync(outputFile, output, 'utf-8');
|
|
174
|
-
console.log(`\n结果已写入: ${outputFile}`);
|
|
175
|
-
} else {
|
|
176
|
-
console.log(output);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (filter) {
|
|
180
|
-
console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
|
|
181
|
-
} else {
|
|
182
|
-
console.log(`\n共 ${filteredResults.length} 个数据`);
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
async function runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter) {
|
|
187
|
-
const { processUrl } = await import('./lib/scrape.js');
|
|
188
|
-
await processUrlsWithProgress({
|
|
189
|
-
urls,
|
|
190
|
-
proxyUrl,
|
|
191
|
-
outputFile,
|
|
192
|
-
outputFormat,
|
|
193
|
-
filter,
|
|
194
|
-
processFn: (url, px) => processUrl(url, px),
|
|
195
|
-
label: '用户的数据',
|
|
196
|
-
log: console.log,
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
async function main() {
|
|
201
|
-
const parsed = parseArgs();
|
|
202
|
-
|
|
203
|
-
switch (parsed.subcommand) {
|
|
204
|
-
case 'scrape': return handleScrape(parsed);
|
|
205
|
-
case 'videos': return handleVideos(parsed);
|
|
206
|
-
case 'auto': return handleAuto(parsed);
|
|
207
|
-
case 'explore':return handleExplore(parsed);
|
|
208
|
-
case 'watch': return handleWatch(parsed);
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, showVersion, customProxy, configAction, configKey, configValue, pipeMode, filterStr } = parsed;
|
|
212
|
-
const proxyUrl = customProxy || proxy;
|
|
213
|
-
const filter = parseFilter(filterStr);
|
|
214
|
-
|
|
215
|
-
if (showVersion) {
|
|
216
|
-
console.log(version);
|
|
217
|
-
process.exit(0);
|
|
218
|
-
}
|
|
219
|
-
if (showHelp) return showUsage();
|
|
220
|
-
if (configAction) return handleConfig(configAction, configKey, configValue);
|
|
221
|
-
if (showCfg) return showConfig(urls, outputFile);
|
|
222
|
-
if (urls.length === 0 && exploreCount === 0) return showUsage();
|
|
223
|
-
|
|
224
|
-
if (exploreCount > 0) {
|
|
225
|
-
await runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter);
|
|
226
|
-
} else {
|
|
227
|
-
await runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
main().catch(err => {
|
|
232
|
-
console.error(`错误: ${err.message}`);
|
|
233
|
-
process.exit(1);
|
|
234
|
-
});
|
package/src/test-auto-follow.cjs
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
const path = require('path');
|
|
2
|
-
const fs = require('fs');
|
|
3
|
-
const { ensureBrowserReady, setDelayConfig } = require('./scraper/modules/page-helpers.cjs');
|
|
4
|
-
const { processUser } = require('./auto-core.cjs');
|
|
5
|
-
const { createStore } = require('./data-store.cjs');
|
|
6
|
-
|
|
7
|
-
async function main() {
|
|
8
|
-
const outFile = path.join(__dirname, '..', 'results', 'auto-test.json');
|
|
9
|
-
const store = createStore(outFile);
|
|
10
|
-
|
|
11
|
-
setDelayConfig('fast');
|
|
12
|
-
|
|
13
|
-
const browser = await ensureBrowserReady();
|
|
14
|
-
let page;
|
|
15
|
-
try {
|
|
16
|
-
const contexts = browser.contexts();
|
|
17
|
-
page = null;
|
|
18
|
-
for (const ctx of contexts) {
|
|
19
|
-
for (const p of ctx.pages()) {
|
|
20
|
-
if (p.url().includes('tiktok.com')) { page = p; break; }
|
|
21
|
-
}
|
|
22
|
-
if (page) break;
|
|
23
|
-
}
|
|
24
|
-
if (!page) page = await contexts[0].newPage();
|
|
25
|
-
|
|
26
|
-
console.error('========== 测试 processUser + enableFollow ==========');
|
|
27
|
-
console.error('用户: @qiqi23280\n');
|
|
28
|
-
|
|
29
|
-
const result = await processUser(page, 'qiqi23280', {
|
|
30
|
-
collectMax: 1,
|
|
31
|
-
scrapeDepth: 1,
|
|
32
|
-
maxComments: 10,
|
|
33
|
-
maxGuess: 5,
|
|
34
|
-
preset: 'fast',
|
|
35
|
-
enableFollow: true,
|
|
36
|
-
maxFollowing: 50,
|
|
37
|
-
maxFollowers: 50,
|
|
38
|
-
browser,
|
|
39
|
-
}, console.error);
|
|
40
|
-
|
|
41
|
-
console.error('\n========== 结果验证 ==========');
|
|
42
|
-
let allPassed = true;
|
|
43
|
-
|
|
44
|
-
const checks = [
|
|
45
|
-
{ label: '用户信息', ok: result.userInfo && result.userInfo.uniqueId, detail: result.userInfo?.uniqueId },
|
|
46
|
-
{ label: '关注列表', ok: Array.isArray(result.discoveredFollowing) && result.discoveredFollowing.length > 0, detail: `${result.discoveredFollowing?.length || 0} 人` },
|
|
47
|
-
{ label: '粉丝列表', ok: Array.isArray(result.discoveredFollowers) && result.discoveredFollowers.length > 0, detail: `${result.discoveredFollowers?.length || 0} 人` },
|
|
48
|
-
{ label: '关注格式', ok: result.discoveredFollowing?.every(p => Array.isArray(p) && p.length === 2 && p[0].startsWith('@')), detail: null },
|
|
49
|
-
{ label: '粉丝格式', ok: result.discoveredFollowers?.every(p => Array.isArray(p) && p.length === 2 && p[0].startsWith('@')), detail: null },
|
|
50
|
-
{ label: '无错误', ok: !result.error, detail: result.error },
|
|
51
|
-
];
|
|
52
|
-
|
|
53
|
-
for (const c of checks) {
|
|
54
|
-
const status = c.ok ? 'PASS' : 'FAIL';
|
|
55
|
-
const detailStr = c.detail !== null ? ` (${c.detail})` : '';
|
|
56
|
-
console.error(` ${status}: ${c.label}${detailStr}`);
|
|
57
|
-
if (!c.ok) allPassed = false;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
// 模拟入队逻辑
|
|
61
|
-
const queue = ['qiqi23280'];
|
|
62
|
-
const followingIds = (result.discoveredFollowing || []).map(([h]) => h.replace(/^@/, ''));
|
|
63
|
-
const followerIds = (result.discoveredFollowers || []).map(([h]) => h.replace(/^@/, ''));
|
|
64
|
-
|
|
65
|
-
for (const uid of followingIds) queue.push(uid);
|
|
66
|
-
for (const uid of followerIds) queue.push(uid);
|
|
67
|
-
const uniqueQueue = [...new Set(queue)];
|
|
68
|
-
|
|
69
|
-
console.error(`\n 队列长度: ${uniqueQueue.length}(关注 ${followingIds.length} + 粉丝 ${followerIds.length} + 种子 1)`);
|
|
70
|
-
|
|
71
|
-
// 写入 store 验证
|
|
72
|
-
store.addUser({
|
|
73
|
-
uniqueId: 'qiqi23280',
|
|
74
|
-
...result.userInfo,
|
|
75
|
-
sources: ['seed'],
|
|
76
|
-
});
|
|
77
|
-
for (const [handle, name] of (result.discoveredFollowing || [])) {
|
|
78
|
-
store.addUser({ uniqueId: handle.replace(/^@/, ''), nickname: name, sources: ['following'] });
|
|
79
|
-
}
|
|
80
|
-
for (const [handle, name] of (result.discoveredFollowers || [])) {
|
|
81
|
-
store.addUser({ uniqueId: handle.replace(/^@/, ''), nickname: name, sources: ['follower'] });
|
|
82
|
-
}
|
|
83
|
-
store.save();
|
|
84
|
-
|
|
85
|
-
const allUsers = store.getAllUsers();
|
|
86
|
-
console.error(` Store 用户数: ${allUsers.length}`);
|
|
87
|
-
|
|
88
|
-
// 验证 source 标记
|
|
89
|
-
const followingUsers = allUsers.filter(u => u.sources?.includes('following'));
|
|
90
|
-
const followerUsers = allUsers.filter(u => u.sources?.includes('follower'));
|
|
91
|
-
console.error(` 关注来源: ${followingUsers.length} | 粉丝来源: ${followerUsers.length}`);
|
|
92
|
-
|
|
93
|
-
if (followingUsers.length === 0 || followerUsers.length === 0) {
|
|
94
|
-
console.error(' FAIL: 缺少 following 或 follower 来源标记');
|
|
95
|
-
allPassed = false;
|
|
96
|
-
} else {
|
|
97
|
-
console.error(' PASS: 来源标记正确');
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
console.error(`\n${allPassed ? 'ALL PASSED' : 'SOME FAILED'}`);
|
|
101
|
-
console.error(`数据保存到: ${outFile}`);
|
|
102
|
-
process.exit(allPassed ? 0 : 1);
|
|
103
|
-
|
|
104
|
-
} finally {
|
|
105
|
-
await browser.close().catch(() => {});
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
main().catch(err => { console.error('FATAL:', err.message); process.exit(1); });
|
package/src/test-extractors.cjs
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
const { ensureBrowserReady, delay, setDelayConfig } = require('./scraper/modules/page-helpers.cjs');
|
|
2
|
-
const { extractCommentAuthors } = require('./scraper/modules/comment-extractor.cjs');
|
|
3
|
-
const { extractGuessVideos } = require('./scraper/modules/guess-extractor.cjs');
|
|
4
|
-
|
|
5
|
-
async function main() {
|
|
6
|
-
setDelayConfig('fast');
|
|
7
|
-
|
|
8
|
-
const videoUrl = process.argv[2] || 'https://www.tiktok.com/@porfirio.fructuoso/video/7615853535955111198';
|
|
9
|
-
console.error(`目标: ${videoUrl}`);
|
|
10
|
-
|
|
11
|
-
const browser = await ensureBrowserReady();
|
|
12
|
-
let page;
|
|
13
|
-
try {
|
|
14
|
-
const contexts = browser.contexts();
|
|
15
|
-
page = null;
|
|
16
|
-
for (const ctx of contexts) {
|
|
17
|
-
for (const p of ctx.pages()) {
|
|
18
|
-
if (p.url().includes('tiktok.com')) { page = p; break; }
|
|
19
|
-
}
|
|
20
|
-
if (page) break;
|
|
21
|
-
}
|
|
22
|
-
if (!page) {
|
|
23
|
-
page = await contexts[0].newPage();
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
await page.goto(videoUrl, { waitUntil: 'networkidle', timeout: 60000 });
|
|
27
|
-
await delay(5000, 8000);
|
|
28
|
-
|
|
29
|
-
console.error(`当前URL: ${page.url()}`);
|
|
30
|
-
|
|
31
|
-
let allPassed = true;
|
|
32
|
-
|
|
33
|
-
// ========== 评论提取 ==========
|
|
34
|
-
console.error('\n--- 评论提取 (max=30) ---');
|
|
35
|
-
const t1 = Date.now();
|
|
36
|
-
let commentUsers = [];
|
|
37
|
-
try { commentUsers = await extractCommentAuthors(page, 30); }
|
|
38
|
-
catch (e) { console.error(` 异常: ${e.message}`); }
|
|
39
|
-
console.error(` 耗时: ${((Date.now()-t1)/1000).toFixed(1)}s, 结果: ${commentUsers.length} 个`);
|
|
40
|
-
|
|
41
|
-
if (commentUsers.length > 0) {
|
|
42
|
-
const s = new Set(commentUsers);
|
|
43
|
-
const ok = s.size === commentUsers.length;
|
|
44
|
-
console.error(` ${ok ? 'PASS' : 'FAIL'}: 唯一${s.size}/总数${commentUsers.length}`);
|
|
45
|
-
if (!ok) allPassed = false;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// ========== 猜你喜欢提取 ==========
|
|
49
|
-
console.error('\n--- 猜你喜欢提取 (max=20) ---');
|
|
50
|
-
const t2 = Date.now();
|
|
51
|
-
let guessVideos = [];
|
|
52
|
-
try { guessVideos = await extractGuessVideos(page, 20); }
|
|
53
|
-
catch (e) { console.error(` 异常: ${e.message}`); }
|
|
54
|
-
console.error(` 耗时: ${((Date.now()-t2)/1000).toFixed(1)}s, 结果: ${guessVideos.length} 个`);
|
|
55
|
-
|
|
56
|
-
if (guessVideos.length > 0) {
|
|
57
|
-
const ids = guessVideos.map(v => v.videoId);
|
|
58
|
-
const s = new Set(ids);
|
|
59
|
-
const ok = s.size === ids.length;
|
|
60
|
-
console.error(` ${ok ? 'PASS' : 'FAIL'}: 唯一${s.size}/总数${ids.length}`);
|
|
61
|
-
if (!ok) allPassed = false;
|
|
62
|
-
const ok2 = guessVideos.every(v => v.author && v.videoId && v.url);
|
|
63
|
-
console.error(` ${ok2 ? 'PASS' : 'FAIL'}: 结构完整`);
|
|
64
|
-
if (!ok2) allPassed = false;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
console.error(`\n${allPassed ? 'ALL PASSED' : 'SOME FAILED'}`);
|
|
68
|
-
process.exit(allPassed ? 0 : 1);
|
|
69
|
-
|
|
70
|
-
} finally {
|
|
71
|
-
await browser.close().catch(() => {});
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
main().catch(err => { console.error('FATAL:', err.message); process.exit(1); });
|
package/src/test-follow.cjs
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
const path = require('path');
|
|
2
|
-
const { ensureBrowserReady, ensureTikTokPage, delay } = require('./scraper/modules/page-helpers.cjs');
|
|
3
|
-
const { extractFollowAndFollowers } = require('./scraper/modules/follow-extractor.cjs');
|
|
4
|
-
|
|
5
|
-
async function main() {
|
|
6
|
-
const url = process.argv[2] || 'https://www.tiktok.com/@qiqi23280';
|
|
7
|
-
console.error(`目标: ${url}`);
|
|
8
|
-
|
|
9
|
-
const browser = await ensureBrowserReady();
|
|
10
|
-
try {
|
|
11
|
-
const page = await ensureTikTokPage(browser, url);
|
|
12
|
-
await page.goto(url, { waitUntil: 'load', timeout: 30000 });
|
|
13
|
-
console.error('等待页面加载...');
|
|
14
|
-
await delay(3000, 5000);
|
|
15
|
-
|
|
16
|
-
console.error('开始提取关注和粉丝...\n');
|
|
17
|
-
const result = await extractFollowAndFollowers(page, {
|
|
18
|
-
log: console.error,
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
console.error('\n--- 提取完成 ---');
|
|
22
|
-
console.error(`关注: ${result.following.length} 人`);
|
|
23
|
-
console.error(`粉丝: ${result.followers.length} 人`);
|
|
24
|
-
|
|
25
|
-
const outDir = path.join(__dirname, '..', 'results');
|
|
26
|
-
const fs = require('fs');
|
|
27
|
-
fs.mkdirSync(outDir, { recursive: true });
|
|
28
|
-
const outPath = path.join(outDir, 'follow-result.json');
|
|
29
|
-
fs.writeFileSync(outPath, JSON.stringify(result, null, 2));
|
|
30
|
-
console.error(`已保存到 ${outPath}`);
|
|
31
|
-
|
|
32
|
-
console.log(JSON.stringify(result, null, 2));
|
|
33
|
-
} finally {
|
|
34
|
-
await browser.close().catch(() => {});
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
main().catch(err => {
|
|
39
|
-
console.error('错误:', err.message);
|
|
40
|
-
process.exit(1);
|
|
41
|
-
});
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|