tt-help-cli-ycl 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/auto-core.mjs +174 -0
- package/src/cli/auto.js +94 -0
- package/src/cli/explore.js +117 -0
- package/src/cli/progress.js +111 -0
- package/src/cli/scrape.js +47 -0
- package/src/cli/utils.js +18 -0
- package/src/cli/videos.js +41 -0
- package/src/cli/watch.js +28 -0
- package/src/data-store.mjs +213 -0
- package/src/{explore-core.cjs → explore-core.mjs} +148 -157
- package/src/{get-user-videos-core.cjs → get-user-videos-core.mjs} +6 -23
- package/src/lib/args.js +19 -38
- package/src/lib/auto-browser.mjs +5 -12
- package/src/lib/browser/anti-detect.js +23 -0
- package/src/lib/browser/cdp.js +142 -0
- package/src/lib/browser/launch.js +43 -0
- package/src/lib/browser/page.js +62 -0
- package/src/lib/constants.js +13 -95
- package/src/lib/delay.js +54 -0
- package/src/lib/explore.js +16 -123
- package/src/lib/fetcher.js +3 -18
- package/src/lib/get-user-videos-browser.mjs +1 -6
- package/src/lib/io.js +8 -30
- package/src/lib/parser.js +1 -1
- package/src/lib/retry.js +44 -0
- package/src/lib/scrape-browser.mjs +1 -6
- package/src/lib/scrape.js +5 -4
- package/src/lib/url.js +52 -0
- package/src/main.mjs +59 -822
- package/src/scraper/{core.cjs → core.mjs} +25 -57
- package/src/scraper/modules/{comment-extractor.cjs → comment-extractor.mjs} +23 -15
- package/src/scraper/modules/follow-extractor.mjs +121 -0
- package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +3 -5
- package/src/scraper/modules/page-error-detector.mjs +68 -0
- package/src/scraper/modules/page-helpers.mjs +44 -0
- package/src/scraper/modules/scroll-collector.mjs +189 -0
- package/src/watch/public/index.html +139 -64
- package/src/watch/server.mjs +234 -153
- package/src/auto-core.cjs +0 -367
- package/src/data-store.cjs +0 -69
- package/src/get-user-videos.cjs +0 -59
- package/src/scraper/index.cjs +0 -97
- package/src/scraper/modules/follow-extractor.cjs +0 -112
- package/src/scraper/modules/page-helpers.cjs +0 -422
- package/src/scraper/modules/scroll-collector.cjs +0 -173
- package/src/scraper/modules/video-scanner.cjs +0 -43
package/src/lib/args.js
CHANGED
|
@@ -74,9 +74,7 @@ function parseAutoArgs(args) {
|
|
|
74
74
|
let autoPreset = 'fast';
|
|
75
75
|
let autoSwitchDelay = null;
|
|
76
76
|
let autoCommentDelay = null;
|
|
77
|
-
let
|
|
78
|
-
let autoWatch = false;
|
|
79
|
-
let autoWatchPort = 3000;
|
|
77
|
+
let serverUrl = 'http://127.0.0.1:3000';
|
|
80
78
|
let autoEnableFollow = false;
|
|
81
79
|
let autoMaxFollowing = 200;
|
|
82
80
|
let autoMaxFollowers = 200;
|
|
@@ -86,20 +84,12 @@ function parseAutoArgs(args) {
|
|
|
86
84
|
|
|
87
85
|
for (let i = 0; i < args.length; i++) {
|
|
88
86
|
const arg = args[i];
|
|
89
|
-
if (arg === '
|
|
90
|
-
|
|
91
|
-
} else if (arg === '--switch-delay') {
|
|
87
|
+
if (arg === '--server') {
|
|
88
|
+
serverUrl = args[++i];
|
|
89
|
+
} else if (arg === '--switch-delay') {
|
|
92
90
|
autoSwitchDelay = parseInt(args[++i]) || null;
|
|
93
91
|
} else if (arg === '--comment-delay') {
|
|
94
92
|
autoCommentDelay = parseInt(args[++i]) || null;
|
|
95
|
-
} else if (arg === '--watch') {
|
|
96
|
-
autoWatch = true;
|
|
97
|
-
if (args[i + 1] === '-p') {
|
|
98
|
-
autoWatchPort = parseInt(args[i + 2]) || 3000;
|
|
99
|
-
i += 2;
|
|
100
|
-
}
|
|
101
|
-
} else if (arg === '-p' && autoWatch) {
|
|
102
|
-
autoWatchPort = parseInt(args[++i]) || 3000;
|
|
103
93
|
} else if (arg === '--enable-follow') {
|
|
104
94
|
autoEnableFollow = true;
|
|
105
95
|
} else if (arg === '--max-following') {
|
|
@@ -144,9 +134,7 @@ function parseAutoArgs(args) {
|
|
|
144
134
|
autoPreset,
|
|
145
135
|
autoSwitchDelay,
|
|
146
136
|
autoCommentDelay,
|
|
147
|
-
|
|
148
|
-
autoWatch,
|
|
149
|
-
autoWatchPort,
|
|
137
|
+
serverUrl,
|
|
150
138
|
autoEnableFollow,
|
|
151
139
|
autoMaxFollowing,
|
|
152
140
|
autoMaxFollowers,
|
|
@@ -164,16 +152,14 @@ function parseAutoArgs(args) {
|
|
|
164
152
|
}
|
|
165
153
|
|
|
166
154
|
function parseExploreArgs(args) {
|
|
167
|
-
let
|
|
155
|
+
let serverUrl = 'http://127.0.0.1:3000';
|
|
168
156
|
let explorePreset = 'normal';
|
|
169
|
-
let exploreMaxComments =
|
|
157
|
+
let exploreMaxComments = 10;
|
|
170
158
|
let exploreMaxGuess = 0;
|
|
171
159
|
let exploreEnableFollow = true;
|
|
172
|
-
let exploreMaxFollowing =
|
|
173
|
-
let exploreMaxFollowers =
|
|
160
|
+
let exploreMaxFollowing = 5;
|
|
161
|
+
let exploreMaxFollowers = 5;
|
|
174
162
|
let exploreLocation = 'ES';
|
|
175
|
-
let exploreWatch = false;
|
|
176
|
-
let exploreWatchPort = 3000;
|
|
177
163
|
let exploreMaxUsers = 0;
|
|
178
164
|
|
|
179
165
|
const positional = [];
|
|
@@ -181,8 +167,8 @@ function parseExploreArgs(args) {
|
|
|
181
167
|
|
|
182
168
|
for (let i = 0; i < args.length; i++) {
|
|
183
169
|
const arg = args[i];
|
|
184
|
-
if (arg === '
|
|
185
|
-
|
|
170
|
+
if (arg === '--server') {
|
|
171
|
+
serverUrl = args[++i];
|
|
186
172
|
} else if (arg === '--max-comments') {
|
|
187
173
|
exploreMaxComments = parseInt(args[++i]) || 0;
|
|
188
174
|
} else if (arg === '--max-guess') {
|
|
@@ -194,17 +180,11 @@ function parseExploreArgs(args) {
|
|
|
194
180
|
} else if (arg === '--disable-follow') {
|
|
195
181
|
exploreEnableFollow = false;
|
|
196
182
|
} else if (arg === '--max-following') {
|
|
197
|
-
exploreMaxFollowing = parseInt(args[++i]) ||
|
|
183
|
+
exploreMaxFollowing = parseInt(args[++i]) || 5;
|
|
198
184
|
} else if (arg === '--max-followers') {
|
|
199
|
-
exploreMaxFollowers = parseInt(args[++i]) ||
|
|
185
|
+
exploreMaxFollowers = parseInt(args[++i]) || 5;
|
|
200
186
|
} else if (arg === '--max-users') {
|
|
201
187
|
exploreMaxUsers = parseInt(args[++i]) || 0;
|
|
202
|
-
} else if (arg === '--watch') {
|
|
203
|
-
exploreWatch = true;
|
|
204
|
-
if (args[i + 1] === '-p') {
|
|
205
|
-
exploreWatchPort = parseInt(args[i + 2]) || 3000;
|
|
206
|
-
i += 2;
|
|
207
|
-
}
|
|
208
188
|
} else {
|
|
209
189
|
positional.push(arg);
|
|
210
190
|
}
|
|
@@ -232,10 +212,8 @@ function parseExploreArgs(args) {
|
|
|
232
212
|
exploreMaxFollowing,
|
|
233
213
|
exploreMaxFollowers,
|
|
234
214
|
exploreLocation,
|
|
235
|
-
|
|
236
|
-
exploreWatchPort,
|
|
215
|
+
serverUrl,
|
|
237
216
|
exploreMaxUsers,
|
|
238
|
-
outputFile,
|
|
239
217
|
urls: [],
|
|
240
218
|
outputFormat: 'json',
|
|
241
219
|
exploreCount: 0,
|
|
@@ -287,7 +265,7 @@ function parseVideosArgs(args) {
|
|
|
287
265
|
}
|
|
288
266
|
|
|
289
267
|
function parseWatchArgs(args) {
|
|
290
|
-
let outputFile =
|
|
268
|
+
let outputFile = './result.json';
|
|
291
269
|
let watchPort = 3000;
|
|
292
270
|
|
|
293
271
|
for (let i = 0; i < args.length; i++) {
|
|
@@ -346,6 +324,7 @@ export function parseArgs() {
|
|
|
346
324
|
let exploreCount = 0;
|
|
347
325
|
let showConfig = false;
|
|
348
326
|
let showHelp = false;
|
|
327
|
+
let showVersion = false;
|
|
349
328
|
let customProxy = null;
|
|
350
329
|
let configAction = null;
|
|
351
330
|
let configValue = null;
|
|
@@ -383,6 +362,8 @@ export function parseArgs() {
|
|
|
383
362
|
showConfig = true;
|
|
384
363
|
} else if (arg === '-h' || arg === '--help') {
|
|
385
364
|
showHelp = true;
|
|
365
|
+
} else if (arg === '--version') {
|
|
366
|
+
showVersion = true;
|
|
386
367
|
} else if (arg.startsWith('http')) {
|
|
387
368
|
urls.push(arg);
|
|
388
369
|
}
|
|
@@ -394,5 +375,5 @@ export function parseArgs() {
|
|
|
394
375
|
urls.push(...lines);
|
|
395
376
|
}
|
|
396
377
|
|
|
397
|
-
return { urls, outputFile, outputFormat, exploreCount, showConfig, showHelp, customProxy, configAction, configValue, pipeMode, filterStr };
|
|
378
|
+
return { urls, outputFile, outputFormat, exploreCount, showConfig, showHelp, showVersion, customProxy, configAction, configValue, pipeMode, filterStr };
|
|
398
379
|
}
|
package/src/lib/auto-browser.mjs
CHANGED
|
@@ -1,13 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { processUser } from '../auto-core.mjs';
|
|
2
|
+
import { processExplore } from '../explore-core.mjs';
|
|
3
|
+
import { ensureBrowserReady } from '../scraper/modules/page-helpers.mjs';
|
|
4
|
+
import { ensureTikTokPage, closeCommentPanel } from '../scraper/modules/page-helpers.mjs';
|
|
2
5
|
|
|
3
|
-
|
|
4
|
-
const core = require('../auto-core.cjs');
|
|
5
|
-
const exploreCore = require('../explore-core.cjs');
|
|
6
|
-
const helpers = require('../scraper/modules/page-helpers.cjs');
|
|
7
|
-
|
|
8
|
-
export const runAuto = core.runAuto;
|
|
9
|
-
export const processUser = core.processUser;
|
|
10
|
-
export const processExplore = exploreCore.processExplore;
|
|
11
|
-
export const ensureBrowserReady = helpers.ensureBrowserReady;
|
|
12
|
-
export const ensureTikTokPage = helpers.ensureTikTokPage;
|
|
13
|
-
export const closeCommentPanel = helpers.closeCommentPanel;
|
|
6
|
+
export { processUser, processExplore, ensureBrowserReady, ensureTikTokPage, closeCommentPanel };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Builds the function that masks common browser-automation fingerprints.
 *
 * The returned function only touches page globals (`navigator`, `window`,
 * `Notification`) and closes over nothing from this module.
 * NOTE(review): presumably it is serialized and injected into the page as an
 * init script, which is why it must stay closure-free — confirm at call site.
 *
 * @returns {() => void} Function that applies the patches when invoked.
 */
export function getAntiDetectScript() {
  return () => {
    // Report the browser as not being driven by WebDriver.
    Object.defineProperty(navigator, 'webdriver', { get: () => false });

    // Provide a minimal `window.chrome` when the page does not have one.
    if (!window.chrome) {
      window.chrome = { runtime: {} };
    }

    // Patch permissions.query only when the API exists, so a missing
    // Permissions API cannot abort the remaining patches below.
    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === 'function') {
      const originalQuery = permissions.query;
      permissions.query = (params) =>
        params?.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission })
          // The native method must be invoked with its original receiver;
          // calling it detached throws "Illegal invocation".
          : originalQuery.call(permissions, params);
    }

    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });

    Object.defineProperty(navigator, 'plugins', {
      get: () => [1, 2, 3, 4, 5],
    });
  };
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import { exec } from 'child_process';
|
|
2
|
+
import http from 'http';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { chromium } from 'playwright';
|
|
6
|
+
|
|
7
|
+
// Local TCP port of the browser's remote-debugging (CDP) endpoint; the port probe, the Edge launch command and the Playwright connect call in this module all use it.
export const CDP_PORT = 9222;
|
|
8
|
+
|
|
9
|
+
/**
 * Returns the name used to start Microsoft Edge on the current platform.
 *
 * @returns {string} Quoted application name on macOS, `msedge.exe` on
 *   Windows, `msedge` everywhere else.
 */
function getEdgePath() {
  switch (os.platform()) {
    case 'darwin':
      return '"Microsoft Edge"';
    case 'win32':
      return 'msedge.exe';
    default:
      return 'msedge';
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * Checks whether a Microsoft Edge process is currently running.
 *
 * Runs a platform-specific shell probe; a zero exit status counts as running.
 *
 * @returns {Promise<boolean>} Resolves `true` when the probe command succeeds,
 *   `false` otherwise. Never rejects.
 */
function isEdgeRunning() {
  return new Promise((done) => {
    const current = os.platform();
    const probe =
      current === 'darwin'
        ? 'ps aux | grep -q "[M]icrosoft Edge.app/Contents/MacOS/Microsoft Edge" 2>/dev/null'
        : current === 'win32'
          ? 'tasklist /FI "IMAGENAME eq msedge.exe" 2>nul | findstr /I msedge'
          : 'pgrep -f msedge > /dev/null 2>&1';

    // Any exec error (non-zero exit included) means no matching process.
    exec(probe, (failure) => {
      done(!failure);
    });
  });
}
|
|
30
|
+
|
|
31
|
+
/**
 * Probes the local CDP endpoint by requesting `/json` on `CDP_PORT`.
 *
 * @returns {Promise<boolean>} Resolves `true` when the endpoint answers with
 *   HTTP 200; `false` on any other status, on a request error, or when the
 *   socket stays idle for 3 seconds. Never rejects.
 */
function checkCDPPort() {
  const endpoint = `http://127.0.0.1:${CDP_PORT}/json`;

  return new Promise((settle) => {
    const request = http.get(endpoint, (response) => {
      // The body is irrelevant; it is consumed only so that 'end' fires.
      response.on('data', () => {});
      response.on('end', () => {
        settle(response.statusCode === 200);
      });
    });

    request.on('error', () => {
      settle(false);
    });

    request.setTimeout(3000, () => {
      settle(false);
      request.destroy();
    });
  });
}
|
|
41
|
+
|
|
42
|
+
/**
 * Checks whether a running Edge process was started with a `user-data-dir`
 * argument, by filtering the process list for that text.
 *
 * @returns {Promise<boolean>} Resolves `true` when the probe succeeds and
 *   prints at least one non-blank line; `false` otherwise. Never rejects.
 */
function checkEdgeArgs() {
  return new Promise((done) => {
    let probe;
    switch (os.platform()) {
      case 'darwin':
        probe = 'ps aux | grep "[M]icrosoft Edge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
        break;
      case 'win32':
        probe = 'wmic process where "name like \\"%msedge%\\"" get commandline | findstr "user-data-dir"';
        break;
      default:
        probe = 'ps aux | grep "[m]sedge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
    }

    exec(probe, (failure, stdout) => {
      done(!failure && stdout.trim().length > 0);
    });
  });
}
|
|
56
|
+
|
|
57
|
+
/**
 * Force-terminates Microsoft Edge processes with a platform-specific command.
 * On macOS the command also removes `Singleton*` files from Edge's cache
 * directory.
 *
 * Best effort: the command's outcome is ignored.
 *
 * @returns {Promise<void>} Resolves once the shell command has finished,
 *   whether or not it succeeded. Never rejects.
 */
function killEdgeProcesses() {
  return new Promise((done) => {
    let command;
    switch (os.platform()) {
      case 'darwin':
        command = 'killall -9 "Microsoft Edge" 2>/dev/null; rm -f ~/Library/Caches/Microsoft\\ Edge/Singleton*; true';
        break;
      case 'win32':
        command = 'taskkill /F /IM msedge.exe 2>nul || exit 0';
        break;
      default:
        command = 'pkill -9 -f msedge 2>/dev/null; true';
    }

    exec(command, () => {
      done();
    });
  });
}
|
|
71
|
+
|
|
72
|
+
/**
 * Starts Microsoft Edge with remote debugging enabled on `CDP_PORT` and a
 * dedicated user-data directory.
 *
 * NOTE(review): the profile directory is built from the macOS-style
 * `~/Library/Application Support/...` path on every platform — confirm this
 * is intended for Windows and Linux.
 *
 * @returns {Promise<void>} Resolves when the launch command's callback fires
 *   without error; rejects with an Error wrapping the exec failure message.
 */
function launchEdgeWithCDP() {
  return new Promise((resolve, reject) => {
    const profileDir = path.join(os.homedir(), 'Library', 'Application Support', 'Microsoft Edge For Testing');
    const current = os.platform();

    let command;
    if (current === 'darwin') {
      command = `open -a ${getEdgePath()} --args --remote-debugging-port=${CDP_PORT} --user-data-dir="${profileDir}"`;
    } else if (current === 'win32') {
      command = `start msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${profileDir}"`;
    } else {
      // Trailing `&` backgrounds the browser so the shell can return.
      command = `msedge --remote-debugging-port=${CDP_PORT} --user-data-dir="${profileDir}" &`;
    }

    exec(command, (err) => {
      if (err) {
        reject(new Error(`启动 Edge 浏览器失败: ${err.message}`));
        return;
      }
      resolve();
    });
  });
}
|
|
93
|
+
|
|
94
|
+
/**
 * Polls the CDP endpoint until it responds or the time budget runs out.
 *
 * @param {number} [timeout=30000] Maximum total wait in milliseconds.
 * @param {number} [interval=1000] Pause between probes in milliseconds.
 * @returns {Promise<boolean>} `true` as soon as a probe succeeds, `false` once
 *   the budget is exhausted.
 */
async function waitForCDP(timeout = 30000, interval = 1000) {
  const startedAt = Date.now();
  const withinBudget = () => Date.now() - startedAt < timeout;
  const pause = () => new Promise((wake) => setTimeout(wake, interval));

  while (withinBudget()) {
    if (await checkCDPPort()) {
      return true;
    }
    await pause();
  }

  return false;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Makes sure an Edge instance with CDP enabled is reachable, (re)starting the
 * browser when needed, then connects Playwright to it.
 *
 * Flow:
 *  1. Probe the CDP port. If it answers but the running Edge lacks the
 *     expected launch arguments, kill Edge and schedule a relaunch.
 *  2. If a launch is needed, kill any Edge still running without CDP, start
 *     Edge with CDP and wait for the port to come up.
 *  3. Connect over CDP.
 *
 * @returns {Promise<object>} The value resolved by `chromium.connectOverCDP`.
 * @throws {Error} When the CDP port does not come up within the wait budget,
 *   or when launching Edge fails.
 */
export async function ensureBrowserReady() {
  // Grace period after killing Edge before it is started again.
  const pauseAfterKill = () => new Promise(r => setTimeout(r, 3000));

  let needLaunch = !(await checkCDPPort());

  if (!needLaunch && !(await checkEdgeArgs())) {
    console.error('Edge 已运行但启动参数不完整,正在重启...');
    await killEdgeProcesses();
    await pauseAfterKill();
    needLaunch = true;
  }

  if (needLaunch) {
    if (await isEdgeRunning()) {
      console.error(`Edge 已运行但 CDP 端口 ${CDP_PORT} 未启用,正在重启...`);
      await killEdgeProcesses();
      await pauseAfterKill();
    } else {
      console.error(`CDP 端口 ${CDP_PORT} 未就绪,正在启动 Edge 浏览器...`);
    }
    await launchEdgeWithCDP();

    console.error('等待浏览器启动...');
    const launched = await waitForCDP();
    if (!launched) {
      // The manual-launch hint uses CDP_PORT so it cannot drift from the
      // port this module actually probes.
      throw new Error(
        `等待 CDP 端口 ${CDP_PORT} 超时。请确认 Edge 浏览器已安装,\n` +
        `或手动启动: Microsoft Edge --remote-debugging-port=${CDP_PORT}`
      );
    }
    console.error('浏览器启动成功');
  }

  const browser = await chromium.connectOverCDP(`http://127.0.0.1:${CDP_PORT}`);
  return browser;
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { accessSync } from 'fs';
|
|
2
|
+
|
|
3
|
+
/**
 * Locates an installed Chromium-family browser by probing well-known install
 * paths for the current platform, in priority order.
 *
 * @returns {string|null} The first candidate path that is accessible, or
 *   `null` when none is (including on platforms with no candidate list).
 */
export function detectBrowser() {
  const candidates = [];

  switch (process.platform) {
    case 'darwin':
      candidates.push(
        '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
        '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
        '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
      );
      break;
    case 'win32': {
      const localAppData = process.env.LOCALAPPDATA || '';
      const programFiles = process.env.PROGRAMFILES || '';
      const programFilesX86 = process.env['PROGRAMFILES(X86)'] || '';
      candidates.push(
        `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
        `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`,
        `${localAppData}\\Google\\Chrome\\Application\\chrome.exe`,
        `${programFiles}\\Microsoft\\Edge\\Application\\msedge.exe`,
        `${programFilesX86}\\Microsoft\\Edge\\Application\\msedge.exe`,
      );
      break;
    }
    case 'linux':
      candidates.push(
        '/usr/bin/google-chrome',
        '/usr/bin/google-chrome-stable',
        '/usr/bin/chromium-browser',
        '/usr/bin/chromium',
        '/snap/bin/chromium',
        '/usr/bin/microsoft-edge',
      );
      break;
    default:
      break;
  }

  const isAccessible = (candidate) => {
    try {
      accessSync(candidate);
      return true;
    } catch {
      // Path missing or not accessible: try the next candidate.
      return false;
    }
  };

  const match = candidates.find(isAccessible);
  return match === undefined ? null : match;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { delay } from '../delay.js';
|
|
2
|
+
import { retryWithBackoff } from '../retry.js';
|
|
3
|
+
import { getDelayConfig } from '../delay.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Closes the comment panel by clicking the last `div` inside the tab
 * container of the right-hand panel, when all of those elements exist.
 * Does nothing when any of them is missing.
 *
 * @param {object} page Page-like object exposing `evaluate(fn)`.
 * @returns {Promise<void>}
 */
export async function closeCommentPanel(page) {
  await page.evaluate(() => {
    const overlay = document
      .querySelector('[class*="RightPanelContainer"]')
      ?.querySelector('[class*="TabContainer"]')
      ?.querySelector('div:last-child');

    if (overlay) {
      overlay.click();
    }
  });
}
|
|
17
|
+
|
|
18
|
+
/**
 * Returns an already-open TikTok page, or opens `url` in a new page.
 *
 * Every page of every browser context is scanned for a URL containing
 * `tiktok.com`; the first match is returned untouched. Otherwise a new page
 * is created, navigated to `url` (with retry), and returned after a delay
 * derived from the current delay configuration.
 *
 * @param {object} browser Browser-like object exposing `contexts()` and
 *   `newContext()`.
 * @param {string} url Address to load when no TikTok page is open yet.
 * @returns {Promise<object>} The existing or newly opened page.
 */
export async function ensureTikTokPage(browser, url) {
  const contexts = browser.contexts();

  for (const ctx of contexts) {
    const existing = ctx.pages().find((p) => p.url().includes('tiktok.com'));
    if (existing) {
      return existing;
    }
  }

  console.error('未找到 TikTok 页面,正在打开...');
  // A browser may expose no context at all; create one instead of crashing
  // on `undefined.newPage()` (same fallback as getOrCreatePage).
  const targetCtx = contexts[0] || (await browser.newContext());
  const page = await targetCtx.newPage();
  await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }));

  // Wait between half of and the full configured switch delay.
  const config = getDelayConfig();
  await delay(Math.round(config.switchMax * 0.5), config.switchMax);
  console.error('TikTok 页面已打开');

  return page;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Finds the first open page whose URL contains `tiktok.com`.
 *
 * Contexts are scanned in order, and pages within each context in order.
 *
 * @param {object} browser Browser-like object exposing `contexts()`.
 * @returns {Promise<object|null>} The matching page, or `null` when none is open.
 */
export async function findTikTokPage(browser) {
  for (const ctx of browser.contexts()) {
    const hit = ctx.pages().find((candidate) => candidate.url().includes('tiktok.com'));
    if (hit) {
      return hit;
    }
  }
  return null;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Returns an open TikTok page, or a blank new page when none exists.
 *
 * The new page is created in the browser's first context; when the browser
 * has no context, one is created first.
 *
 * @param {object} browser Browser-like object exposing `contexts()` and
 *   `newContext()`.
 * @returns {Promise<object>} The existing or newly created page.
 */
export async function getOrCreatePage(browser) {
  const existing = await findTikTokPage(browser);
  if (existing) {
    return existing;
  }

  const hostCtx = browser.contexts()[0] || (await browser.newContext());
  const created = await hostCtx.newPage();
  return created;
}
|
package/src/lib/constants.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { join, dirname } from 'path';
|
|
2
2
|
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
|
+
import { USER_SECTION_SIZE } from './parser.js';
|
|
4
5
|
|
|
5
6
|
const __filename = fileURLToPath(import.meta.url);
|
|
6
7
|
const __dirname = dirname(__filename);
|
|
@@ -9,7 +10,6 @@ const configPath = join(homeDir, '.tt-help.json');
|
|
|
9
10
|
|
|
10
11
|
const DEFAULT_PROXY = 'http://127.0.0.1:7897';
|
|
11
12
|
const DEFAULT_OUTPUT = 'tiktok_data.json';
|
|
12
|
-
const USER_SECTION_SIZE = 12000;
|
|
13
13
|
|
|
14
14
|
let proxy = DEFAULT_PROXY;
|
|
15
15
|
let configFile = null;
|
|
@@ -39,109 +39,27 @@ function saveBrowser(path) {
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
const HELP_TEXT = [
|
|
42
|
-
'用法: tt-help [选项]',
|
|
43
|
-
' tt-help scrape <URL> [预设] [选项]',
|
|
44
|
-
' tt-help videos <用户名> [最大视频数] [选项]',
|
|
45
|
-
' tt-help auto <用户名> [preset] [收集数] [切换数] [每视频评论数] [选项]',
|
|
42
|
+
'用法: tt-help explore <用户名> [preset] [选项]',
|
|
46
43
|
'',
|
|
47
|
-
'
|
|
48
|
-
' explore <用户名> [preset] [选项] 用户探索模式',
|
|
49
|
-
' --explore [count] 从 Explore 页面获取视频列表(默认: 100)',
|
|
50
|
-
' --pipe 将 Explore 结果自动传给 URL 爬取',
|
|
51
|
-
'',
|
|
52
|
-
'用户探索模式 (explore):',
|
|
53
|
-
' tt-help explore <用户名> [preset] [选项]',
|
|
54
|
-
' 支持多个用户名: tt-help explore @user1 @user2 -o data.json',
|
|
44
|
+
' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3000',
|
|
55
45
|
' 预设: fast, normal(默认), slow, stealth',
|
|
56
46
|
' 选项:',
|
|
47
|
+
' --server <URL> 服务端地址,默认 http://127.0.0.1:3000',
|
|
57
48
|
' --location <国家代码> 国家筛选,默认 ES',
|
|
58
|
-
' --max-comments <数量> 每视频最大评论数,默认
|
|
49
|
+
' --max-comments <数量> 每视频最大评论数,默认 10',
|
|
59
50
|
' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
|
|
60
51
|
' --enable-follow 启用关注/粉丝提取(默认启用)',
|
|
61
52
|
' --disable-follow 禁用关注/粉丝提取',
|
|
62
|
-
' --max-following <数量> 最大获取关注数,默认
|
|
63
|
-
' --max-followers <数量> 最大获取粉丝数,默认
|
|
53
|
+
' --max-following <数量> 最大获取关注数,默认 5',
|
|
54
|
+
' --max-followers <数量> 最大获取粉丝数,默认 5',
|
|
64
55
|
' --max-users <数量> 最大处理用户数,默认无限制',
|
|
65
|
-
' --
|
|
66
|
-
'
|
|
67
|
-
'
|
|
68
|
-
'
|
|
69
|
-
' -i, --input <file> 从文件读取 URL 列表(每行一个)',
|
|
70
|
-
' -o, --output <file> 指定输出文件(默认: tiktok_data.json)',
|
|
71
|
-
' -f, --format <fmt> 输出格式: json(默认), table, raw',
|
|
72
|
-
' -c, --config 显示当前配置',
|
|
73
|
-
' -h, --help 显示帮助',
|
|
74
|
-
'',
|
|
75
|
-
'浏览器抓取模式 (scrape):',
|
|
76
|
-
' tt-help scrape <URL> [preset] [最大视频数] [最大评论数] [-o 输出路径]',
|
|
77
|
-
' tt-help scrape <URL> [最大视频数] [最大评论数] [--switch-delay ms] [--comment-delay ms]',
|
|
78
|
-
' 预设: fast(600ms/400ms), normal(1.5s/800ms), slow(3s/2s), stealth(5s/3.5s)',
|
|
79
|
-
' 不指定 -o 则输出到 stdout(可 pipe)',
|
|
80
|
-
'',
|
|
81
|
-
'用户视频模式 (videos):',
|
|
82
|
-
' tt-help videos <用户名> [最大视频数] [-o 输出路径]',
|
|
83
|
-
' 获取用户信息和视频列表(浏览器模式)',
|
|
84
|
-
' 不指定 -o 则输出到 stdout(可 pipe)',
|
|
85
|
-
'',
|
|
86
|
-
'自动探索模式 (auto):',
|
|
87
|
-
' tt-help auto [用户名...] [preset] [收集数] [切换数] [每视频评论数] [选项]',
|
|
88
|
-
' 支持多个用户名: tt-help auto @user1 @user2 -o data.json',
|
|
89
|
-
' 不指定用户名: 从 -o 数据源中读取未处理的用户继续探索',
|
|
90
|
-
' 收集数: 从种子用户收集的种子视频数(默认 1)',
|
|
91
|
-
' 切换数: 每个种子作者主页下切换视频的次数(默认 50)',
|
|
92
|
-
' 每视频评论数: 每个视频抓取评论数(默认 200)',
|
|
93
|
-
' 预设: fast(默认), normal, slow, stealth',
|
|
94
|
-
' 选项: -o <路径> 数据源文件(不指定则输出到 stdout)',
|
|
95
|
-
' --switch-delay <ms> 视频切换延迟(毫秒)',
|
|
96
|
-
' --comment-delay <ms> 评论滚动延迟(毫秒)',
|
|
97
|
-
' --watch [-p 3000] 启动 Web 监控页面实时查看数据',
|
|
98
|
-
' --enable-follow 启用关注/粉丝列表提取(需已登录)',
|
|
99
|
-
' --max-following <n> 关注列表最大提取数(默认 200)',
|
|
100
|
-
' --max-followers <n> 粉丝列表最大提取数(默认 200)',
|
|
101
|
-
' 返回: 纯用户数组,按 uniqueId 去重',
|
|
102
|
-
' 示例: tt-help auto username -o result.json',
|
|
103
|
-
' tt-help auto @a @b fast 1 30 100 -o data.json',
|
|
104
|
-
' tt-help auto -o data.json # 续跑',
|
|
105
|
-
' tt-help auto username --watch -o data.json # 带监控页面',
|
|
106
|
-
'',
|
|
107
|
-
'实时监控模式 (watch):',
|
|
108
|
-
' tt-help watch -o <数据文件> [-p 端口]',
|
|
109
|
-
' 启动 Web 监控页面,实时查看采集数据',
|
|
110
|
-
' 支持在 auto 模式中通过 --watch 参数同时启动',
|
|
111
|
-
' 示例: tt-help watch -o data.json',
|
|
112
|
-
' tt-help watch -o data.json -p 8080',
|
|
56
|
+
' --proxy <地址> 临时指定代理地址',
|
|
57
|
+
' -o, --output <file> 指定输出文件(默认: tiktok_data.json)',
|
|
58
|
+
' -h, --help 显示帮助',
|
|
59
|
+
' --version 显示版本号',
|
|
113
60
|
'',
|
|
114
|
-
'
|
|
115
|
-
|
|
116
|
-
' --filter "locationCreated=DE,ES" 过滤指定地区',
|
|
117
|
-
' --filter "ttSeller=true&locationCreated=US" 组合条件',
|
|
118
|
-
'',
|
|
119
|
-
'配置代理:',
|
|
120
|
-
' tt-help config set http://127.0.0.1:7890 设置代理',
|
|
121
|
-
' tt-help config show 查看配置',
|
|
122
|
-
' tt-help config reset 恢复默认',
|
|
123
|
-
'',
|
|
124
|
-
'配置浏览器:',
|
|
125
|
-
' tt-help config set-browser /path/to/chrome 手动指定浏览器路径',
|
|
126
|
-
' tt-help config set-browser auto 自动探测浏览器',
|
|
127
|
-
'',
|
|
128
|
-
'示例:',
|
|
129
|
-
' tt-help --explore 200 # 先预览 Explore 结果',
|
|
130
|
-
' tt-help --explore 50 --pipe -o result.json # 自动抓取用户数据',
|
|
131
|
-
' tt-help --explore -f raw # 仅输出 URL 列表',
|
|
132
|
-
' tt-help --explore -f raw -o urls.txt # 保存 URL 到文件',
|
|
133
|
-
' tt-help -i urls.txt -o result.json # 再爬取这些 URL',
|
|
134
|
-
' tt-help scrape <URL> fast 50 999 # 浏览器抓取(快速预设)',
|
|
135
|
-
' tt-help scrape <URL> slow -o out.json # 慢速预设,输出到文件',
|
|
136
|
-
' tt-help scrape <URL> 20 999 | jq . # 输出到 stdout 可 pipe',
|
|
137
|
-
' tt-help videos username 100 # 获取用户 100 个视频',
|
|
138
|
-
' tt-help videos username 50 -o out.json # 输出到文件',
|
|
139
|
-
' tt-help auto username -o out.json # 自动探索(默认 fast/1/50/200)',
|
|
140
|
-
' tt-help auto username slow 1 30 100 -o out.json # 慢速探索',
|
|
141
|
-
' tt-help config set http://127.0.0.1:7890',
|
|
142
|
-
' tt-help https://www.tiktok.com/@username',
|
|
143
|
-
' tt-help https://... --filter "ttSeller=true&locationCreated=DE"',
|
|
144
|
-
];
|
|
61
|
+
' 示例: tt-help explore qiqi23280 fast --location ES --max-comments 50',
|
|
62
|
+
];
|
|
145
63
|
|
|
146
64
|
const CONFIG_TEXT = [
|
|
147
65
|
'tt-help v1.0.1',
|
package/src/lib/delay.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Named delay presets. `switchMax` and `commentMax` are upper delay bounds
 * (presumably milliseconds — they are passed to `delay()`; confirm at call
 * sites). A truthy `fast` flag makes `delay()` resolve with a 0 ms timeout.
 */
export const DELAY_PRESETS = {
  fast: {
    switchMax: 300,
    commentMax: 200,
    fast: true,
  },
  normal: {
    switchMax: 1500,
    commentMax: 800,
  },
  slow: {
    switchMax: 3000,
    commentMax: 2000,
  },
  stealth: {
    switchMax: 5000,
    commentMax: 3500,
  },
};
|
|
7
|
+
|
|
8
|
+
// Module-level active delay settings: written by setDelayConfig(), copied out
// by getDelayConfig(), and consulted (`fast`) by delay().
const delayConfig = {
  switchMax: 2500, // initial value until a preset or override is applied
  commentMax: 1500, // initial value until a preset or override is applied
  fast: false, // when true, delay() uses a 0 ms timeout
};
|
|
13
|
+
|
|
14
|
+
/**
 * Updates the active delay configuration.
 *
 * @param {string|object|null|undefined} config
 *   - string: case-insensitive preset name from `DELAY_PRESETS`; replaces
 *     `switchMax`, `commentMax` and `fast` with the preset's values.
 *   - object: truthy `switchMax` / `commentMax` override the current values
 *     (falsy ones such as 0, null or undefined leave them unchanged); `fast`
 *     is always reset to `config.fast || false`.
 *   - anything else (including `null`): ignored.
 * @throws {Error} When a string does not name one of the defined presets.
 */
export function setDelayConfig(config) {
  if (typeof config === 'string') {
    const key = config.toLowerCase();
    // Own-property check so inherited names such as "constructor" are
    // rejected instead of being treated as a preset with undefined delays.
    const preset = Object.hasOwn(DELAY_PRESETS, key) ? DELAY_PRESETS[key] : undefined;
    if (!preset) {
      throw new Error(
        `未知的延迟预设: ${config}\n可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
      );
    }
    delayConfig.switchMax = preset.switchMax;
    delayConfig.commentMax = preset.commentMax;
    delayConfig.fast = preset.fast || false;
    return;
  }

  // `typeof null === 'object'`, so null must be excluded explicitly.
  if (config !== null && typeof config === 'object') {
    if (config.switchMax) delayConfig.switchMax = config.switchMax;
    if (config.commentMax) delayConfig.commentMax = config.commentMax;
    delayConfig.fast = config.fast || false;
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Returns a shallow copy of the active delay configuration, so callers cannot
 * mutate the module's internal state through the returned object.
 *
 * @returns {{switchMax: number, commentMax: number, fast: boolean}}
 */
export function getDelayConfig() {
  return Object.assign({}, delayConfig);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Exposes the preset table.
 *
 * @returns {object} The `DELAY_PRESETS` object itself (not a copy).
 */
export function listDelayPresets() {
  const presets = DELAY_PRESETS;
  return presets;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Waits for a random duration between `min` and `max` milliseconds
 * (inclusive; the bounds may be given in either order). When the active
 * configuration has `fast` set, the timeout is 0 ms instead.
 *
 * @param {number} min One bound of the wait, in milliseconds.
 * @param {number} max The other bound of the wait, in milliseconds.
 * @returns {Promise<void>} Resolves when the timer fires.
 */
export function delay(min, max) {
  const lower = Math.min(min, max);
  const upper = Math.max(min, max);
  const waitMs = delayConfig.fast
    ? 0
    : Math.floor(Math.random() * (upper - lower + 1)) + lower;

  return new Promise((wake) => setTimeout(wake, waitMs));
}
|
|
51
|
+
|
|
52
|
+
/**
 * Convenience wrapper around `delay()` with short default bounds.
 *
 * @param {number} [min=200] One bound of the wait, in milliseconds.
 * @param {number} [max=600] The other bound of the wait, in milliseconds.
 * @returns {Promise<void>} The promise returned by `delay(min, max)`.
 */
export function randomDelay(min = 200, max = 600) {
  const pending = delay(min, max);
  return pending;
}
|