tt-help-cli-ycl 1.3.12 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -45
- package/scripts/run-explore.bat +68 -68
- package/scripts/run-explore.ps1 +81 -81
- package/scripts/run-explore.sh +73 -73
- package/scripts/test-captcha-lib.mjs +68 -0
- package/scripts/test-captcha.mjs +81 -0
- package/scripts/test-incognito-lib.mjs +36 -0
- package/scripts/test-login-state.mjs +128 -0
- package/scripts/test-safe-click.mjs +45 -0
- package/src/cli/auto.js +186 -157
- package/src/cli/explore.js +227 -193
- package/src/cli/progress.js +111 -111
- package/src/cli/refresh.js +216 -0
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +456 -402
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +52 -10
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +146 -87
- package/src/lib/constants.js +119 -115
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +45 -45
- package/src/lib/scrape.js +40 -40
- package/src/lib/url.js +52 -52
- package/src/main.js +2 -0
- package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
- package/src/scraper/auto-core.js +203 -194
- package/src/scraper/core.js +211 -190
- package/src/scraper/explore-core.js +180 -171
- package/src/scraper/modules/captcha-handler.js +114 -114
- package/src/scraper/modules/comment-extractor.js +74 -69
- package/src/scraper/modules/follow-extractor.js +121 -121
- package/src/scraper/modules/guess-extractor.js +51 -51
- package/src/scraper/modules/page-helpers.js +48 -48
- package/src/scraper/refresh-core.js +179 -0
- package/src/videos/core.js +126 -126
- package/src/watch/data-store.js +431 -302
- package/src/watch/public/index.html +721 -701
- package/src/watch/server.js +483 -359
package/src/lib/constants.js
CHANGED
|
@@ -1,115 +1,119 @@
|
|
|
1
|
-
import { join, dirname } from 'path';
|
|
2
|
-
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
|
-
import { fileURLToPath } from 'url';
|
|
4
|
-
|
|
5
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
-
const __dirname = dirname(__filename);
|
|
7
|
-
const homeDir = process.env.HOME || process.env.USERPROFILE || '';
|
|
8
|
-
const configPath = join(homeDir, '.tt-help.json');
|
|
9
|
-
|
|
10
|
-
const DEFAULT_PROXY = 'http://127.0.0.1:7897';
|
|
11
|
-
const DEFAULT_OUTPUT = 'tiktok_data.json';
|
|
12
|
-
|
|
13
|
-
let proxy = DEFAULT_PROXY;
|
|
14
|
-
let server = 'http://127.0.0.1:3001';
|
|
15
|
-
let configFile = null;
|
|
16
|
-
let browser = null;
|
|
17
|
-
let userId = null;
|
|
18
|
-
|
|
19
|
-
try {
|
|
20
|
-
if (existsSync(configPath)) {
|
|
21
|
-
const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
|
|
22
|
-
if (cfg.proxy) {
|
|
23
|
-
proxy = cfg.proxy;
|
|
24
|
-
}
|
|
25
|
-
if (cfg.server) {
|
|
26
|
-
server = cfg.server;
|
|
27
|
-
}
|
|
28
|
-
if (cfg.browser) {
|
|
29
|
-
browser = cfg.browser;
|
|
30
|
-
}
|
|
31
|
-
if (cfg.userId) {
|
|
32
|
-
userId = cfg.userId;
|
|
33
|
-
}
|
|
34
|
-
configFile = configPath;
|
|
35
|
-
}
|
|
36
|
-
} catch {
|
|
37
|
-
// no config file
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
function saveBrowser(path) {
|
|
41
|
-
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
42
|
-
cfg.browser = path;
|
|
43
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
44
|
-
browser = path;
|
|
45
|
-
configFile = configPath;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
function saveUserId(id) {
|
|
49
|
-
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
50
|
-
cfg.userId = id;
|
|
51
|
-
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
52
|
-
userId = id;
|
|
53
|
-
configFile = configPath;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
const HELP_TEXT = [
|
|
57
|
-
'用法: tt-help explore <用户名> [preset] [选项]',
|
|
58
|
-
'',
|
|
59
|
-
' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3001',
|
|
60
|
-
' 预设: fast, normal(默认), slow, stealth',
|
|
61
|
-
' 选项:',
|
|
62
|
-
' --server <URL> 服务端地址,默认 http://127.0.0.1:3001',
|
|
63
|
-
' --location <国家代码>
|
|
64
|
-
' --max-comments <数量> 每视频最大评论数,默认 10',
|
|
65
|
-
' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
|
|
66
|
-
' --enable-follow 启用关注/粉丝提取(默认启用)',
|
|
67
|
-
' --disable-follow 禁用关注/粉丝提取',
|
|
68
|
-
' --max-following <数量> 最大获取关注数,默认 5',
|
|
69
|
-
' --max-followers <数量> 最大获取粉丝数,默认 5',
|
|
70
|
-
' --max-users <数量> 最大处理用户数,默认无限制',
|
|
71
|
-
'
|
|
72
|
-
'
|
|
73
|
-
'
|
|
74
|
-
'
|
|
75
|
-
'
|
|
76
|
-
'
|
|
77
|
-
'',
|
|
78
|
-
'
|
|
79
|
-
'
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
`
|
|
99
|
-
`
|
|
100
|
-
`
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
1
|
+
import { join, dirname } from 'path';
|
|
2
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
+
const __dirname = dirname(__filename);
|
|
7
|
+
// Per-user config file location. When neither HOME nor USERPROFILE is set the
// directory part is empty, so the path is the relative '.tt-help.json'.
const homeDir = process.env.HOME || process.env.USERPROFILE || '';
const configPath = join(homeDir, '.tt-help.json');

const DEFAULT_PROXY = 'http://127.0.0.1:7897';
const DEFAULT_OUTPUT = 'tiktok_data.json';

// Effective settings: they start at the built-in defaults and are overlaid
// below with any truthy values found in the config file.
let proxy = DEFAULT_PROXY;
let server = 'http://127.0.0.1:3001';
let configFile = null; // path of the config file actually loaded, or null
let browser = null;
let userId = null;

try {
  if (existsSync(configPath)) {
    const stored = JSON.parse(readFileSync(configPath, 'utf-8'));
    // Falsy or missing entries keep the current (default) value.
    proxy = stored.proxy || proxy;
    server = stored.server || server;
    browser = stored.browser || browser;
    userId = stored.userId || userId;
    configFile = configPath;
  }
} catch {
  // Best effort: a config file that exists but cannot be read or parsed is
  // ignored and the defaults above stay in effect.
}
|
|
39
|
+
|
|
40
|
+
/**
 * Persist the browser executable path into the config file and update the
 * in-memory settings to match.
 *
 * Existing keys in the config file are preserved. A config file that exists
 * but does not contain valid JSON makes this function throw.
 *
 * @param {string} path - Browser executable path to store under `browser`.
 */
function saveBrowser(path) {
  let settings = {};
  if (existsSync(configPath)) {
    settings = JSON.parse(readFileSync(configPath, 'utf-8'));
  }
  settings.browser = path;

  const serialized = JSON.stringify(settings, null, 2);
  writeFileSync(configPath, serialized, 'utf-8');

  // Only reached when the write succeeded.
  browser = path;
  configFile = configPath;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Persist the client/user id into the config file and update the in-memory
 * settings to match.
 *
 * Other keys already present in the config file are kept. Throws when an
 * existing config file cannot be parsed as JSON.
 *
 * @param {string} id - Identifier to store under `userId`.
 */
function saveUserId(id) {
  const onDisk = existsSync(configPath);
  const current = onDisk ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
  current.userId = id;

  writeFileSync(
    configPath,
    JSON.stringify(current, null, 2),
    'utf-8',
  );

  // Mirror the persisted value in module state.
  userId = id;
  configFile = configPath;
}
|
|
55
|
+
|
|
56
|
+
// Usage text for the `explore` command plus the global `config` sub-commands,
// stored as one array entry per output line (empty strings are blank lines).
const HELP_TEXT = [
  '用法: tt-help explore <用户名> [preset] [选项]',
  '',
  ' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3001',
  ' 预设: fast, normal(默认), slow, stealth',
  ' 选项:',
  ' --server <URL> 服务端地址,默认 http://127.0.0.1:3001',
  ' --location <国家代码> 国家筛选,逗号分隔,默认 PL,NL,BE,DE,FR,IT,ES,IE',
  ' --max-comments <数量> 每视频最大评论数,默认 10',
  ' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
  ' --enable-follow 启用关注/粉丝提取(默认启用)',
  ' --disable-follow 禁用关注/粉丝提取',
  ' --max-following <数量> 最大获取关注数,默认 5',
  ' --max-followers <数量> 最大获取粉丝数,默认 5',
  ' --max-users <数量> 最大处理用户数,默认无限制',
  ' --port <端口号> CDP 端口,默认 9222',
  ' --profile <名称> 浏览器 Profile 名称',
  ' --user-id <编号> 客户端编号(设备ID),默认自动生成',
  ' 全局选项:',
  ' config 查看当前配置',
  ' config set <key> <value> 设置配置(key: proxy, server, browser, userId)',
  ' config reset 重置所有配置为默认',
  ' -h, --help 显示帮助',
  ' --version 显示版本号',
  '',
  ' 示例: tt-help explore qiqi23280 fast --location ES --max-comments 50',
  ' tt-help explore @user1 --user-id my-device-001 --port 9333',
  ' tt-help config set server http://127.0.0.1:3001',
];
|
|
85
|
+
|
|
86
|
+
/**
 * Build the lines shown by the `config` command: a version banner followed by
 * the effective settings.
 *
 * The banner version is read from the package's own package.json (two
 * directories above this module) so it tracks the published version instead
 * of a hard-coded literal. If that file cannot be read or has no usable
 * `version`, the previous literal `1.0.1` is used.
 *
 * @returns {string[]} One entry per output line.
 */
function getConfigText() {
  let version = '1.0.1';
  try {
    const pkgPath = join(__dirname, '..', '..', 'package.json');
    const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
    if (typeof pkg.version === 'string' && pkg.version) {
      version = pkg.version;
    }
  } catch {
    // Best effort: keep the fallback version when package.json is unavailable.
  }

  // The in-memory userId may be unset even though the config file has one,
  // so re-read the file before reporting "not set".
  let currentUserId = userId;
  if (!currentUserId && existsSync(configPath)) {
    try {
      const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
      if (cfg.userId) currentUserId = cfg.userId;
    } catch {
      // Unreadable or malformed config: report the user id as not set.
    }
  }

  return [
    `tt-help v${version}`,
    '',
    '配置:',
    ` 代理: ${proxy}`,
    ` 服务端: ${server}`,
    ` 浏览器: ${browser || '未配置(将自动探测或回退)'}`,
    ` 用户号: ${currentUserId || '未设置(首次运行 auto 自动创建)'}`,
    ` 输出格式: json`,
    ` 默认输出: ${DEFAULT_OUTPUT}`,
    ` 配置文件: ${configFile || '无(使用默认值)'}`,
  ];
}
|
|
107
|
+
|
|
108
|
+
export {
|
|
109
|
+
proxy,
|
|
110
|
+
server,
|
|
111
|
+
configPath,
|
|
112
|
+
DEFAULT_PROXY,
|
|
113
|
+
HELP_TEXT,
|
|
114
|
+
browser,
|
|
115
|
+
userId,
|
|
116
|
+
saveBrowser,
|
|
117
|
+
saveUserId,
|
|
118
|
+
getConfigText,
|
|
119
|
+
};
|
package/src/lib/delay.js
CHANGED
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
export const DELAY_PRESETS = {
|
|
2
|
-
fast: { switchMax: 300, commentMax: 200, fast: true },
|
|
3
|
-
normal: { switchMax: 1500, commentMax: 800 },
|
|
4
|
-
slow: { switchMax: 3000, commentMax: 2000 },
|
|
5
|
-
stealth: { switchMax: 5000, commentMax: 3500 },
|
|
6
|
-
};
|
|
7
|
-
|
|
8
|
-
const delayConfig = {
|
|
9
|
-
switchMax: 2500,
|
|
10
|
-
commentMax: 1500,
|
|
11
|
-
fast: false,
|
|
12
|
-
};
|
|
13
|
-
|
|
14
|
-
export function setDelayConfig(config) {
|
|
15
|
-
if (typeof config === 'string') {
|
|
16
|
-
const preset = DELAY_PRESETS[config.toLowerCase()];
|
|
17
|
-
if (!preset) {
|
|
18
|
-
throw new Error(
|
|
19
|
-
`未知的延迟预设: ${config}\n可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
|
|
20
|
-
);
|
|
21
|
-
}
|
|
22
|
-
delayConfig.switchMax = preset.switchMax;
|
|
23
|
-
delayConfig.commentMax = preset.commentMax;
|
|
24
|
-
delayConfig.fast = preset.fast || false;
|
|
25
|
-
} else if (typeof config === 'object') {
|
|
26
|
-
if (config.switchMax) delayConfig.switchMax = config.switchMax;
|
|
27
|
-
if (config.commentMax) delayConfig.commentMax = config.commentMax;
|
|
28
|
-
delayConfig.fast = config.fast || false;
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
export function getDelayConfig() {
|
|
33
|
-
return { ...delayConfig };
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
export function listDelayPresets() {
|
|
37
|
-
return DELAY_PRESETS;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
export function delay(min, max) {
|
|
41
|
-
const lo = Math.min(min, max);
|
|
42
|
-
const hi = Math.max(min, max);
|
|
43
|
-
let ms;
|
|
44
|
-
if (delayConfig.fast) {
|
|
45
|
-
ms = 0;
|
|
46
|
-
} else {
|
|
47
|
-
ms = Math.floor(Math.random() * (hi - lo + 1)) + lo;
|
|
48
|
-
}
|
|
49
|
-
return new Promise(r => setTimeout(r, ms));
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export function randomDelay(min = 200, max = 600) {
|
|
53
|
-
return delay(min, max);
|
|
54
|
-
}
|
|
1
|
+
/**
 * Named pacing profiles. `switchMax` and `commentMax` are stored as-is by
 * setDelayConfig; a truthy `fast` makes delay() resolve with a 0 ms timer.
 */
export const DELAY_PRESETS = {
  fast: { switchMax: 300, commentMax: 200, fast: true },
  normal: { switchMax: 1500, commentMax: 800 },
  slow: { switchMax: 3000, commentMax: 2000 },
  stealth: { switchMax: 5000, commentMax: 3500 },
};

// Module-private active configuration; callers only see copies of it.
const delayConfig = {
  switchMax: 2500,
  commentMax: 1500,
  fast: false,
};

/**
 * Update the active delay configuration.
 *
 * @param {string|object|null|undefined} config
 *   - string: name of a preset in DELAY_PRESETS (case-insensitive).
 *   - object: `switchMax` / `commentMax` are applied only when truthy;
 *     `fast` is always reset from `config.fast` (false when absent).
 *   - null or any other type: ignored, the configuration is left unchanged.
 * @throws {Error} When a string does not name one of the presets.
 */
export function setDelayConfig(config) {
  if (typeof config === 'string') {
    const key = config.toLowerCase();
    // Own-property check so inherited names such as "constructor" or
    // "toString" are rejected instead of being treated as presets.
    const preset = Object.prototype.hasOwnProperty.call(DELAY_PRESETS, key)
      ? DELAY_PRESETS[key]
      : undefined;
    if (!preset) {
      throw new Error(
        `未知的延迟预设: ${config}\n可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
      );
    }
    delayConfig.switchMax = preset.switchMax;
    delayConfig.commentMax = preset.commentMax;
    delayConfig.fast = preset.fast || false;
  } else if (config !== null && typeof config === 'object') {
    // `typeof null` is 'object', hence the explicit null guard above.
    if (config.switchMax) delayConfig.switchMax = config.switchMax;
    if (config.commentMax) delayConfig.commentMax = config.commentMax;
    delayConfig.fast = config.fast || false;
  }
}

/**
 * @returns {{switchMax: number, commentMax: number, fast: boolean}} A shallow
 *   copy of the active configuration.
 */
export function getDelayConfig() {
  return { ...delayConfig };
}

/**
 * @returns {object} The DELAY_PRESETS object itself (not a copy).
 */
export function listDelayPresets() {
  return DELAY_PRESETS;
}

/**
 * Wait for a random whole number of milliseconds between the two bounds
 * (inclusive, given in either order). While the active configuration has
 * `fast` set, the timer is always 0 ms.
 *
 * @param {number} [min=0] - One bound, in milliseconds.
 * @param {number} [max=min] - Other bound; defaults to `min`, so `delay(500)`
 *   waits exactly 500 ms instead of computing NaN.
 * @returns {Promise<void>} Resolves when the timer fires.
 */
export function delay(min = 0, max = min) {
  const lo = Math.min(min, max);
  const hi = Math.max(min, max);
  const ms = delayConfig.fast
    ? 0
    : Math.floor(Math.random() * (hi - lo + 1)) + lo;
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Convenience wrapper around delay() with a 200–600 ms default range.
 *
 * @param {number} [min=200]
 * @param {number} [max=600]
 * @returns {Promise<void>}
 */
export function randomDelay(min = 200, max = 600) {
  return delay(min, max);
}
|
package/src/lib/explore-fetch.js
CHANGED
|
@@ -1,118 +1,118 @@
|
|
|
1
|
-
import { chromium } from 'playwright';
|
|
2
|
-
import { browser, saveBrowser, configPath } from './constants.js';
|
|
3
|
-
import { detectBrowser } from './browser/launch.js';
|
|
4
|
-
import { getAntiDetectScript } from './browser/anti-detect.js';
|
|
5
|
-
import { retryWithBackoff } from './retry.js';
|
|
6
|
-
import { scrollAndCollect } from './scroll-collector.js';
|
|
7
|
-
|
|
8
|
-
const EXPLORE_URL = 'https://www.tiktok.com/explore';
|
|
9
|
-
|
|
10
|
-
function sleep(ms) {
|
|
11
|
-
return new Promise(r => setTimeout(r, ms));
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export async function fetchExplore(count = 100) {
|
|
15
|
-
let browserPath = browser;
|
|
16
|
-
let browserSource = '配置';
|
|
17
|
-
|
|
18
|
-
if (!browserPath) {
|
|
19
|
-
console.log(' [0/6] 未配置浏览器,正在自动探测...');
|
|
20
|
-
const detected = detectBrowser();
|
|
21
|
-
if (detected) {
|
|
22
|
-
browserPath = detected;
|
|
23
|
-
browserSource = '自动探测';
|
|
24
|
-
try {
|
|
25
|
-
saveBrowser(browserPath);
|
|
26
|
-
console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
|
|
27
|
-
} catch (err) {
|
|
28
|
-
console.log(` [0/6] 保存配置失败: ${err.message}`);
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
const launchOptions = {
|
|
34
|
-
headless: true,
|
|
35
|
-
args: [
|
|
36
|
-
'--no-sandbox',
|
|
37
|
-
'--disable-setuid-sandbox',
|
|
38
|
-
'--disable-blink-features=AutomationControlled',
|
|
39
|
-
'--disable-dev-shm-usage',
|
|
40
|
-
],
|
|
41
|
-
};
|
|
42
|
-
|
|
43
|
-
if (browserPath) {
|
|
44
|
-
console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
|
|
45
|
-
launchOptions.executablePath = browserPath;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
let instance;
|
|
49
|
-
try {
|
|
50
|
-
instance = await chromium.launch(launchOptions);
|
|
51
|
-
} catch (err) {
|
|
52
|
-
if (browserPath) {
|
|
53
|
-
console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
|
|
54
|
-
}
|
|
55
|
-
instance = await chromium.launch({
|
|
56
|
-
headless: true,
|
|
57
|
-
args: launchOptions.args,
|
|
58
|
-
});
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
try {
|
|
62
|
-
const context = await instance.newContext({
|
|
63
|
-
viewport: { width: 1280, height: 900 },
|
|
64
|
-
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
65
|
-
locale: 'en-US',
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
await context.addInitScript(getAntiDetectScript());
|
|
69
|
-
|
|
70
|
-
const page = await context.newPage();
|
|
71
|
-
await retryWithBackoff(() => page.goto(EXPLORE_URL, { waitUntil: 'load', timeout: 30000 }));
|
|
72
|
-
console.log(' [1/6] 页面已加载');
|
|
73
|
-
|
|
74
|
-
await sleep(5000);
|
|
75
|
-
|
|
76
|
-
const allUrls = await scrollAndCollect(page, {
|
|
77
|
-
container: null,
|
|
78
|
-
collectFn: () => ({
|
|
79
|
-
items: Array.from(document.querySelectorAll('a'))
|
|
80
|
-
.filter(a => /\/video\/\d{16,20}/.test(a.href))
|
|
81
|
-
.map(a => a.href),
|
|
82
|
-
}),
|
|
83
|
-
maxItems: count * 2,
|
|
84
|
-
delayRange: [1500, 2500],
|
|
85
|
-
staleThreshold: 5,
|
|
86
|
-
onRound: (round, items, allItems) => {
|
|
87
|
-
if ((round + 1) % 10 === 0) {
|
|
88
|
-
const uniqueCount = [...new Set(allItems)].length;
|
|
89
|
-
console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
|
|
90
|
-
}
|
|
91
|
-
},
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
await sleep(3000);
|
|
95
|
-
|
|
96
|
-
const unique = [...new Set(allUrls)];
|
|
97
|
-
console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
|
|
98
|
-
|
|
99
|
-
const results = [];
|
|
100
|
-
const seen = new Set();
|
|
101
|
-
for (const url of unique) {
|
|
102
|
-
if (results.length >= count) break;
|
|
103
|
-
const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
|
|
104
|
-
if (videoId && !seen.has(videoId)) {
|
|
105
|
-
seen.add(videoId);
|
|
106
|
-
const user = url.match(/\/@([^/]+)/)?.[1];
|
|
107
|
-
if (user) {
|
|
108
|
-
results.push({ user, id: videoId, url });
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
console.log(` [5/6] 去重后 ${results.length} 个`);
|
|
114
|
-
return results;
|
|
115
|
-
} finally {
|
|
116
|
-
await instance.close();
|
|
117
|
-
}
|
|
118
|
-
}
|
|
1
|
+
import { chromium } from 'playwright';
|
|
2
|
+
import { browser, saveBrowser, configPath } from './constants.js';
|
|
3
|
+
import { detectBrowser } from './browser/launch.js';
|
|
4
|
+
import { getAntiDetectScript } from './browser/anti-detect.js';
|
|
5
|
+
import { retryWithBackoff } from './retry.js';
|
|
6
|
+
import { scrollAndCollect } from './scroll-collector.js';
|
|
7
|
+
|
|
8
|
+
// Page that fetchExplore() opens and scrolls.
const EXPLORE_URL = 'https://www.tiktok.com/explore';

/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Timer duration in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
13
|
+
|
|
14
|
+
/**
 * Convert collected anchor hrefs into unique `{ user, id, url }` records.
 *
 * The video id is the 16–20 digit run after `video/`, and it must be followed
 * by `/`, `?`, `#` or the end of the string. Accepting those suffixes keeps
 * this in step with the in-page collection filter, which takes any href that
 * contains `/video/<id>`; previously hrefs carrying a query string, fragment
 * or trailing slash were collected and then silently discarded here.
 *
 * @param {string[]} urls - Candidate hrefs, in priority order.
 * @param {number} limit - Maximum number of records to return.
 * @returns {{user: string, id: string, url: string}[]} Records de-duplicated
 *   by video id; `url` is the href exactly as collected.
 */
function extractVideoEntries(urls, limit) {
  const entries = [];
  const seenIds = new Set();
  for (const url of urls) {
    if (entries.length >= limit) break;
    const videoId = url.match(/video\/(\d{16,20})(?=[/?#]|$)/)?.[1];
    if (!videoId || seenIds.has(videoId)) continue;
    seenIds.add(videoId);
    const user = url.match(/\/@([^/]+)/)?.[1];
    // Links without an "/@user" segment are dropped (their id stays marked as seen).
    if (user) {
      entries.push({ user, id: videoId, url });
    }
  }
  return entries;
}

/**
 * Open the TikTok explore page in a headless browser, scroll it to collect
 * video links, and return up to `count` unique videos.
 *
 * Browser selection: the configured `browser` path is used when set;
 * otherwise detectBrowser() is consulted and a detected path is saved to the
 * config (a failed save is only logged). If launching fails, one more launch
 * is attempted without `executablePath`.
 *
 * @param {number} [count=100] - Maximum number of videos to return.
 * @returns {Promise<{user: string, id: string, url: string}[]>}
 */
export async function fetchExplore(count = 100) {
  let browserPath = browser;
  let browserSource = '配置';

  if (!browserPath) {
    console.log(' [0/6] 未配置浏览器,正在自动探测...');
    const detected = detectBrowser();
    if (detected) {
      browserPath = detected;
      browserSource = '自动探测';
      try {
        saveBrowser(browserPath);
        console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
      } catch (err) {
        // Persisting the detected path is optional; continue with it anyway.
        console.log(` [0/6] 保存配置失败: ${err.message}`);
      }
    }
  }

  const launchOptions = {
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled',
      '--disable-dev-shm-usage',
    ],
  };

  if (browserPath) {
    console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
    launchOptions.executablePath = browserPath;
  }

  let instance;
  try {
    instance = await chromium.launch(launchOptions);
  } catch (err) {
    if (browserPath) {
      console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
    }
    // Second attempt without executablePath; a failure here propagates.
    instance = await chromium.launch({
      headless: true,
      args: launchOptions.args,
    });
  }

  try {
    const context = await instance.newContext({
      viewport: { width: 1280, height: 900 },
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      locale: 'en-US',
    });

    await context.addInitScript(getAntiDetectScript());

    const page = await context.newPage();
    await retryWithBackoff(() => page.goto(EXPLORE_URL, { waitUntil: 'load', timeout: 30000 }));
    console.log(' [1/6] 页面已加载');

    await sleep(5000);

    // NOTE(review): option semantics are defined by scrollAndCollect, which is
    // not visible here; collectFn presumably runs in the page context — confirm.
    const allUrls = await scrollAndCollect(page, {
      container: null,
      collectFn: () => ({
        items: Array.from(document.querySelectorAll('a'))
          .filter(a => /\/video\/\d{16,20}/.test(a.href))
          .map(a => a.href),
      }),
      maxItems: count * 2,
      delayRange: [1500, 2500],
      staleThreshold: 5,
      onRound: (round, items, allItems) => {
        // Progress line every 10th round.
        if ((round + 1) % 10 === 0) {
          const uniqueCount = [...new Set(allItems)].length;
          console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
        }
      },
    });

    await sleep(3000);

    const unique = [...new Set(allUrls)];
    console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);

    const results = extractVideoEntries(unique, count);

    console.log(` [5/6] 去重后 ${results.length} 个`);
    return results;
  } finally {
    // Always release the browser, whether scraping succeeded or threw.
    await instance.close();
  }
}
|