tt-help-cli-ycl 1.3.11 → 1.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +1 -1
- package/package.json +4 -5
- package/src/cli/auto.js +1 -1
- package/src/cli/config.js +116 -0
- package/src/cli/explore-default.js +83 -0
- package/src/cli/explore.js +16 -4
- package/src/cli/scrape.js +1 -1
- package/src/cli/videos.js +1 -1
- package/src/cli/watch.js +4 -4
- package/src/lib/args.js +12 -1
- package/src/lib/browser/cdp.js +152 -142
- package/src/lib/constants.js +0 -4
- package/src/lib/explore-fetch.js +1 -1
- package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
- package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
- package/src/main.js +46 -0
- package/src/scraper/{auto-core.mjs → auto-core.js} +5 -5
- package/src/scraper/{core.mjs → core.js} +3 -3
- package/src/scraper/{explore-core.mjs → explore-core.js} +7 -7
- package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +3 -3
- package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +2 -2
- package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +2 -2
- package/src/scraper/modules/page-error-detector.js +1 -0
- package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +1 -1
- package/src/scraper/modules/scroll-collector.js +8 -0
- package/src/videos/{core.mjs → core.js} +2 -2
- package/src/watch/{data-store.mjs → data-store.js} +38 -10
- package/src/watch/public/index.html +13 -2
- package/src/watch/{server.mjs → server.js} +16 -5
- package/src/main.mjs +0 -234
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
- /package/{bat → scripts}/run-explore.bat +0 -0
- /package/{bat → scripts}/run-explore.ps1 +0 -0
- /package/{bat → scripts}/run-explore.sh +0 -0
- /package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +0 -0
package/src/lib/browser/cdp.js
CHANGED
|
@@ -1,142 +1,152 @@
|
|
|
1
|
-
import { exec } from 'child_process';
|
|
2
|
-
import http from 'http';
|
|
3
|
-
import os from 'os';
|
|
4
|
-
import path from 'path';
|
|
5
|
-
import { chromium } from 'playwright';
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
if (platform === '
|
|
13
|
-
return 'msedge';
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
res.on('
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
req.
|
|
39
|
-
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
const
|
|
76
|
-
const
|
|
77
|
-
let command;
|
|
78
|
-
|
|
79
|
-
if (platform === 'darwin') {
|
|
80
|
-
command = `open -a ${edgePath} --args --remote-debugging-port=${
|
|
81
|
-
} else if (platform === 'win32') {
|
|
82
|
-
command = `start msedge --remote-debugging-port=${
|
|
83
|
-
} else {
|
|
84
|
-
command = `msedge --remote-debugging-port=${
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
exec(command, (err) => {
|
|
88
|
-
if (err) reject(new Error(`启动 Edge 浏览器失败: ${err.message}`));
|
|
89
|
-
else resolve();
|
|
90
|
-
});
|
|
91
|
-
});
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
async function waitForCDP(timeout = 30000, interval = 1000) {
|
|
95
|
-
const start = Date.now();
|
|
96
|
-
while (Date.now() - start < timeout) {
|
|
97
|
-
const ready = await checkCDPPort();
|
|
98
|
-
if (ready) return true;
|
|
99
|
-
await new Promise(r => setTimeout(r, interval));
|
|
100
|
-
}
|
|
101
|
-
return false;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
export async function ensureBrowserReady() {
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
1
|
+
import { exec } from 'child_process';
|
|
2
|
+
import http from 'http';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { chromium } from 'playwright';
|
|
6
|
+
|
|
7
|
+
const DEFAULT_CDP_PORT = 9222;
|
|
8
|
+
const DEFAULT_USER_DATA_DIR = path.join(os.homedir(), 'Library', 'Application Support', 'Microsoft Edge For Testing');
|
|
9
|
+
|
|
10
|
+
function getEdgePath() {
|
|
11
|
+
const platform = os.platform();
|
|
12
|
+
if (platform === 'darwin') return '"Microsoft Edge"';
|
|
13
|
+
if (platform === 'win32') return 'msedge.exe';
|
|
14
|
+
return 'msedge';
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
function isEdgeRunning() {
|
|
18
|
+
return new Promise(resolve => {
|
|
19
|
+
const platform = os.platform();
|
|
20
|
+
let command;
|
|
21
|
+
if (platform === 'darwin') {
|
|
22
|
+
command = 'ps aux | grep -q "[M]icrosoft Edge.app/Contents/MacOS/Microsoft Edge" 2>/dev/null';
|
|
23
|
+
} else if (platform === 'win32') {
|
|
24
|
+
command = 'tasklist /FI "IMAGENAME eq msedge.exe" 2>nul | findstr /I msedge';
|
|
25
|
+
} else {
|
|
26
|
+
command = 'pgrep -f msedge > /dev/null 2>&1';
|
|
27
|
+
}
|
|
28
|
+
exec(command, (err) => resolve(!err));
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function checkCDPPort(port) {
|
|
33
|
+
return new Promise(resolve => {
|
|
34
|
+
const req = http.get(`http://127.0.0.1:${port}/json`, res => {
|
|
35
|
+
res.on('data', () => {});
|
|
36
|
+
res.on('end', () => resolve(res.statusCode === 200));
|
|
37
|
+
});
|
|
38
|
+
req.on('error', () => resolve(false));
|
|
39
|
+
req.setTimeout(3000, () => { resolve(false); req.destroy(); });
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function checkEdgeArgs() {
|
|
44
|
+
return new Promise(resolve => {
|
|
45
|
+
const platform = os.platform();
|
|
46
|
+
let command;
|
|
47
|
+
if (platform === 'darwin') {
|
|
48
|
+
command = 'ps aux | grep "[M]icrosoft Edge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
|
|
49
|
+
} else if (platform === 'win32') {
|
|
50
|
+
command = 'wmic process where "name like \\"%msedge%\\"" get commandline | findstr "user-data-dir"';
|
|
51
|
+
} else {
|
|
52
|
+
command = 'ps aux | grep "[m]sedge" | grep -v "Helper\\|crashpad" | grep "user-data-dir"';
|
|
53
|
+
}
|
|
54
|
+
exec(command, (err, stdout) => resolve(!err && stdout.trim().length > 0));
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function killEdgeProcesses() {
|
|
59
|
+
return new Promise(resolve => {
|
|
60
|
+
const platform = os.platform();
|
|
61
|
+
let command;
|
|
62
|
+
if (platform === 'darwin') {
|
|
63
|
+
command = 'killall -9 "Microsoft Edge" 2>/dev/null; rm -f ~/Library/Caches/Microsoft\\ Edge/Singleton*; true';
|
|
64
|
+
} else if (platform === 'win32') {
|
|
65
|
+
command = 'taskkill /F /IM msedge.exe 2>nul || exit 0';
|
|
66
|
+
} else {
|
|
67
|
+
command = 'pkill -9 -f msedge 2>/dev/null; true';
|
|
68
|
+
}
|
|
69
|
+
exec(command, () => resolve());
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function launchEdgeWithCDP(port, userDataDir) {
|
|
74
|
+
return new Promise((resolve, reject) => {
|
|
75
|
+
const platform = os.platform();
|
|
76
|
+
const edgePath = getEdgePath();
|
|
77
|
+
let command;
|
|
78
|
+
|
|
79
|
+
if (platform === 'darwin') {
|
|
80
|
+
command = `open -a ${edgePath} --args --remote-debugging-port=${port} --user-data-dir="${userDataDir}"`;
|
|
81
|
+
} else if (platform === 'win32') {
|
|
82
|
+
command = `start msedge --remote-debugging-port=${port} --user-data-dir="${userDataDir}"`;
|
|
83
|
+
} else {
|
|
84
|
+
command = `msedge --remote-debugging-port=${port} --user-data-dir="${userDataDir}" &`;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
exec(command, (err) => {
|
|
88
|
+
if (err) reject(new Error(`启动 Edge 浏览器失败: ${err.message}`));
|
|
89
|
+
else resolve();
|
|
90
|
+
});
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
async function waitForCDP(port, timeout = 30000, interval = 1000) {
|
|
95
|
+
const start = Date.now();
|
|
96
|
+
while (Date.now() - start < timeout) {
|
|
97
|
+
const ready = await checkCDPPort(port);
|
|
98
|
+
if (ready) return true;
|
|
99
|
+
await new Promise(r => setTimeout(r, interval));
|
|
100
|
+
}
|
|
101
|
+
return false;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
export async function ensureBrowserReady(options = {}) {
|
|
105
|
+
const port = options.port || DEFAULT_CDP_PORT;
|
|
106
|
+
const userDataDir = options.userDataDir || DEFAULT_USER_DATA_DIR;
|
|
107
|
+
const isCustom = port !== DEFAULT_CDP_PORT || !!options.userDataDir;
|
|
108
|
+
|
|
109
|
+
const isReady = await checkCDPPort(port);
|
|
110
|
+
let needLaunch = !isReady;
|
|
111
|
+
|
|
112
|
+
if (!needLaunch) {
|
|
113
|
+
if (!isCustom) {
|
|
114
|
+
const edgeArgsValid = await checkEdgeArgs();
|
|
115
|
+
if (!edgeArgsValid) {
|
|
116
|
+
console.error('Edge 已运行但启动参数不完整,正在重启...');
|
|
117
|
+
await killEdgeProcesses();
|
|
118
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
119
|
+
needLaunch = true;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
if (needLaunch) {
|
|
125
|
+
if (isCustom) {
|
|
126
|
+
console.error(`CDP 端口 ${port} 未就绪,正在启动 Edge 浏览器...`);
|
|
127
|
+
} else {
|
|
128
|
+
const edgeRunning = await isEdgeRunning();
|
|
129
|
+
if (edgeRunning) {
|
|
130
|
+
console.error(`Edge 已运行但 CDP 端口 ${port} 未启用,正在重启...`);
|
|
131
|
+
await killEdgeProcesses();
|
|
132
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
133
|
+
} else {
|
|
134
|
+
console.error(`CDP 端口 ${port} 未就绪,正在启动 Edge 浏览器...`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
await launchEdgeWithCDP(port, userDataDir);
|
|
138
|
+
|
|
139
|
+
console.error('等待浏览器启动...');
|
|
140
|
+
const launched = await waitForCDP(port);
|
|
141
|
+
if (!launched) {
|
|
142
|
+
throw new Error(
|
|
143
|
+
`等待 CDP 端口 ${port} 超时。请确认 Edge 浏览器已安装,\n` +
|
|
144
|
+
`或手动启动: Microsoft Edge --remote-debugging-port=${port}`
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
console.error('浏览器启动成功');
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
const browser = await chromium.connectOverCDP(`http://127.0.0.1:${port}`);
|
|
151
|
+
return browser;
|
|
152
|
+
}
|
package/src/lib/constants.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { join, dirname } from 'path';
|
|
2
2
|
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
|
-
import { USER_SECTION_SIZE } from './parser.js';
|
|
5
4
|
|
|
6
5
|
const __filename = fileURLToPath(import.meta.url);
|
|
7
6
|
const __dirname = dirname(__filename);
|
|
@@ -105,11 +104,8 @@ function getConfigText() {
|
|
|
105
104
|
export {
|
|
106
105
|
proxy,
|
|
107
106
|
server,
|
|
108
|
-
configFile,
|
|
109
107
|
configPath,
|
|
110
108
|
DEFAULT_PROXY,
|
|
111
|
-
DEFAULT_OUTPUT,
|
|
112
|
-
USER_SECTION_SIZE,
|
|
113
109
|
HELP_TEXT,
|
|
114
110
|
browser,
|
|
115
111
|
userId,
|
package/src/lib/explore-fetch.js
CHANGED
|
@@ -3,7 +3,7 @@ import { browser, saveBrowser, configPath } from './constants.js';
|
|
|
3
3
|
import { detectBrowser } from './browser/launch.js';
|
|
4
4
|
import { getAntiDetectScript } from './browser/anti-detect.js';
|
|
5
5
|
import { retryWithBackoff } from './retry.js';
|
|
6
|
-
import { scrollAndCollect } from '
|
|
6
|
+
import { scrollAndCollect } from './scroll-collector.js';
|
|
7
7
|
|
|
8
8
|
const EXPLORE_URL = 'https://www.tiktok.com/explore';
|
|
9
9
|
|
|
@@ -1,70 +1,70 @@
|
|
|
1
|
-
const PATTERNS = {
|
|
2
|
-
login_required: [
|
|
3
|
-
"登录 TikTok",
|
|
4
|
-
"登录后查看",
|
|
5
|
-
"查看需登录",
|
|
6
|
-
"Log in to TikTok",
|
|
7
|
-
"Login to TikTok",
|
|
8
|
-
"观众管理功能",
|
|
9
|
-
"Viewer management",
|
|
10
|
-
"私密账号",
|
|
11
|
-
"私密状态",
|
|
12
|
-
],
|
|
13
|
-
captcha: [
|
|
14
|
-
"captcha",
|
|
15
|
-
"verify",
|
|
16
|
-
"验证码",
|
|
17
|
-
"点击下一步",
|
|
18
|
-
"Press and hold",
|
|
19
|
-
"slide to verify",
|
|
20
|
-
"滑动验证",
|
|
21
|
-
"人机验证",
|
|
22
|
-
"安全验证",
|
|
23
|
-
],
|
|
24
|
-
rate_limited: [
|
|
25
|
-
"访问过于频繁",
|
|
26
|
-
"操作过于频繁",
|
|
27
|
-
"too many requests",
|
|
28
|
-
"rate limit",
|
|
29
|
-
"稍后再试",
|
|
30
|
-
"try again later",
|
|
31
|
-
"请稍后再来",
|
|
32
|
-
],
|
|
33
|
-
region_blocked: [
|
|
34
|
-
"地区限制",
|
|
35
|
-
"not available in your",
|
|
36
|
-
"此内容不可用",
|
|
37
|
-
"content not available",
|
|
38
|
-
"currently unavailable",
|
|
39
|
-
"抱歉,此内容",
|
|
40
|
-
"此页面不可用",
|
|
41
|
-
],
|
|
42
|
-
not_found: [
|
|
43
|
-
"页面不存在",
|
|
44
|
-
"page not found",
|
|
45
|
-
"找不到",
|
|
46
|
-
"Couldn't find this",
|
|
47
|
-
"nothing here",
|
|
48
|
-
"此页面不存在",
|
|
49
|
-
"没有内容",
|
|
50
|
-
"发起对话",
|
|
51
|
-
"0 条评论",
|
|
52
|
-
],
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
export async function detectPageError(page) {
|
|
56
|
-
return page.evaluate((patterns) => {
|
|
57
|
-
const bodyText = document.body.innerText;
|
|
58
|
-
const lower = bodyText.toLowerCase();
|
|
59
|
-
|
|
60
|
-
for (const [type, phrases] of Object.entries(patterns)) {
|
|
61
|
-
for (const phrase of phrases) {
|
|
62
|
-
if (lower.includes(phrase.toLowerCase())) {
|
|
63
|
-
return type;
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
return null;
|
|
69
|
-
}, PATTERNS);
|
|
70
|
-
}
|
|
1
|
+
const PATTERNS = {
|
|
2
|
+
login_required: [
|
|
3
|
+
"登录 TikTok",
|
|
4
|
+
"登录后查看",
|
|
5
|
+
"查看需登录",
|
|
6
|
+
"Log in to TikTok",
|
|
7
|
+
"Login to TikTok",
|
|
8
|
+
"观众管理功能",
|
|
9
|
+
"Viewer management",
|
|
10
|
+
"私密账号",
|
|
11
|
+
"私密状态",
|
|
12
|
+
],
|
|
13
|
+
captcha: [
|
|
14
|
+
"captcha",
|
|
15
|
+
"verify",
|
|
16
|
+
"验证码",
|
|
17
|
+
"点击下一步",
|
|
18
|
+
"Press and hold",
|
|
19
|
+
"slide to verify",
|
|
20
|
+
"滑动验证",
|
|
21
|
+
"人机验证",
|
|
22
|
+
"安全验证",
|
|
23
|
+
],
|
|
24
|
+
rate_limited: [
|
|
25
|
+
"访问过于频繁",
|
|
26
|
+
"操作过于频繁",
|
|
27
|
+
"too many requests",
|
|
28
|
+
"rate limit",
|
|
29
|
+
"稍后再试",
|
|
30
|
+
"try again later",
|
|
31
|
+
"请稍后再来",
|
|
32
|
+
],
|
|
33
|
+
region_blocked: [
|
|
34
|
+
"地区限制",
|
|
35
|
+
"not available in your",
|
|
36
|
+
"此内容不可用",
|
|
37
|
+
"content not available",
|
|
38
|
+
"currently unavailable",
|
|
39
|
+
"抱歉,此内容",
|
|
40
|
+
"此页面不可用",
|
|
41
|
+
],
|
|
42
|
+
not_found: [
|
|
43
|
+
"页面不存在",
|
|
44
|
+
"page not found",
|
|
45
|
+
"找不到",
|
|
46
|
+
"Couldn't find this",
|
|
47
|
+
"nothing here",
|
|
48
|
+
"此页面不存在",
|
|
49
|
+
"没有内容",
|
|
50
|
+
"发起对话",
|
|
51
|
+
"0 条评论",
|
|
52
|
+
],
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
export async function detectPageError(page) {
|
|
56
|
+
return page.evaluate((patterns) => {
|
|
57
|
+
const bodyText = document.body.innerText;
|
|
58
|
+
const lower = bodyText.toLowerCase();
|
|
59
|
+
|
|
60
|
+
for (const [type, phrases] of Object.entries(patterns)) {
|
|
61
|
+
for (const phrase of phrases) {
|
|
62
|
+
if (lower.includes(phrase.toLowerCase())) {
|
|
63
|
+
return type;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return null;
|
|
69
|
+
}, PATTERNS);
|
|
70
|
+
}
|