tt-help-cli-ycl 1.3.10 → 1.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +1 -1
- package/package.json +4 -5
- package/src/cli/auto.js +1 -1
- package/src/cli/config.js +116 -0
- package/src/cli/explore-default.js +83 -0
- package/src/cli/explore.js +16 -4
- package/src/cli/scrape.js +1 -1
- package/src/cli/videos.js +1 -1
- package/src/cli/watch.js +4 -4
- package/src/lib/args.js +12 -1
- package/src/lib/browser/cdp.js +152 -142
- package/src/lib/constants.js +0 -4
- package/src/lib/explore-fetch.js +1 -1
- package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
- package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
- package/src/main.js +46 -0
- package/src/scraper/{auto-core.mjs → auto-core.js} +5 -5
- package/src/scraper/{core.mjs → core.js} +3 -3
- package/src/scraper/{explore-core.mjs → explore-core.js} +7 -7
- package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +3 -3
- package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +2 -2
- package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +2 -2
- package/src/scraper/modules/page-error-detector.js +1 -0
- package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +1 -1
- package/src/scraper/modules/scroll-collector.js +8 -0
- package/src/videos/{core.mjs → core.js} +2 -2
- package/src/watch/{data-store.mjs → data-store.js} +38 -10
- package/src/watch/public/index.html +13 -2
- package/src/watch/{server.mjs → server.js} +21 -6
- package/src/main.mjs +0 -234
- package/src/test-auto-follow.cjs +0 -109
- package/src/test-extractors.cjs +0 -75
- package/src/test-follow.cjs +0 -41
- /package/{bat → scripts}/run-explore.bat +0 -0
- /package/{bat → scripts}/run-explore.ps1 +0 -0
- /package/{bat → scripts}/run-explore.sh +0 -0
- /package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +0 -0
package/cli.js
CHANGED
|
@@ -5,5 +5,5 @@ import { dirname, resolve } from 'path';
|
|
|
5
5
|
const __filename = fileURLToPath(import.meta.url);
|
|
6
6
|
const __dirname = dirname(__filename);
|
|
7
7
|
|
|
8
|
-
const mainPath = resolve(__dirname, 'src', 'main.mjs');
|
|
8
|
+
const mainPath = resolve(__dirname, 'src', 'main.js');
|
|
9
9
|
await import(`file://${mainPath}`);
|
package/package.json
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tt-help-cli-ycl",
|
|
3
|
-
"version": "1.3.10",
|
|
3
|
+
"version": "1.3.12",
|
|
4
4
|
"description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"tt-help": "cli.js"
|
|
8
8
|
},
|
|
9
|
-
"main": "src/main.mjs",
|
|
9
|
+
"main": "src/main.js",
|
|
10
10
|
"files": [
|
|
11
11
|
"cli.js",
|
|
12
12
|
"src/",
|
|
13
|
-
"bat/"
|
|
13
|
+
"scripts/"
|
|
14
14
|
],
|
|
15
15
|
"scripts": {
|
|
16
|
-
"start": "node src/main.mjs"
|
|
16
|
+
"start": "node src/main.js"
|
|
17
17
|
},
|
|
18
18
|
"keywords": [
|
|
19
19
|
"tiktok",
|
|
@@ -40,7 +40,6 @@
|
|
|
40
40
|
"homepage": "https://github.com/jsjhycl/tt-help-cli#readme",
|
|
41
41
|
"dependencies": {
|
|
42
42
|
"playwright": "^1.59.1",
|
|
43
|
-
"tt-help-cli-ycl": "^1.3.2",
|
|
44
43
|
"undici": "^8.1.0"
|
|
45
44
|
}
|
|
46
45
|
}
|
package/src/cli/auto.js
CHANGED
|
@@ -70,7 +70,7 @@ export async function handleAuto(options) {
|
|
|
70
70
|
|
|
71
71
|
console.error(`服务器: ${serverUrl}(断开会自动重连)`);
|
|
72
72
|
|
|
73
|
-
const { ensureBrowserReady, processUser } = await import('../scraper/auto-core.mjs');
|
|
73
|
+
const { ensureBrowserReady, processUser } = await import('../scraper/auto-core.js');
|
|
74
74
|
const browser = await ensureBrowserReady();
|
|
75
75
|
|
|
76
76
|
const page = await getOrCreatePage(browser);
|
package/src/cli/config.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { HELP_TEXT, configPath, saveBrowser, saveUserId, getConfigText } from '../lib/constants.js';
|
|
2
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { dirname, join } from 'path';
|
|
5
|
+
|
|
6
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
const pkgPath = join(__dirname, '..', '..', 'package.json');
|
|
8
|
+
const { version } = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
9
|
+
|
|
10
|
+
function showConfig(urls, outputFile) {
|
|
11
|
+
const configLines = getConfigText();
|
|
12
|
+
if (urls.length > 0) {
|
|
13
|
+
configLines.push(`\n URL数量: ${urls.length}`);
|
|
14
|
+
}
|
|
15
|
+
if (outputFile) {
|
|
16
|
+
configLines.push(` 输出文件: ${outputFile}`);
|
|
17
|
+
}
|
|
18
|
+
console.error(configLines.join('\n'));
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function showUsage() {
|
|
22
|
+
console.error(HELP_TEXT.join('\n'));
|
|
23
|
+
process.exit(0);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function handleConfig(action, key, value) {
|
|
27
|
+
switch (action) {
|
|
28
|
+
case 'show': {
|
|
29
|
+
const configLines = getConfigText();
|
|
30
|
+
console.error(configLines.join('\n'));
|
|
31
|
+
break;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
case 'set': {
|
|
35
|
+
if (!key) {
|
|
36
|
+
console.error('用法: tt-help config set <key> <value>');
|
|
37
|
+
console.error(' 可用 key: proxy, server, browser, userId');
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
switch (key) {
|
|
42
|
+
case 'proxy':
|
|
43
|
+
if (!value) {
|
|
44
|
+
console.error('请提供 proxy 的值');
|
|
45
|
+
console.error('用法: tt-help config set proxy <代理地址>');
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
saveProxy(value);
|
|
49
|
+
console.error(`代理已更新: ${value}`);
|
|
50
|
+
break;
|
|
51
|
+
|
|
52
|
+
case 'server':
|
|
53
|
+
if (!value) {
|
|
54
|
+
console.error('请提供 server 的值');
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
saveServer(value);
|
|
58
|
+
console.error(`服务器已更新: ${value}`);
|
|
59
|
+
break;
|
|
60
|
+
|
|
61
|
+
case 'browser':
|
|
62
|
+
if (!value) {
|
|
63
|
+
console.error('请提供 browser 的值');
|
|
64
|
+
console.error('用法: tt-help config set browser <浏览器路径>');
|
|
65
|
+
console.error(' 或: tt-help config set-browser <浏览器路径>');
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
saveBrowser(value);
|
|
69
|
+
console.error(`浏览器路径已更新: ${value}`);
|
|
70
|
+
break;
|
|
71
|
+
|
|
72
|
+
case 'userId':
|
|
73
|
+
if (!value) {
|
|
74
|
+
console.error('请提供 userId 的值');
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
saveUserId(value);
|
|
78
|
+
console.error(`用户号已更新: ${value}`);
|
|
79
|
+
break;
|
|
80
|
+
|
|
81
|
+
default:
|
|
82
|
+
console.error(`未知配置项: ${key}`);
|
|
83
|
+
console.error(' 可用 key: proxy, server, browser, userId');
|
|
84
|
+
}
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
case 'reset': {
|
|
89
|
+
if (existsSync(configPath)) {
|
|
90
|
+
writeFileSync(configPath, '{}', 'utf-8');
|
|
91
|
+
console.error('配置已重置为默认');
|
|
92
|
+
} else {
|
|
93
|
+
console.error('配置文件不存在或已是默认状态');
|
|
94
|
+
}
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
default:
|
|
99
|
+
console.error(`未知配置命令: ${action}`);
|
|
100
|
+
console.error('用法: tt-help config [show|set|reset]');
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function saveProxy(newProxy) {
|
|
105
|
+
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
106
|
+
cfg.proxy = newProxy;
|
|
107
|
+
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
function saveServer(newServer) {
|
|
111
|
+
const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
|
|
112
|
+
cfg.server = newServer;
|
|
113
|
+
writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
export { handleConfig, showConfig, showUsage, version };
|
package/src/cli/explore-default.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { parseFilter, applyFilter, formatFilterDescription } from '../lib/filter.js';
|
|
2
|
+
import { writeFileSync } from 'fs';
|
|
3
|
+
import { processUrlsWithProgress } from './progress.js';
|
|
4
|
+
import { cleanError } from './utils.js';
|
|
5
|
+
|
|
6
|
+
async function runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter) {
|
|
7
|
+
const allResults = [];
|
|
8
|
+
|
|
9
|
+
if (exploreCount > 0) {
|
|
10
|
+
try {
|
|
11
|
+
const { fetchExplore } = await import('../lib/explore-fetch.js');
|
|
12
|
+
const exploreResults = await fetchExplore(exploreCount);
|
|
13
|
+
|
|
14
|
+
console.log(` 获取到 ${exploreResults.length} 个视频\n`);
|
|
15
|
+
if (pipeMode) {
|
|
16
|
+
const videoUrls = exploreResults.map(r => r.url).filter(Boolean);
|
|
17
|
+
if (videoUrls.length > 0) {
|
|
18
|
+
await runScrapeDefault(videoUrls, proxyUrl, outputFile, outputFormat, filter);
|
|
19
|
+
return;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
allResults.push(...exploreResults);
|
|
23
|
+
} catch (err) {
|
|
24
|
+
console.error(` Explore 获取失败: ${cleanError(err.message)}\n`);
|
|
25
|
+
console.error(` 请确保代理 ${proxyUrl} 正常运行\n`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
if (urls.length > 0) {
|
|
30
|
+
const { processUrl } = await import('../lib/scrape.js');
|
|
31
|
+
await processUrlsWithProgress({
|
|
32
|
+
urls,
|
|
33
|
+
proxyUrl,
|
|
34
|
+
outputFile,
|
|
35
|
+
outputFormat,
|
|
36
|
+
filter,
|
|
37
|
+
processFn: (url, px) => processUrl(url, px),
|
|
38
|
+
label: '数据',
|
|
39
|
+
log: console.log,
|
|
40
|
+
});
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const { deduplicate, formatOutput } = await import('../lib/output.js');
|
|
45
|
+
const uniqueResults = deduplicate(allResults);
|
|
46
|
+
const filteredResults = applyFilter(uniqueResults, filter);
|
|
47
|
+
|
|
48
|
+
if (filteredResults.length === 0) {
|
|
49
|
+
console.log('\n未获取到数据');
|
|
50
|
+
if (outputFile) writeFileSync(outputFile, '[]', 'utf-8');
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const output = formatOutput(filteredResults, outputFormat);
|
|
55
|
+
if (outputFile) {
|
|
56
|
+
writeFileSync(outputFile, output, 'utf-8');
|
|
57
|
+
console.log(`\n结果已写入: ${outputFile}`);
|
|
58
|
+
} else {
|
|
59
|
+
console.log(output);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (filter) {
|
|
63
|
+
console.log(`\n共 ${uniqueResults.length} 个数据,过滤后 ${filteredResults.length} 个(过滤条件: ${formatFilterDescription(filter)})`);
|
|
64
|
+
} else {
|
|
65
|
+
console.log(`\n共 ${filteredResults.length} 个数据`);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
async function runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter) {
|
|
70
|
+
const { processUrl } = await import('../lib/scrape.js');
|
|
71
|
+
await processUrlsWithProgress({
|
|
72
|
+
urls,
|
|
73
|
+
proxyUrl,
|
|
74
|
+
outputFile,
|
|
75
|
+
outputFormat,
|
|
76
|
+
filter,
|
|
77
|
+
processFn: (url, px) => processUrl(url, px),
|
|
78
|
+
label: '用户的数据',
|
|
79
|
+
log: console.log,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
export { runExploreDefault, runScrapeDefault };
|
package/src/cli/explore.js
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { getOrCreatePage } from '../lib/browser/page.js';
|
|
2
|
-
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.mjs';
|
|
2
|
+
import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.js';
|
|
3
3
|
import { userId as configuredUserId, saveUserId } from '../lib/constants.js';
|
|
4
4
|
import { getMacOrUuid } from '../lib/mac-or-uuid.js';
|
|
5
|
+
import { ensureBrowserReady as ensureBrowserReadyCDP } from '../lib/browser/cdp.js';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import os from 'os';
|
|
5
8
|
|
|
6
9
|
const MAX_RETRY_WAIT = 5 * 60 * 1000;
|
|
7
10
|
|
|
@@ -41,9 +44,10 @@ export async function handleExplore(options) {
|
|
|
41
44
|
exploreUsernames, explorePreset, exploreMaxComments, exploreMaxGuess,
|
|
42
45
|
exploreEnableFollow, exploreMaxFollowing, exploreMaxFollowers,
|
|
43
46
|
exploreLocation, exploreMaxUsers, serverUrl,
|
|
47
|
+
explorePort, exploreProfile, exploreUserId,
|
|
44
48
|
} = options;
|
|
45
49
|
|
|
46
|
-
let userId = configuredUserId;
|
|
50
|
+
let userId = exploreUserId || configuredUserId;
|
|
47
51
|
if (!userId) {
|
|
48
52
|
userId = await getMacOrUuid();
|
|
49
53
|
saveUserId(userId);
|
|
@@ -64,9 +68,17 @@ export async function handleExplore(options) {
|
|
|
64
68
|
console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
|
|
65
69
|
console.error(`服务器: ${serverUrl}(断开会自动重连)`);
|
|
66
70
|
if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
|
|
71
|
+
console.error(`CDP 端口: ${explorePort || 9222}, 用户编号: ${userId}`);
|
|
72
|
+
if (exploreProfile) console.error(`浏览器配置: ${exploreProfile}`);
|
|
67
73
|
|
|
68
|
-
const
|
|
69
|
-
|
|
74
|
+
const cdpOptions = {};
|
|
75
|
+
if (explorePort) cdpOptions.port = explorePort;
|
|
76
|
+
if (exploreProfile) {
|
|
77
|
+
cdpOptions.userDataDir = path.join(os.homedir(), 'Library', 'Application Support', `Microsoft Edge For Testing_${exploreProfile}`);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const browser = await ensureBrowserReadyCDP(cdpOptions);
|
|
81
|
+
const { processExplore } = await import('../scraper/explore-core.js');
|
|
70
82
|
|
|
71
83
|
const page = await getOrCreatePage(browser);
|
|
72
84
|
|
package/src/cli/scrape.js
CHANGED
package/src/cli/videos.js
CHANGED
package/src/cli/watch.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync } from 'fs';
|
|
2
|
-
import { createStore } from '../watch/data-store.mjs';
|
|
3
|
-
import { startWatchServer, openBrowser } from '../watch/server.mjs';
|
|
2
|
+
import { createStore } from '../watch/data-store.js';
|
|
3
|
+
import { startWatchServer, openBrowser } from '../watch/server.js';
|
|
4
4
|
|
|
5
5
|
export async function handleWatch(options) {
|
|
6
6
|
const { outputFile, watchPort } = options;
|
|
@@ -13,7 +13,7 @@ export async function handleWatch(options) {
|
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
if (!existsSync(outputFile)) {
|
|
16
|
-
console.error(
|
|
16
|
+
console.error(`文件不存在: ${outputFile}`);
|
|
17
17
|
process.exit(1);
|
|
18
18
|
}
|
|
19
19
|
|
|
@@ -27,5 +27,5 @@ export async function handleWatch(options) {
|
|
|
27
27
|
process.exit(0);
|
|
28
28
|
});
|
|
29
29
|
|
|
30
|
-
console.error('
|
|
30
|
+
console.error('按 Ctrl+C 停止监控服务');
|
|
31
31
|
}
|
package/src/lib/args.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { readFileSync } from 'fs';
|
|
2
2
|
import { server as defaultServer } from './constants.js';
|
|
3
|
-
import { proxy } from './constants.js';
|
|
4
3
|
|
|
5
4
|
const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
|
|
6
5
|
|
|
@@ -162,6 +161,9 @@ function parseExploreArgs(args) {
|
|
|
162
161
|
let exploreMaxFollowers = 5;
|
|
163
162
|
let exploreLocation = 'ES';
|
|
164
163
|
let exploreMaxUsers = 0;
|
|
164
|
+
let explorePort = null;
|
|
165
|
+
let exploreProfile = null;
|
|
166
|
+
let exploreUserId = null;
|
|
165
167
|
|
|
166
168
|
const positional = [];
|
|
167
169
|
const PRESETS = ['fast', 'normal', 'slow', 'stealth'];
|
|
@@ -186,6 +188,12 @@ function parseExploreArgs(args) {
|
|
|
186
188
|
exploreMaxFollowers = parseInt(args[++i]) || 5;
|
|
187
189
|
} else if (arg === '--max-users') {
|
|
188
190
|
exploreMaxUsers = parseInt(args[++i]) || 0;
|
|
191
|
+
} else if (arg === '--port') {
|
|
192
|
+
explorePort = parseInt(args[++i]) || 9222;
|
|
193
|
+
} else if (arg === '--profile') {
|
|
194
|
+
exploreProfile = args[++i];
|
|
195
|
+
} else if (arg === '--user-id') {
|
|
196
|
+
exploreUserId = args[++i];
|
|
189
197
|
} else {
|
|
190
198
|
positional.push(arg);
|
|
191
199
|
}
|
|
@@ -215,6 +223,9 @@ function parseExploreArgs(args) {
|
|
|
215
223
|
exploreLocation,
|
|
216
224
|
serverUrl,
|
|
217
225
|
exploreMaxUsers,
|
|
226
|
+
explorePort,
|
|
227
|
+
exploreProfile,
|
|
228
|
+
exploreUserId,
|
|
218
229
|
urls: [],
|
|
219
230
|
outputFormat: 'json',
|
|
220
231
|
exploreCount: 0,
|