tt-help-cli-ycl 1.3.6 → 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +45 -45
- package/src/cli/auto.js +131 -121
- package/src/cli/explore.js +147 -138
- package/src/cli/progress.js +111 -111
- package/src/cli/scrape.js +47 -47
- package/src/cli/utils.js +18 -18
- package/src/cli/videos.js +41 -41
- package/src/cli/watch.js +31 -31
- package/src/lib/args.js +391 -391
- package/src/lib/browser/anti-detect.js +23 -23
- package/src/lib/browser/cdp.js +142 -142
- package/src/lib/browser/launch.js +43 -43
- package/src/lib/browser/page.js +87 -87
- package/src/lib/constants.js +109 -95
- package/src/lib/delay.js +54 -54
- package/src/lib/explore-fetch.js +118 -118
- package/src/lib/fetcher.js +45 -45
- package/src/lib/filter.js +66 -66
- package/src/lib/io.js +54 -54
- package/src/lib/mac-or-uuid.js +82 -0
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/retry.js +44 -44
- package/src/lib/scrape.js +40 -40
- package/src/lib/url.js +52 -52
- package/src/main.mjs +221 -221
- package/src/scraper/auto-core.mjs +185 -185
- package/src/scraper/core.mjs +190 -190
- package/src/scraper/explore-core.mjs +162 -162
- package/src/scraper/modules/captcha-handler.mjs +114 -114
- package/src/scraper/modules/comment-extractor.mjs +69 -69
- package/src/scraper/modules/follow-extractor.mjs +121 -121
- package/src/scraper/modules/guess-extractor.mjs +51 -51
- package/src/scraper/modules/page-error-detector.mjs +70 -70
- package/src/scraper/modules/page-helpers.mjs +48 -48
- package/src/scraper/modules/scroll-collector.mjs +189 -189
- package/src/test-auto-follow.cjs +109 -0
- package/src/test-extractors.cjs +75 -0
- package/src/test-follow.cjs +41 -0
- package/src/videos/core.mjs +126 -126
- package/src/watch/data-store.mjs +258 -261
- package/src/watch/public/index.html +580 -464
- package/src/watch/server.mjs +308 -281
- package/src/results/user-videos-bar.lar.lar.moeta.json +0 -37
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const { ensureBrowserReady, ensureTikTokPage, delay } = require('./scraper/modules/page-helpers.cjs');
|
|
3
|
+
const { extractFollowAndFollowers } = require('./scraper/modules/follow-extractor.cjs');
|
|
4
|
+
|
|
5
|
+
/**
 * Manual smoke test for the follow/follower extractor.
 *
 * Opens a TikTok profile, runs `extractFollowAndFollowers` on it, writes the
 * result to `../results/follow-result.json` (relative to this file) and prints
 * the same JSON on stdout. All progress messages go to stderr, so stdout
 * carries only the JSON payload.
 *
 * The profile URL is read from the first CLI argument; a built-in sample
 * profile is used when no argument is given.
 *
 * @returns {Promise<void>} Settles once the result is saved and the browser
 *   close has been attempted.
 */
async function main() {
  const targetUrl = process.argv[2] || 'https://www.tiktok.com/@qiqi23280';
  console.error(`目标: ${targetUrl}`);

  const browser = await ensureBrowserReady();
  try {
    const page = await ensureTikTokPage(browser, targetUrl);
    await page.goto(targetUrl, { waitUntil: 'load', timeout: 30000 });
    console.error('等待页面加载...');
    // Pause after the load event before touching the page
    // (presumably a randomized 3-5 s wait - confirm against page-helpers).
    await delay(3000, 5000);

    console.error('开始提取关注和粉丝...\n');
    const extracted = await extractFollowAndFollowers(page, { log: console.error });

    console.error('\n--- 提取完成 ---');
    console.error(`关注: ${extracted.following.length} 人`);
    console.error(`粉丝: ${extracted.followers.length} 人`);

    // Persist the result next to the package sources, creating the folder on demand.
    const fs = require('fs');
    const resultsDir = path.join(__dirname, '..', 'results');
    fs.mkdirSync(resultsDir, { recursive: true });
    const outPath = path.join(resultsDir, 'follow-result.json');
    const serialized = JSON.stringify(extracted, null, 2);
    fs.writeFileSync(outPath, serialized);
    console.error(`已保存到 ${outPath}`);

    console.log(serialized);
  } finally {
    // Best-effort cleanup: a failing close must not mask the real outcome.
    await browser.close().catch(() => {});
  }
}
|
|
37
|
+
|
|
38
|
+
// Script entry point: run the smoke test and exit with status 1 on failure.
main().catch((err) => {
  // A rejection value is not guaranteed to be an Error. Reading `.message`
  // off `undefined`/`null` would throw inside this handler and skip the
  // exit call, so fall back to the stringified value in that case.
  const reason = err != null && err.message !== undefined ? err.message : String(err);
  console.error('错误:', reason);
  process.exit(1);
});
|
package/src/videos/core.mjs
CHANGED
|
@@ -1,126 +1,126 @@
|
|
|
1
|
-
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.mjs';
|
|
2
|
-
import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
|
|
3
|
-
|
|
4
|
-
/**
 * Extract basic profile metadata from the page that is currently loaded.
 *
 * Fields are pulled out of the document's serialized HTML with regular
 * expressions (first occurrence of each key wins); a field that is not found
 * is simply absent from the result. `uniqueId` comes from the `/@handle`
 * segment of the page URL.
 *
 * The callback only touches `document` and `window`, so it is presumably
 * executed inside the page by `page.evaluate`; keep it free of references to
 * outer-scope variables.
 *
 * @param {object} page - Page handle exposing `evaluate(fn)`.
 * @returns {Promise<object>} Object with any of: `uniqueId`, `secUid`,
 *   `nickname`, `signature`, `locationCreated`, `region` (strings),
 *   `ttSeller`, `verified` (booleans), `followerCount`, `videoCount`,
 *   `followingCount`, `heartCount` (integers).
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '?' and '#' as well as '/', so a query string or fragment is
    // not folded into the handle.
    const handleMatch = window.location.href.match(/\/@([^/?#]+)/);
    if (handleMatch) result.uniqueId = handleMatch[1];

    // The captured text is the inside of a JSON string literal. Let the JSON
    // parser undo every escape (\uXXXX, \", \\, \n, ...) in a single pass;
    // chained replace() calls mis-handle sequences such as an escaped
    // backslash followed by 'n'. If the text is not valid JSON string
    // content, return it untouched.
    const decodeJsonString = (raw) => {
      try {
        return JSON.parse(`"${raw}"`);
      } catch (_err) {
        return raw;
      }
    };

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];
    // Free-text fields whose patterns allow backslash escapes in the capture.
    const escapedKeys = ['nickname', 'signature'];

    for (const [key, pat] of Object.entries(patterns)) {
      const match = html.match(pat);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) result[key] = raw === 'true';
      else if (numKeys.includes(key)) result[key] = parseInt(raw, 10);
      else if (escapedKeys.includes(key)) result[key] = decodeJsonString(raw);
      else result[key] = raw;
    }

    return result;
  });
}
|
|
42
|
-
|
|
43
|
-
/**
 * Scroll the profile page and gather the user's video links, de-duplicated
 * by video id.
 *
 * Delegates the scrolling to `scrollAndCollect`, handing it a collector that
 * picks every `<a>` whose href contains `/@<username>/video/` and carries a
 * numeric video id.
 *
 * @param {object} page - Page handle passed through to `scrollAndCollect`.
 * @param {string} username - Profile handle without the leading '@'.
 * @param {number} maxVideos - Forwarded to `scrollAndCollect` as `maxItems`.
 * @param {Function} log - Progress logger; receives one message string per call.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Video id mapped
 *   to the first link seen for it, in collection order.
 */
async function collectVideos(page, username, maxVideos, log) {
  const collected = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Scans every anchor in the document (the `container` argument is not
    // used) and keeps those pointing at one of this user's videos.
    collectFn: (container, args) => {
      const pattern = `/@${args.handle}/video/`;
      const items = [];
      for (const anchor of Array.from(document.querySelectorAll('a'))) {
        const href = anchor.getAttribute('href') || '';
        if (!href.includes(pattern)) continue;
        const idMatch = href.match(/\/video\/(\d+)/);
        if (idMatch) items.push({ id: idMatch[1], href });
      }
      return { items };
    },
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const seenIds = new Set();
      for (const entry of allItems) seenIds.add(entry.id);
      const uniqueCount = seenIds.size;

      // Report only once something has been found, and then only on rounds
      // that produced items or landed on a multiple of ten.
      if (uniqueCount === 0) return;
      if (items.length === 0 && uniqueCount % 10 !== 0) return;
      log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
    },
  });

  // Keep the first occurrence of every id.
  const uniqueVideos = new Map();
  for (const video of collected) {
    if (uniqueVideos.has(video.id)) continue;
    uniqueVideos.set(video.id, video);
  }

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Open a user's TikTok profile, read the profile metadata and collect up to
 * `maxVideos` video links.
 *
 * On success the browser is left open and returned so the caller decides when
 * to close it. On any failure after the browser was obtained, the browser is
 * closed here (best effort) before the error is rethrown, because the caller
 * never receives the handle in that case and could not release it.
 *
 * @param {object} options
 * @param {string} options.username - Profile handle without the leading '@'.
 * @param {number} [options.maxVideos=5] - Upper bound on returned videos.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 * @throws Rethrows whatever page setup, navigation or collection throws.
 */
async function runGetUserVideos(options) {
  const { username, maxVideos = 5, log = console.error } = options;
  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // The video list may legitimately never show up (e.g. a profile without
    // videos), so a timeout here is deliberately ignored.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: allVideos.slice(0, maxVideos).map(v => ({
        id: v.id,
        // Links may be site-relative; make them absolute.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // Release the browser on every failure path, not only when page setup fails.
    await browser.close().catch(() => {});
    throw e;
  }
}
|
|
125
|
-
|
|
126
|
-
export { getUserInfo, collectVideos, runGetUserVideos };
|
|
1
|
+
import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.mjs';
|
|
2
|
+
import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
|
|
3
|
+
|
|
4
|
+
/**
 * Extract basic profile metadata from the page that is currently loaded.
 *
 * Fields are pulled out of the document's serialized HTML with regular
 * expressions (first occurrence of each key wins); a field that is not found
 * is simply absent from the result. `uniqueId` comes from the `/@handle`
 * segment of the page URL.
 *
 * The callback only touches `document` and `window`, so it is presumably
 * executed inside the page by `page.evaluate`; keep it free of references to
 * outer-scope variables.
 *
 * @param {object} page - Page handle exposing `evaluate(fn)`.
 * @returns {Promise<object>} Object with any of: `uniqueId`, `secUid`,
 *   `nickname`, `signature`, `locationCreated`, `region` (strings),
 *   `ttSeller`, `verified` (booleans), `followerCount`, `videoCount`,
 *   `followingCount`, `heartCount` (integers).
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '?' and '#' as well as '/', so a query string or fragment is
    // not folded into the handle.
    const handleMatch = window.location.href.match(/\/@([^/?#]+)/);
    if (handleMatch) result.uniqueId = handleMatch[1];

    // The captured text is the inside of a JSON string literal. Let the JSON
    // parser undo every escape (\uXXXX, \", \\, \n, ...) in a single pass;
    // chained replace() calls mis-handle sequences such as an escaped
    // backslash followed by 'n'. If the text is not valid JSON string
    // content, return it untouched.
    const decodeJsonString = (raw) => {
      try {
        return JSON.parse(`"${raw}"`);
      } catch (_err) {
        return raw;
      }
    };

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];
    // Free-text fields whose patterns allow backslash escapes in the capture.
    const escapedKeys = ['nickname', 'signature'];

    for (const [key, pat] of Object.entries(patterns)) {
      const match = html.match(pat);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) result[key] = raw === 'true';
      else if (numKeys.includes(key)) result[key] = parseInt(raw, 10);
      else if (escapedKeys.includes(key)) result[key] = decodeJsonString(raw);
      else result[key] = raw;
    }

    return result;
  });
}
|
|
42
|
+
|
|
43
|
+
/**
 * Scroll the profile page and gather the user's video links, de-duplicated
 * by video id.
 *
 * Delegates the scrolling to `scrollAndCollect`, handing it a collector that
 * picks every `<a>` whose href contains `/@<username>/video/` and carries a
 * numeric video id.
 *
 * @param {object} page - Page handle passed through to `scrollAndCollect`.
 * @param {string} username - Profile handle without the leading '@'.
 * @param {number} maxVideos - Forwarded to `scrollAndCollect` as `maxItems`.
 * @param {Function} log - Progress logger; receives one message string per call.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Video id mapped
 *   to the first link seen for it, in collection order.
 */
async function collectVideos(page, username, maxVideos, log) {
  const collected = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Scans every anchor in the document (the `container` argument is not
    // used) and keeps those pointing at one of this user's videos.
    collectFn: (container, args) => {
      const pattern = `/@${args.handle}/video/`;
      const items = [];
      for (const anchor of Array.from(document.querySelectorAll('a'))) {
        const href = anchor.getAttribute('href') || '';
        if (!href.includes(pattern)) continue;
        const idMatch = href.match(/\/video\/(\d+)/);
        if (idMatch) items.push({ id: idMatch[1], href });
      }
      return { items };
    },
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const seenIds = new Set();
      for (const entry of allItems) seenIds.add(entry.id);
      const uniqueCount = seenIds.size;

      // Report only once something has been found, and then only on rounds
      // that produced items or landed on a multiple of ten.
      if (uniqueCount === 0) return;
      if (items.length === 0 && uniqueCount % 10 !== 0) return;
      log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
    },
  });

  // Keep the first occurrence of every id.
  const uniqueVideos = new Map();
  for (const video of collected) {
    if (uniqueVideos.has(video.id)) continue;
    uniqueVideos.set(video.id, video);
  }

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Open a user's TikTok profile, read the profile metadata and collect up to
 * `maxVideos` video links.
 *
 * On success the browser is left open and returned so the caller decides when
 * to close it. On any failure after the browser was obtained, the browser is
 * closed here (best effort) before the error is rethrown, because the caller
 * never receives the handle in that case and could not release it.
 *
 * @param {object} options
 * @param {string} options.username - Profile handle without the leading '@'.
 * @param {number} [options.maxVideos=5] - Upper bound on returned videos.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 * @throws Rethrows whatever page setup, navigation or collection throws.
 */
async function runGetUserVideos(options) {
  const { username, maxVideos = 5, log = console.error } = options;
  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // The video list may legitimately never show up (e.g. a profile without
    // videos), so a timeout here is deliberately ignored.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: allVideos.slice(0, maxVideos).map(v => ({
        id: v.id,
        // Links may be site-relative; make them absolute.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // Release the browser on every failure path, not only when page setup fails.
    await browser.close().catch(() => {});
    throw e;
  }
}
|
|
125
|
+
|
|
126
|
+
export { getUserInfo, collectVideos, runGetUserVideos };
|