tt-help-cli-ycl 1.0.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -17
- package/cli.js +9 -9
- package/package.json +44 -44
- package/src/auto-core.cjs +367 -288
- package/src/data-store.cjs +69 -65
- package/src/explore-core.cjs +157 -0
- package/src/get-user-videos-core.cjs +142 -164
- package/src/get-user-videos.cjs +58 -58
- package/src/lib/args.js +397 -287
- package/src/lib/auto-browser.mjs +12 -10
- package/src/lib/constants.js +151 -148
- package/src/lib/explore.js +225 -244
- package/src/lib/fetcher.js +60 -60
- package/src/lib/filter.js +66 -66
- package/src/lib/get-user-videos-browser.mjs +5 -5
- package/src/lib/io.js +76 -76
- package/src/lib/output.js +80 -80
- package/src/lib/parser.js +47 -47
- package/src/lib/scrape-browser.mjs +5 -5
- package/src/lib/scrape.js +39 -39
- package/src/main.mjs +962 -652
- package/src/scraper/core.cjs +213 -191
- package/src/scraper/index.cjs +96 -92
- package/src/scraper/modules/comment-extractor.cjs +49 -122
- package/src/scraper/modules/follow-extractor.cjs +112 -0
- package/src/scraper/modules/guess-extractor.cjs +53 -0
- package/src/scraper/modules/page-helpers.cjs +422 -422
- package/src/scraper/modules/scroll-collector.cjs +173 -0
- package/src/scraper/modules/video-scanner.cjs +43 -43
- package/src/test-auto-follow.cjs +109 -0
- package/src/test-extractors.cjs +75 -0
- package/src/test-follow.cjs +41 -0
- package/src/watch/public/index.html +271 -265
- package/src/watch/server.mjs +153 -145
- package/src/results/user-videos-bar.lar.lar.moeta.json +0 -37
|
@@ -1,122 +1,49 @@
|
|
|
1
|
-
const { delay, getDelayConfig } = require('./page-helpers.cjs');
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
await
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
if (
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
let evalResult;
|
|
52
|
-
try {
|
|
53
|
-
evalResult = await page.evaluate(() => {
|
|
54
|
-
const selectors = ['.DivCommentMain', '[class*="CommentMain"]'];
|
|
55
|
-
let container = null;
|
|
56
|
-
for (const sel of selectors) {
|
|
57
|
-
container = document.querySelector(sel);
|
|
58
|
-
if (container) break;
|
|
59
|
-
}
|
|
60
|
-
if (!container) return { authors: [], scrollHeight: 0, containerFound: false, listFound: false };
|
|
61
|
-
|
|
62
|
-
function findScrollableAncestor(el) {
|
|
63
|
-
if (el.scrollHeight > el.clientHeight + 10) return el;
|
|
64
|
-
let current = el.parentElement;
|
|
65
|
-
while (current && current !== document.body) {
|
|
66
|
-
if (current.scrollHeight > current.clientHeight + 10) return current;
|
|
67
|
-
current = current.parentElement;
|
|
68
|
-
}
|
|
69
|
-
return el;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
const scrollContainer = findScrollableAncestor(container);
|
|
73
|
-
const currentHeight = scrollContainer.scrollHeight;
|
|
74
|
-
|
|
75
|
-
const lc = document.querySelector('[class*="CommentListContainer"]');
|
|
76
|
-
if (!lc) return { authors: [], scrollHeight: currentHeight, containerFound: true, listFound: false };
|
|
77
|
-
|
|
78
|
-
const result = [];
|
|
79
|
-
Array.from(lc.children).forEach(w => {
|
|
80
|
-
const authorEl = w.querySelector('[class*="UsernameContentWrapper"]');
|
|
81
|
-
if (!authorEl) return;
|
|
82
|
-
const link = authorEl.querySelector('a');
|
|
83
|
-
if (link && link.href) {
|
|
84
|
-
const m = link.href.match(/@([^/]+)/);
|
|
85
|
-
if (m) result.push('@' + m[1]);
|
|
86
|
-
}
|
|
87
|
-
});
|
|
88
|
-
return { authors: result, scrollHeight: currentHeight, containerFound: true, listFound: true };
|
|
89
|
-
});
|
|
90
|
-
} catch (e) {
|
|
91
|
-
break;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
const { authors, scrollHeight, containerFound, listFound } = evalResult;
|
|
95
|
-
|
|
96
|
-
if (!containerFound || !listFound) {
|
|
97
|
-
break;
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
allAuthors.push(...authors);
|
|
101
|
-
|
|
102
|
-
if (scrollHeight === maxScrollHeight) {
|
|
103
|
-
noChangeCount++;
|
|
104
|
-
if (noChangeCount >= maxNoChange) {
|
|
105
|
-
break;
|
|
106
|
-
}
|
|
107
|
-
} else {
|
|
108
|
-
noChangeCount = 0;
|
|
109
|
-
}
|
|
110
|
-
maxScrollHeight = scrollHeight;
|
|
111
|
-
|
|
112
|
-
if (allAuthors.length >= maxComments) break;
|
|
113
|
-
scrollAttempts++;
|
|
114
|
-
await delay(Math.round(config.commentMax * 0.3), config.commentMax);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
return [...new Set(allAuthors.slice(0, maxComments))];
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
module.exports = {
|
|
121
|
-
extractCommentAuthors,
|
|
122
|
-
};
|
|
1
|
+
const { delay, getDelayConfig, closeCommentPanel } = require('./page-helpers.cjs');
|
|
2
|
+
const { scrollAndCollect } = require('./scroll-collector.cjs');
|
|
3
|
+
|
|
4
|
+
/**
 * Clicks the tab-bar item whose text contains "评论" and waits for the
 * comment list to show up.
 *
 * @param {object} page - Page object exposing `locator` and `waitForSelector`.
 * @returns {Promise<void>}
 */
async function openCommentPanel(page) {
  await page
    .locator('[class*="tabbar-item"]')
    .filter({ hasText: '评论' })
    .first()
    .click();

  // Randomised pause between 50% and 100% of the configured comment delay.
  const { commentMax } = getDelayConfig();
  await delay(Math.round(commentMax * 0.5), commentMax);

  // Best effort only: a missing list container after 5 s is not treated as an error.
  await page
    .waitForSelector('[class*="CommentListContainer"]', { timeout: 5000 })
    .catch(() => {});
}
|
|
12
|
+
|
|
13
|
+
/**
 * Opens the comment panel, scrolls it while collecting commenter handles,
 * then closes the panel again.
 *
 * The panel is closed even when collection fails, so a scraping error does
 * not leave the page with the panel still open.
 *
 * @param {object} page - Page object handed to the panel helpers and to `scrollAndCollect`.
 * @param {number} [maxComments=10] - Upper bound on the number of handles returned.
 * @returns {Promise<string[]>} Handles in the form `@name`, at most `maxComments` of them.
 * @throws Re-throws whatever `openCommentPanel`, `scrollAndCollect` or
 *   `closeCommentPanel` reject with.
 */
async function extractCommentAuthors(page, maxComments = 10) {
  await openCommentPanel(page);

  const config = getDelayConfig();
  const minPause = Math.round(config.commentMax * 0.3);

  let allAuthors;
  try {
    allAuthors = await scrollAndCollect(page, {
      container: '[class*="CommentMain"]',
      findScrollable: true,
      // This callback reads `document`, so it has to run inside the page.
      // NOTE(review): presumably scrollAndCollect ships it to the browser —
      // keep it free of references to variables from this file.
      collectFn: (container) => {
        const list = document.querySelector('[class*="CommentListContainer"]');
        if (!list) return { items: [] };

        const authors = [];
        for (const wrapper of Array.from(list.children)) {
          const link = wrapper.querySelector('[class*="UsernameContentWrapper"] a');
          if (!link) continue;
          const href = link.href || link.getAttribute('href');
          // Stop at `/`, `?` or `#` so query strings and fragments of an
          // absolute profile URL do not end up inside the handle.
          const m = href && href.match(/@([^/?#]+)/);
          if (m) authors.push('@' + m[1]);
        }
        return { items: authors };
      },
      uniqueKey: (a) => a,
      maxItems: maxComments,
      delayRange: [minPause, config.commentMax],
      staleThreshold: 3,
    });
  } finally {
    // Always restore the page state, whether or not collection succeeded.
    await closeCommentPanel(page);
  }

  await delay(minPause, config.commentMax);

  return allAuthors.slice(0, maxComments);
}
|
|
46
|
+
|
|
47
|
+
// Only the comment-author extractor is exported from this module.
module.exports = { extractCommentAuthors };
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
const { delay, getDelayConfig } = require('./page-helpers.cjs');
|
|
2
|
+
const { scrollAndCollect } = require('./scroll-collector.cjs');
|
|
3
|
+
|
|
4
|
+
// Link texts that are not user display names.
// NOTE(review): nothing visible in this module reads this module-level copy;
// the function returned by createUserCollectFn declares its own identical
// list and that inner one is what the collector checks — confirm whether this
// constant is still needed.
const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];
|
|
5
|
+
|
|
6
|
+
/**
 * Opens the follow modal by clicking the parent of the `[data-e2e=following]`
 * element, then waits for the user list container to appear.
 *
 * @param {object} page - Page object exposing `$` and `waitForSelector`.
 * @returns {Promise<void>}
 * @throws {Error} When the `[data-e2e=following]` element is missing, or when
 *   the user list container does not appear within 5 s. In the latter case
 *   the underlying wait error is attached as `cause`.
 */
async function openFollowModal(page) {
  const el = await page.$('[data-e2e=following]');
  if (!el) {
    throw new Error('未找到 [data-e2e=following] 元素,请确认当前页面为用户主页');
  }

  // The click target is the element's parent, not the element itself.
  await el.evaluate((node) => node.parentElement.click());
  await delay(2000, 3000);

  try {
    await page.waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 });
  } catch (cause) {
    // Keep the original failure reachable for debugging instead of dropping it.
    throw new Error('关注弹窗未出现 DivUserListContainer', { cause });
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Clicks the first tab whose text contains "粉丝" and waits for the user
 * list container.
 *
 * @param {object} page - Page object exposing `evaluate` and `waitForSelector`.
 * @returns {Promise<void>}
 * @throws Whatever `page.evaluate` rejects with when no matching tab exists
 *   (the in-page callback throws '未找到粉丝 Tab').
 */
async function switchToFollowersTab(page) {
  await page.evaluate(() => {
    const followersTab = Array.from(document.querySelectorAll('[class*=DivTabItem]'))
      .find((candidate) => candidate.textContent?.includes('粉丝'));
    if (!followersTab) {
      throw new Error('未找到粉丝 Tab');
    }
    followersTab.click();
  });

  await delay(2000, 3000);

  // Best effort only: a timeout while waiting for the list is ignored.
  await page
    .waitForSelector('[class*=DivUserListContainer]', { timeout: 5000 })
    .catch(() => {});
}
|
|
33
|
+
|
|
34
|
+
/**
 * Clicks the modal's close button when one is present, then pauses briefly.
 *
 * @param {object} page - Page object exposing `evaluate`.
 * @returns {Promise<void>}
 */
async function closeFollowModal(page) {
  await page.evaluate(() => {
    // A missing close button is silently tolerated.
    document.querySelector('[data-e2e=follow-popup-close]')?.click();
  });
  await delay(1000, 2000);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds the collector callback that reads user links out of the follow modal.
 *
 * The returned function reads `document`, so it has to run inside the page.
 * Everything it needs is therefore declared inside it rather than taken from
 * module scope.
 *
 * @returns {function(*): {items: Array<{handle: string, displayName: string}>}}
 *   Collector returning one entry per distinct handle, in document order.
 */
function createUserCollectFn() {
  // `container` is accepted but not used; lookups go through `document`.
  return (container) => {
    const FILTER_WORDS = ['主页', '已关注', '粉丝', '推荐'];

    // decodeURIComponent throws URIError on a malformed percent-escape; fall
    // back to the raw segment so one bad href cannot abort the whole pass.
    const decodeHandle = (raw) => {
      try {
        return decodeURIComponent(raw);
      } catch {
        return raw;
      }
    };

    // Prefer the modal element when it is found, otherwise scan the whole document.
    const modal = document.querySelector('[class*=eyhy6180]');
    const root = modal || document;

    const users = [];
    const seen = new Set();
    for (const link of root.querySelectorAll('a[href*="/@"]')) {
      const match = link.href.match(/@([^/?]+)/);
      if (!match) continue;

      const handle = '@' + decodeHandle(match[1]);
      const text = (link.textContent || '').trim();

      // Links with two or fewer characters of text are skipped.
      // NOTE(review): this also drops users whose display name is that short — confirm intent.
      if (text.length <= 2) continue;
      if (FILTER_WORDS.includes(text)) continue;
      if (seen.has(handle)) continue;

      seen.add(handle);
      users.push({ handle, displayName: text });
    }
    return { items: users };
  };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Scrolls the user list of the currently open modal and gathers its entries.
 *
 * @param {object} page - Page object passed through to `scrollAndCollect`.
 * @param {number} maxUsers - Upper bound on the number of entries returned.
 * @returns {Promise<Array>} At most `maxUsers` items as produced by `scrollAndCollect`.
 */
async function extractUsersFromModal(page, maxUsers) {
  const { commentMax } = getDelayConfig();

  const collectOptions = {
    container: '[class*=DivUserListContainer]',
    findScrollable: false,
    collectFn: createUserCollectFn(),
    uniqueKey: (user) => user.handle,
    maxItems: maxUsers,
    // Pause between 30% and 100% of the configured comment delay.
    delayRange: [Math.round(commentMax * 0.3), commentMax],
    staleThreshold: 5,
  };

  const collected = await scrollAndCollect(page, collectOptions);
  return collected.slice(0, maxUsers);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Opens the follow modal, collects the "following" list, switches to the
 * followers tab, collects that list, and closes the modal.
 *
 * Once the modal has been opened it is closed again even if a later step
 * throws, so a failure does not leave the modal open on the page.
 *
 * @param {object} page - Page object handed to the modal helpers.
 * @param {object} [options]
 * @param {number} [options.maxFollowing=999] - Cap on collected "following" entries.
 * @param {number} [options.maxFollowers=999] - Cap on collected follower entries.
 * @param {function(string): void} [options.log] - Progress logger; defaults to a no-op.
 * @returns {Promise<{following: Array<[string, string]>, followers: Array<[string, string]>}>}
 *   Each entry is a `[handle, displayName]` pair.
 * @throws Re-throws any error raised by the modal helpers.
 */
async function extractFollowAndFollowers(page, options = {}) {
  const {
    maxFollowing = 999,
    maxFollowers = 999,
    log = () => {},
  } = options;

  const toPair = (user) => [user.handle, user.displayName];

  log('打开关注弹窗...');
  await openFollowModal(page);

  let following;
  let followers;
  try {
    log('提取关注列表...');
    following = await extractUsersFromModal(page, maxFollowing);
    log(` 已关注: ${following.length}`);

    log('切换到粉丝 Tab...');
    await switchToFollowersTab(page);

    log('提取粉丝列表...');
    followers = await extractUsersFromModal(page, maxFollowers);
    log(` 粉丝: ${followers.length}`);
  } finally {
    // Runs on success and on failure alike so the modal never stays open.
    log('关闭弹窗...');
    await closeFollowModal(page);
  }

  return {
    following: following.map(toPair),
    followers: followers.map(toPair),
  };
}
|
|
109
|
+
|
|
110
|
+
// Only the combined following/followers extractor is exported from this module.
module.exports = { extractFollowAndFollowers };
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
const { delay, getDelayConfig, closeCommentPanel } = require('./page-helpers.cjs');
|
|
2
|
+
const { scrollAndCollect } = require('./scroll-collector.cjs');
|
|
3
|
+
|
|
4
|
+
/**
 * Clicks the tab-bar item whose text matches "猜你喜欢" and waits for an
 * element with "Related" in its class name to show up.
 *
 * @param {object} page - Page object exposing `locator` and `waitForSelector`.
 * @returns {Promise<void>}
 */
async function openGuessTab(page) {
  await page
    .locator('[class*="tabbar-item"]')
    .filter({ hasText: /猜你喜欢/i })
    .first()
    .click();

  // Randomised pause between 50% and 100% of the configured comment delay.
  const { commentMax } = getDelayConfig();
  await delay(Math.round(commentMax * 0.5), commentMax);

  // Best effort only: a timeout while waiting for the section is ignored.
  await page
    .waitForSelector('[class*="Related"]', { timeout: 5000 })
    .catch(() => {});
}
|
|
12
|
+
|
|
13
|
+
/**
 * Opens the "猜你喜欢" tab, scrolls it while collecting video links, then
 * closes the side panel again.
 *
 * The panel is closed even when collection fails, so a scraping error does
 * not leave the page with the panel still open.
 *
 * @param {object} page - Page object handed to the panel helpers and to `scrollAndCollect`.
 * @param {number} [maxVideos=10] - Upper bound on the number of videos returned.
 * @returns {Promise<Array<{author: string, videoId: string, url: string, title: string}>>}
 *   At most `maxVideos` entries; `title` is always the empty string.
 * @throws Re-throws whatever `openGuessTab`, `scrollAndCollect` or
 *   `closeCommentPanel` reject with.
 */
async function extractGuessVideos(page, maxVideos = 10) {
  await openGuessTab(page);

  const config = getDelayConfig();
  const minPause = Math.round(config.commentMax * 0.3);

  let allVideos;
  try {
    allVideos = await scrollAndCollect(page, {
      container: '[class*="Related"]',
      findScrollable: true,
      // NOTE(review): this callback queries DOM nodes, so presumably
      // scrollAndCollect runs it inside the page — keep it free of
      // references to variables from this file.
      collectFn: (container) => {
        const items = [];
        for (const item of Array.from(container.querySelectorAll('[class*="DivItemContainer"]'))) {
          const link = item.querySelector('a[href*="/video/"]');
          if (!link) continue;
          const href = link.href || link.getAttribute('href');
          // Only links shaped like `@author/video/<digits>` are kept.
          const m = href && href.match(/@([^/]+)\/video\/(\d+)/);
          if (!m) continue;
          items.push({
            author: '@' + m[1],
            videoId: m[2],
            url: href,
            title: '',
          });
        }
        return { items };
      },
      uniqueKey: (v) => v.videoId,
      maxItems: maxVideos,
      delayRange: [minPause, config.commentMax],
      staleThreshold: 3,
    });
  } finally {
    // Always restore the page state, whether or not collection succeeded.
    await closeCommentPanel(page);
  }

  await delay(minPause, config.commentMax);

  return allVideos.slice(0, maxVideos);
}
|
|
50
|
+
|
|
51
|
+
// Only the recommended-video extractor is exported from this module.
module.exports = { extractGuessVideos };
|