tt-help-cli-ycl 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,214 +1,214 @@
1
- const {
2
- closeCommentPanel,
3
- delay,
4
- ensureBrowserReady,
5
- ensureTikTokPage,
6
- setDelayConfig,
7
- getDelayConfig,
8
- retryWithBackoff,
9
- } = require("./modules/page-helpers.cjs");
10
- const { extractCommentAuthors } = require("./modules/comment-extractor.cjs");
11
- const { extractGuessVideos } = require("./modules/guess-extractor.cjs");
12
-
13
- async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
14
- const config = getDelayConfig();
15
-
16
- await page
17
- .waitForSelector('[class*="VideoMeta"]', { timeout: 10000 })
18
- .catch(() => {});
19
- await delay(Math.round(config.commentMax * 0.3), config.commentMax);
20
-
21
- const userData = await page.evaluate(() => {
22
- const result = {};
23
-
24
- const m = window.location.href.match(/\/@([^\/]+)\/video/);
25
- if (m) result.uniqueId = m[1];
26
-
27
- const authorEls = document.querySelectorAll('[class*="Author"]');
28
- for (const el of authorEls) {
29
- const text = (el.textContent || "").trim();
30
- if (text && !text.includes("TikTok") && !text.includes("Share")) {
31
- result.nickname = text;
32
- break;
33
- }
34
- }
35
-
36
- const html = document.documentElement.outerHTML;
37
- const locMatch = html.match(/"locationCreated":"([^"]*)/);
38
- if (locMatch) result.locationCreated = locMatch[1];
39
-
40
- return result;
41
- });
42
-
43
- const videoAuthor = userData.uniqueId ? "@" + userData.uniqueId : null;
44
- if (!videoAuthor) {
45
- throw new Error("无法获取视频作者");
46
- }
47
-
48
- let guessVideos = [];
49
- let commentUsers = [];
50
-
51
- if (userData.locationCreated === location) {
52
- guessVideos = await extractGuessVideos(page, maxGuess);
53
-
54
- commentUsers = await extractCommentAuthors(page, maxComments);
55
- await closeCommentPanel(page);
56
- await delay(Math.round(config.commentMax * 0.3), config.commentMax);
57
- }
58
-
59
- const uniqueUsers = [...new Set(commentUsers)];
60
-
61
- return {
62
- videoAuthor,
63
- uniqueId: userData.uniqueId,
64
- nickname: userData.nickname,
65
- locationCreated: userData.locationCreated,
66
- commentUsers: uniqueUsers,
67
- guessVideos,
68
- };
69
- }
70
-
71
- async function runScrape(options) {
72
- const {
73
- videoUrl,
74
- maxVideos = 20,
75
- maxComments = 999,
76
- maxGuess = 10,
77
- preset = null,
78
- switchMax = null,
79
- commentMax = null,
80
- log = console.error,
81
- browser: externalBrowser = null,
82
- page: externalPage = null,
83
- } = options;
84
-
85
- if (preset) {
86
- setDelayConfig(preset);
87
- } else if (switchMax || commentMax) {
88
- setDelayConfig({
89
- switchMax: switchMax || 5000,
90
- commentMax: commentMax || 3000,
91
- });
92
- }
93
-
94
- const config = getDelayConfig();
95
-
96
- let browser, page;
97
- const isExternal = !!(externalBrowser && externalPage);
98
-
99
- if (!isExternal) {
100
- log(`视频地址: ${videoUrl}`);
101
- log(
102
- `视频数: ${maxVideos}, 评论数: ${maxComments}, 猜你喜欢: ${maxGuess}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`,
103
- );
104
- }
105
-
106
- if (isExternal) {
107
- browser = externalBrowser;
108
- page = externalPage;
109
- } else {
110
- browser = await ensureBrowserReady();
111
- try {
112
- page = await ensureTikTokPage(browser, videoUrl);
113
- } catch (e) {
114
- await browser.close().catch(() => {});
115
- throw e;
116
- }
117
- }
118
-
119
- await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: "load", timeout: 30000 }), { log });
120
- await delay(Math.round(config.switchMax * 0.5), config.switchMax);
121
- await closeCommentPanel(page);
122
- await delay(Math.round(config.commentMax * 0.5), config.commentMax);
123
-
124
- const allResults = [];
125
- const videoAuthors = new Set();
126
- const commentUsers = new Set();
127
- const allCommentAuthorsList = [];
128
- const allGuessAuthors = new Set();
129
- const allGuessVideos = [];
130
-
131
- for (let i = 0; i < maxVideos; i++) {
132
- await delay(Math.round(config.commentMax * 0.3), config.commentMax);
133
-
134
- let result;
135
- try {
136
- result = await scrapeSingleVideo(page, maxComments, maxGuess, log);
137
- } catch (e) {
138
- log(`[${i + 1}/${maxVideos}] 跳过: ${e.message}`);
139
- if (i < maxVideos - 1) {
140
- await page.evaluate(() => {
141
- const container = document.querySelector(
142
- '[class*="ColumnListContainer"]',
143
- );
144
- if (container) container.scrollTop += 700;
145
- else window.scrollBy(0, 700);
146
- });
147
- await delay(Math.round(config.switchMax * 0.5), config.switchMax);
148
- }
149
- continue;
150
- }
151
- allResults.push(result);
152
- videoAuthors.add(result.videoAuthor);
153
- result.commentUsers.forEach((u) => commentUsers.add(u));
154
- allCommentAuthorsList.push(...result.commentUsers);
155
- if (result.guessVideos) {
156
- allGuessVideos.push(...result.guessVideos);
157
- result.guessVideos.forEach((v) => {
158
- if (v.author) allGuessAuthors.add(v.author);
159
- });
160
- }
161
-
162
- if ((i + 1) % 5 === 0 || i === 0) {
163
- log(
164
- `[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || "-"} | 评论用户: ${result.commentUsers.length} | 猜你喜欢: ${result.guessVideos ? result.guessVideos.length : 0}`,
165
- );
166
- }
167
-
168
- if (i < maxVideos - 1) {
169
- await page.evaluate(() => {
170
- const container = document.querySelector(
171
- '[class*="ColumnListContainer"]',
172
- );
173
- if (container) container.scrollTop += 700;
174
- });
175
- await delay(2000, config.switchMax);
176
- }
177
- }
178
-
179
- log(
180
- `\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length} | 猜你喜欢作者 ${allGuessAuthors.size} | 总猜中视频 ${allGuessVideos.length}`,
181
- );
182
-
183
- const videoDetails = {};
184
- for (const r of allResults) {
185
- const key = r.videoAuthor;
186
- if (!videoDetails[key]) {
187
- videoDetails[key] = {
188
- videoAuthor: r.videoAuthor,
189
- uniqueId: r.uniqueId,
190
- nickname: r.nickname,
191
- locationCreated: r.locationCreated,
192
- };
193
- }
194
- }
195
-
196
- const output = {
197
- videoDetails: Object.values(videoDetails),
198
- commentUsers: [...commentUsers].sort(),
199
- allCommentAuthorsList,
200
- guessVideos: allGuessVideos,
201
- guessAuthors: [...allGuessAuthors].sort(),
202
- stats: {
203
- totalVideos: allResults.length,
204
- uniqueVideoAuthors: videoAuthors.size,
205
- uniqueCommentAuthors: commentUsers.size,
206
- uniqueGuessAuthors: allGuessAuthors.size,
207
- totalGuessVideos: allGuessVideos.length,
208
- },
209
- };
210
-
211
- return { output, browser, isExternal };
212
- }
213
-
1
+ const {
2
+ closeCommentPanel,
3
+ delay,
4
+ ensureBrowserReady,
5
+ ensureTikTokPage,
6
+ setDelayConfig,
7
+ getDelayConfig,
8
+ retryWithBackoff,
9
+ } = require("./modules/page-helpers.cjs");
10
+ const { extractCommentAuthors } = require("./modules/comment-extractor.cjs");
11
+ const { extractGuessVideos } = require("./modules/guess-extractor.cjs");
12
+
13
+ async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
14
+ const config = getDelayConfig();
15
+
16
+ await page
17
+ .waitForSelector('[class*="VideoMeta"]', { timeout: 10000 })
18
+ .catch(() => {});
19
+ await delay(Math.round(config.commentMax * 0.3), config.commentMax);
20
+
21
+ const userData = await page.evaluate(() => {
22
+ const result = {};
23
+
24
+ const m = window.location.href.match(/\/@([^\/]+)\/video/);
25
+ if (m) result.uniqueId = m[1];
26
+
27
+ const authorEls = document.querySelectorAll('[class*="Author"]');
28
+ for (const el of authorEls) {
29
+ const text = (el.textContent || "").trim();
30
+ if (text && !text.includes("TikTok") && !text.includes("Share")) {
31
+ result.nickname = text;
32
+ break;
33
+ }
34
+ }
35
+
36
+ const html = document.documentElement.outerHTML;
37
+ const locMatch = html.match(/"locationCreated":"([^"]*)/);
38
+ if (locMatch) result.locationCreated = locMatch[1];
39
+
40
+ return result;
41
+ });
42
+
43
+ const videoAuthor = userData.uniqueId ? "@" + userData.uniqueId : null;
44
+ if (!videoAuthor) {
45
+ throw new Error("无法获取视频作者");
46
+ }
47
+
48
+ let guessVideos = [];
49
+ let commentUsers = [];
50
+
51
+ if (userData.locationCreated === location) {
52
+ guessVideos = await extractGuessVideos(page, maxGuess);
53
+
54
+ commentUsers = await extractCommentAuthors(page, maxComments);
55
+ await closeCommentPanel(page);
56
+ await delay(Math.round(config.commentMax * 0.3), config.commentMax);
57
+ }
58
+
59
+ const uniqueUsers = [...new Set(commentUsers)];
60
+
61
+ return {
62
+ videoAuthor,
63
+ uniqueId: userData.uniqueId,
64
+ nickname: userData.nickname,
65
+ locationCreated: userData.locationCreated,
66
+ commentUsers: uniqueUsers,
67
+ guessVideos,
68
+ };
69
+ }
70
+
71
+ async function runScrape(options) {
72
+ const {
73
+ videoUrl,
74
+ maxVideos = 20,
75
+ maxComments = 999,
76
+ maxGuess = 10,
77
+ preset = null,
78
+ switchMax = null,
79
+ commentMax = null,
80
+ log = console.error,
81
+ browser: externalBrowser = null,
82
+ page: externalPage = null,
83
+ } = options;
84
+
85
+ if (preset) {
86
+ setDelayConfig(preset);
87
+ } else if (switchMax || commentMax) {
88
+ setDelayConfig({
89
+ switchMax: switchMax || 5000,
90
+ commentMax: commentMax || 3000,
91
+ });
92
+ }
93
+
94
+ const config = getDelayConfig();
95
+
96
+ let browser, page;
97
+ const isExternal = !!(externalBrowser && externalPage);
98
+
99
+ if (!isExternal) {
100
+ log(`视频地址: ${videoUrl}`);
101
+ log(
102
+ `视频数: ${maxVideos}, 评论数: ${maxComments}, 猜你喜欢: ${maxGuess}, 切换延迟: ${config.switchMax}ms, 评论延迟: ${config.commentMax}ms`,
103
+ );
104
+ }
105
+
106
+ if (isExternal) {
107
+ browser = externalBrowser;
108
+ page = externalPage;
109
+ } else {
110
+ browser = await ensureBrowserReady();
111
+ try {
112
+ page = await ensureTikTokPage(browser, videoUrl);
113
+ } catch (e) {
114
+ await browser.close().catch(() => {});
115
+ throw e;
116
+ }
117
+ }
118
+
119
+ await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: "load", timeout: 30000 }), { log });
120
+ await delay(Math.round(config.switchMax * 0.5), config.switchMax);
121
+ await closeCommentPanel(page);
122
+ await delay(Math.round(config.commentMax * 0.5), config.commentMax);
123
+
124
+ const allResults = [];
125
+ const videoAuthors = new Set();
126
+ const commentUsers = new Set();
127
+ const allCommentAuthorsList = [];
128
+ const allGuessAuthors = new Set();
129
+ const allGuessVideos = [];
130
+
131
+ for (let i = 0; i < maxVideos; i++) {
132
+ await delay(Math.round(config.commentMax * 0.3), config.commentMax);
133
+
134
+ let result;
135
+ try {
136
+ result = await scrapeSingleVideo(page, maxComments, maxGuess, log);
137
+ } catch (e) {
138
+ log(`[${i + 1}/${maxVideos}] 跳过: ${e.message}`);
139
+ if (i < maxVideos - 1) {
140
+ await page.evaluate(() => {
141
+ const container = document.querySelector(
142
+ '[class*="ColumnListContainer"]',
143
+ );
144
+ if (container) container.scrollTop += 700;
145
+ else window.scrollBy(0, 700);
146
+ });
147
+ await delay(Math.round(config.switchMax * 0.5), config.switchMax);
148
+ }
149
+ continue;
150
+ }
151
+ allResults.push(result);
152
+ videoAuthors.add(result.videoAuthor);
153
+ result.commentUsers.forEach((u) => commentUsers.add(u));
154
+ allCommentAuthorsList.push(...result.commentUsers);
155
+ if (result.guessVideos) {
156
+ allGuessVideos.push(...result.guessVideos);
157
+ result.guessVideos.forEach((v) => {
158
+ if (v.author) allGuessAuthors.add(v.author);
159
+ });
160
+ }
161
+
162
+ if ((i + 1) % 5 === 0 || i === 0) {
163
+ log(
164
+ `[${i + 1}/${maxVideos}] ${result.videoAuthor} | 昵称: ${result.nickname || "-"} | 评论用户: ${result.commentUsers.length} | 猜你喜欢: ${result.guessVideos ? result.guessVideos.length : 0}`,
165
+ );
166
+ }
167
+
168
+ if (i < maxVideos - 1) {
169
+ await page.evaluate(() => {
170
+ const container = document.querySelector(
171
+ '[class*="ColumnListContainer"]',
172
+ );
173
+ if (container) container.scrollTop += 700;
174
+ });
175
+ await delay(2000, config.switchMax);
176
+ }
177
+ }
178
+
179
+ log(
180
+ `\n结果: 视频作者 ${videoAuthors.size} | 评论用户 ${commentUsers.size} | 总评论 ${allCommentAuthorsList.length} | 猜你喜欢作者 ${allGuessAuthors.size} | 总猜中视频 ${allGuessVideos.length}`,
181
+ );
182
+
183
+ const videoDetails = {};
184
+ for (const r of allResults) {
185
+ const key = r.videoAuthor;
186
+ if (!videoDetails[key]) {
187
+ videoDetails[key] = {
188
+ videoAuthor: r.videoAuthor,
189
+ uniqueId: r.uniqueId,
190
+ nickname: r.nickname,
191
+ locationCreated: r.locationCreated,
192
+ };
193
+ }
194
+ }
195
+
196
+ const output = {
197
+ videoDetails: Object.values(videoDetails),
198
+ commentUsers: [...commentUsers].sort(),
199
+ allCommentAuthorsList,
200
+ guessVideos: allGuessVideos,
201
+ guessAuthors: [...allGuessAuthors].sort(),
202
+ stats: {
203
+ totalVideos: allResults.length,
204
+ uniqueVideoAuthors: videoAuthors.size,
205
+ uniqueCommentAuthors: commentUsers.size,
206
+ uniqueGuessAuthors: allGuessAuthors.size,
207
+ totalGuessVideos: allGuessVideos.length,
208
+ },
209
+ };
210
+
211
+ return { output, browser, isExternal };
212
+ }
213
+
214
214
  module.exports = { scrapeSingleVideo, runScrape };
@@ -1,97 +1,97 @@
1
- const { runScrape } = require("./core.cjs");
2
-
3
- async function main() {
4
- const rawArgs = process.argv.slice(2);
5
-
6
- let outputPath = null;
7
- const args = [];
8
- for (let i = 0; i < rawArgs.length; i++) {
9
- if (rawArgs[i] === "-o" || rawArgs[i] === "--output") {
10
- outputPath = rawArgs[++i];
11
- } else {
12
- args.push(rawArgs[i]);
13
- }
14
- }
15
-
16
- const videoUrl = args[0];
17
-
18
- let preset = null;
19
- let maxVideos = 20;
20
- let maxComments = 999;
21
- let maxGuess = 10;
22
- let switchMax = null;
23
- let commentMax = null;
24
-
25
- if (args[1]) {
26
- if (["fast", "normal", "slow", "stealth"].includes(args[1].toLowerCase())) {
27
- preset = args[1].toLowerCase();
28
- maxVideos = parseInt(args[2]) || 20;
29
- maxComments = parseInt(args[3]) || 999;
30
- maxGuess = parseInt(args[4]) || 10;
31
- } else {
32
- maxVideos = parseInt(args[1]) || 20;
33
- maxComments = parseInt(args[2]) || 999;
34
- maxGuess = parseInt(args[3]) || 10;
35
- switchMax = parseInt(args[4]) || null;
36
- commentMax = parseInt(args[5]) || null;
37
- }
38
- }
39
-
40
- if (!videoUrl) {
41
- console.error("用法:");
42
- console.error(
43
- " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [猜你喜欢数] [-o 输出路径]",
44
- );
45
- console.error(
46
- " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [猜你喜欢数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
47
- );
48
- console.error(
49
- "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
50
- );
51
- console.error(
52
- "选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout",
53
- );
54
- process.exit(1);
55
- }
56
-
57
- let browser;
58
- try {
59
- const { output, browser: b, isExternal } = await runScrape({
60
- videoUrl,
61
- maxVideos,
62
- maxComments,
63
- maxGuess,
64
- preset,
65
- switchMax,
66
- commentMax,
67
- log: console.error,
68
- });
69
- if (!isExternal) {
70
- browser = b;
71
- }
72
-
73
- const json = JSON.stringify(output, null, 2);
74
- if (outputPath) {
75
- const fs = require("fs");
76
- const path = require("path");
77
- const resultFile = path.isAbsolute(outputPath)
78
- ? outputPath
79
- : path.resolve(outputPath);
80
- fs.mkdirSync(path.dirname(resultFile), { recursive: true });
81
- fs.writeFileSync(resultFile, json);
82
- console.error(`已保存到 ${resultFile}`);
83
- } else {
84
- process.stdout.write(json + "\n");
85
- }
86
- } catch (err) {
87
- console.error(err.message);
88
- process.exit(1);
89
- } finally {
90
- if (browser) await browser.close().catch(() => {});
91
- }
92
- }
93
-
94
- main().catch((err) => {
95
- console.error(err.message);
96
- process.exit(1);
1
+ const { runScrape } = require("./core.cjs");
2
+
3
+ async function main() {
4
+ const rawArgs = process.argv.slice(2);
5
+
6
+ let outputPath = null;
7
+ const args = [];
8
+ for (let i = 0; i < rawArgs.length; i++) {
9
+ if (rawArgs[i] === "-o" || rawArgs[i] === "--output") {
10
+ outputPath = rawArgs[++i];
11
+ } else {
12
+ args.push(rawArgs[i]);
13
+ }
14
+ }
15
+
16
+ const videoUrl = args[0];
17
+
18
+ let preset = null;
19
+ let maxVideos = 20;
20
+ let maxComments = 999;
21
+ let maxGuess = 10;
22
+ let switchMax = null;
23
+ let commentMax = null;
24
+
25
+ if (args[1]) {
26
+ if (["fast", "normal", "slow", "stealth"].includes(args[1].toLowerCase())) {
27
+ preset = args[1].toLowerCase();
28
+ maxVideos = parseInt(args[2]) || 20;
29
+ maxComments = parseInt(args[3]) || 999;
30
+ maxGuess = parseInt(args[4]) || 10;
31
+ } else {
32
+ maxVideos = parseInt(args[1]) || 20;
33
+ maxComments = parseInt(args[2]) || 999;
34
+ maxGuess = parseInt(args[3]) || 10;
35
+ switchMax = parseInt(args[4]) || null;
36
+ commentMax = parseInt(args[5]) || null;
37
+ }
38
+ }
39
+
40
+ if (!videoUrl) {
41
+ console.error("用法:");
42
+ console.error(
43
+ " 预设模式: node index.cjs <视频URL> <preset> [最大视频数] [最大评论数] [猜你喜欢数] [-o 输出路径]",
44
+ );
45
+ console.error(
46
+ " 手动模式: node index.cjs <视频URL> [最大视频数] [最大评论数] [猜你喜欢数] [切换延迟ms] [评论延迟ms] [-o 输出路径]",
47
+ );
48
+ console.error(
49
+ "预设: fast(1s/0.8s), normal(2.5s/1.5s), slow(5s/3s), stealth(8s/5s)",
50
+ );
51
+ console.error(
52
+ "选项: -o, --output <路径> 输出到文件; 不指定则输出到 stdout",
53
+ );
54
+ process.exit(1);
55
+ }
56
+
57
+ let browser;
58
+ try {
59
+ const { output, browser: b, isExternal } = await runScrape({
60
+ videoUrl,
61
+ maxVideos,
62
+ maxComments,
63
+ maxGuess,
64
+ preset,
65
+ switchMax,
66
+ commentMax,
67
+ log: console.error,
68
+ });
69
+ if (!isExternal) {
70
+ browser = b;
71
+ }
72
+
73
+ const json = JSON.stringify(output, null, 2);
74
+ if (outputPath) {
75
+ const fs = require("fs");
76
+ const path = require("path");
77
+ const resultFile = path.isAbsolute(outputPath)
78
+ ? outputPath
79
+ : path.resolve(outputPath);
80
+ fs.mkdirSync(path.dirname(resultFile), { recursive: true });
81
+ fs.writeFileSync(resultFile, json);
82
+ console.error(`已保存到 ${resultFile}`);
83
+ } else {
84
+ process.stdout.write(json + "\n");
85
+ }
86
+ } catch (err) {
87
+ console.error(err.message);
88
+ process.exit(1);
89
+ } finally {
90
+ if (browser) await browser.close().catch(() => {});
91
+ }
92
+ }
93
+
94
+ main().catch((err) => {
95
+ console.error(err.message);
96
+ process.exit(1);
97
97
  });