tt-help-cli-ycl 1.3.83 → 1.3.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,296 @@
1
+ import { chromium } from "playwright";
2
+ import { detectBrowser } from "./browser/launch.js";
3
+ import { getAntiDetectScript } from "./browser/anti-detect.js";
4
+ import { TikTokScraper } from "./tiktok-scraper.mjs";
5
+
6
+ const TAG_URL = "https://www.tiktok.com/tag"; // Base URL of tag pages; fetchTagData appends the URL-encoded tag name.
7
+ const USER_AGENT = // User-Agent string handed to browser.newContext() in fetchTagData.
8
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
9
+ const SCROLL_INTERVAL = 3000; // Milliseconds slept after each scroll step in fetchTagData.
10
+ const MAX_STALE_ROUNDS = 3; // Scrolling stops after this many consecutive rounds without new videos.
11
+
12
/**
 * Pause for a fixed amount of time.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
15
+
16
/**
 * Locate a browser executable by delegating to detectBrowser().
 * @returns {*} Whatever detectBrowser() reports, passed through unchanged.
 */
function findBrowser() {
  const detected = detectBrowser();
  return detected;
}
19
+
20
/**
 * Launch a headless Chromium instance through Playwright.
 *
 * When `browserPath` is given it is tried first; if that launch fails, one
 * more attempt is made without an explicit executable path.
 *
 * @param {string} [browserPath] - Path of the browser executable to launch.
 * @returns {Promise<object>} The browser object returned by chromium.launch().
 * @throws {Error} "无法启动浏览器", carrying the underlying failure as `cause`,
 *   when no `browserPath` was given and the launch fails. When the fallback
 *   launch fails, its own error propagates unchanged.
 */
async function launchBrowser(browserPath) {
  const baseOptions = {
    headless: true,
    args: [
      "--no-sandbox",
      "--disable-blink-features=AutomationControlled",
      "--disable-dev-shm-usage",
    ],
  };

  if (!browserPath) {
    try {
      return await chromium.launch(baseOptions);
    } catch (err) {
      // Keep the real reason attached so callers can diagnose the failure.
      throw new Error("无法启动浏览器", { cause: err });
    }
  }

  try {
    return await chromium.launch({
      ...baseOptions,
      executablePath: browserPath,
    });
  } catch {
    // The explicit executable could not be started; retry once without it.
    // Separate option objects are used so the retry never sees a stale path.
    return await chromium.launch(baseOptions);
  }
}
41
+
42
/**
 * Flatten one raw item-list entry into a plain record.
 *
 * Missing (or falsy) fields fall back to "" for strings, 0 for numbers and
 * false for `isAd`; `desc` is additionally trimmed.
 *
 * @param {object} item - Raw item as delivered in an `itemList` array.
 * @returns {Promise<object>} The flattened record.
 */
async function extractItemData(item) {
  const { author, stats, music, video } = item;
  const description = item.desc || "";

  return {
    id: item.id || "",
    desc: description.trim(),
    authorUniqueId: author?.uniqueId || "",
    authorId: author?.id || "",
    authorNickname: author?.nickname || "",
    authorSecUid: author?.secUid || "",
    createTime: item.createTime || 0,
    playCount: stats?.playCount || 0,
    diggCount: stats?.diggCount || 0,
    shareCount: stats?.shareCount || 0,
    commentCount: stats?.commentCount || 0,
    musicTitle: music?.title || "",
    isAd: item.isAd || false,
    duration: video?.duration || 0,
  };
}
60
+
61
/**
 * Collect the videos and authors listed on a TikTok tag page.
 *
 * Opens the tag page in a headless browser, scrolls repeatedly, and harvests
 * the JSON bodies of responses whose URL contains `/api/challenge/detail/`
 * or `/api/challenge/item_list/`. Scrolling stops after MAX_STALE_ROUNDS
 * consecutive rounds without new videos, or once `timeout` has elapsed.
 *
 * @param {string} tag - Tag name (without the leading "#").
 * @param {object} [options]
 * @param {number} [options.timeout=300000] - Time budget for the scrolling
 *   phase in ms; checked before each scroll step.
 * @param {string} [options.browserPath] - Browser executable path;
 *   auto-detected when omitted.
 * @param {string} [options.locale='en-US'] - Locale of the browser context.
 * @param {Function} [options.onProgress] - Called with `{ videos, authors }`
 *   (running counts) after each processed item-list response.
 * @returns {Promise<{ tag: string, challengeId: string, totalPosts: number, videoCount: number, uniqueAuthorCount: number, videos: Array, uniqueAuthors: string[] }>}
 *   `videos` is de-duplicated by id; `totalPosts` comes from the challenge
 *   detail response (0 when none was captured).
 * @throws {Error} When no browser can be found, the navigation yields no
 *   response or a non-200 status, or the page shows TikTok's error text.
 */
export async function fetchTagData(tag, options = {}) {
  const {
    timeout = 300000,
    browserPath: customBrowserPath,
    locale = "en-US",
    onProgress,
  } = options;

  const browserPath = customBrowserPath || findBrowser();
  if (!browserPath) {
    throw new Error(
      "未找到可用的浏览器,请设置 browserPath 或安装 Chrome/Edge",
    );
  }

  const browser = await launchBrowser(browserPath);

  try {
    const context = await browser.newContext({
      viewport: { width: 1280, height: 900 },
      userAgent: USER_AGENT,
      locale,
    });
    await context.addInitScript(getAntiDetectScript());
    const page = await context.newPage();

    let challengeInfo = null;
    const rawVideos = [];
    const authors = new Set();

    // Passive capture: the page issues the API requests itself while it is
    // being scrolled; this listener only reads the responses.
    page.on("response", async (resp) => {
      try {
        const url = resp.url();
        const ct = resp.headers()["content-type"] || "";

        if (url.includes("/api/challenge/detail/") && ct.includes("json")) {
          const body = await resp.json();
          if (body?.challengeInfo?.challenge) {
            challengeInfo = body.challengeInfo.challenge;
          }
        }

        if (url.includes("/api/challenge/item_list/") && ct.includes("json")) {
          const body = await resp.json();
          if (!body?.itemList) return;
          for (const item of body.itemList) {
            const uid = item.author?.uniqueId || "";
            if (uid) authors.add(uid);
            rawVideos.push(await extractItemData(item));
          }
          if (onProgress) {
            onProgress({ videos: rawVideos.length, authors: authors.size });
          }
        }
      } catch {
        // Best-effort capture: a response that cannot be read or parsed is
        // skipped instead of aborting the whole run.
      }
    });

    const tagUrl = `${TAG_URL}/${encodeURIComponent(tag)}`;
    const navigation = await page.goto(tagUrl, {
      waitUntil: "domcontentloaded",
      timeout: 30000,
    });

    // page.goto() may resolve to null; fail with a clear message instead of
    // a TypeError from calling status() on null.
    if (!navigation) {
      throw new Error("标签页未返回响应");
    }
    if (navigation.status() !== 200) {
      throw new Error(`标签页返回 HTTP ${navigation.status()}`);
    }

    await page.waitForTimeout(3000);

    const pageError = await page.evaluate(() => {
      const text = document.body?.innerText || "";
      if (text.includes("Something went wrong")) return "page_error";
      return null;
    });
    if (pageError) {
      throw new Error("标签页加载失败,TikTok 返回了错误页面");
    }

    let lastCount = 0;
    let staleRounds = 0;
    const startTime = Date.now();

    // Keep scrolling until several consecutive rounds bring no new videos
    // or the time budget is used up.
    while (staleRounds < MAX_STALE_ROUNDS) {
      if (Date.now() - startTime > timeout) break;

      await page.evaluate(() => window.scrollBy(0, 3000));
      await sleep(SCROLL_INTERVAL);

      if (rawVideos.length === lastCount) {
        staleRounds++;
      } else {
        staleRounds = 0;
        lastCount = rawVideos.length;
      }
    }

    // The same video can be captured more than once; keep the first
    // occurrence of each id.
    const seen = new Set();
    const uniqueVideos = rawVideos.filter((v) => {
      if (seen.has(v.id)) return false;
      seen.add(v.id);
      return true;
    });

    const totalPosts = challengeInfo?.stats?.videoCount || 0;

    return {
      tag,
      challengeId: challengeInfo?.id || "",
      totalPosts,
      videoCount: uniqueVideos.length,
      uniqueAuthorCount: authors.size,
      videos: uniqueVideos,
      uniqueAuthors: [...authors],
    };
  } finally {
    await browser.close();
  }
}
187
+
188
/**
 * Add country/region information (`locationCreated`) to a list of videos,
 * looked up through a TikTokScraper.
 *
 * The video objects are annotated in place; the returned `videos` array is a
 * new array holding those same objects. A lookup that fails, or yields no
 * location, is recorded as `null`.
 *
 * @param {Array} videos - The `videos` array returned by fetchTagData.
 * @param {object} [options]
 * @param {string} [options.mode='videos'] - 'users': one lookup per distinct
 *   author via getUserInfo; any other value: one lookup per video via
 *   getVideoInfo.
 * @param {number} [options.poolSize=3] - Pool size passed to the
 *   TikTokScraper created here (unused when `existingScraper` is given).
 * @param {number} [options.maxRetries=3] - Retry count forwarded to each lookup.
 * @param {Function} [options.onProgress] - Called once per finished lookup
 *   with `{ done, total, current, locationCreated }`. Errors thrown by the
 *   callback are ignored and never affect the results.
 * @param {object} [options.existingScraper] - Scraper to reuse; this function
 *   does not call init() or close() on it.
 * @returns {Promise<{ videos: Array, locationMap: Record<string, string|null> }>}
 *   `locationMap` is keyed by author uniqueId in 'users' mode and by video id
 *   otherwise.
 */
export async function enrichVideosWithLocation(videos, options = {}) {
  const {
    mode = "videos",
    poolSize = 3,
    maxRetries = 3,
    onProgress,
    existingScraper,
  } = options;

  const scraper = existingScraper || new TikTokScraper({ poolSize });
  const ownsScraper = !existingScraper;
  if (ownsScraper) await scraper.init();

  try {
    const enriched = [...videos];
    const locationMap = {};
    let done = 0;

    // Runs one lookup and reduces it to a location; failures become null.
    const resolveLocation = async (fetchInfo) => {
      try {
        const info = await fetchInfo();
        return info?.locationCreated || null;
      } catch {
        // Best-effort enrichment: a failed lookup means "location unknown".
        return null;
      }
    };

    // Counts a finished lookup and notifies the caller exactly once. Kept
    // apart from the lookup so a throwing callback cannot be mistaken for a
    // failed lookup and wipe a location that was resolved successfully.
    const report = (total, current, locationCreated) => {
      done++;
      if (!onProgress) return;
      try {
        onProgress({ done, total, current, locationCreated });
      } catch {
        // Progress reporting is advisory; ignore callback failures.
      }
    };

    if (mode === "users") {
      const uniqueAuthors = [
        ...new Set(videos.map((v) => v.authorUniqueId).filter(Boolean)),
      ];
      const total = uniqueAuthors.length;

      await Promise.allSettled(
        uniqueAuthors.map(async (uniqueId) => {
          const location = await resolveLocation(() =>
            scraper.getUserInfo(uniqueId, maxRetries),
          );
          locationMap[uniqueId] = location;
          report(total, uniqueId, location);
        }),
      );

      for (const v of enriched) {
        v.locationCreated = locationMap[v.authorUniqueId] || null;
      }
    } else {
      const total = videos.length;

      await Promise.allSettled(
        enriched.map(async (v) => {
          const videoUrl = `https://www.tiktok.com/@${v.authorUniqueId}/video/${v.id}`;
          const location = await resolveLocation(() =>
            scraper.getVideoInfo(videoUrl, maxRetries),
          );
          v.locationCreated = location;
          locationMap[v.id] = location;
          report(total, videoUrl, location);
        }),
      );
    }

    return { videos: enriched, locationMap };
  } finally {
    if (ownsScraper) await scraper.close();
  }
}
@@ -51,10 +51,28 @@ function findFirstMatchingLocation(
51
51
  );
52
52
  }
53
53
 
54
/**
 * From frequency-sorted entries, pick the first one that belongs to the
 * target locations.
 * @param {Array<[string, number]>} entries - [location, count] pairs, already
 *   sorted by count in descending order.
 * @param {string[]} targetLocations - Target locations; defaults to
 *   DEFAULT_TARGET_LOCATIONS.
 * @returns {string|null} The most frequent target location, or null when no
 *   entry matches.
 */
function findBestMatchingLocation(
  entries,
  targetLocations = DEFAULT_TARGET_LOCATIONS,
) {
  const allowed = normalizeLocationList(targetLocations);
  const hitIndex = entries.findIndex(([location]) =>
    allowed.includes(location),
  );
  return hitIndex === -1 ? null : entries[hitIndex][0];
}
70
+
54
71
  export {
55
72
  DEFAULT_TARGET_LOCATIONS,
56
73
  DEFAULT_TARGET_LOCATIONS_CSV,
57
74
  findFirstMatchingLocation,
75
+ findBestMatchingLocation,
58
76
  isLocationInList,
59
77
  normalizeLocation,
60
78
  normalizeLocationList,
package/src/main.js CHANGED
@@ -11,6 +11,12 @@ import { handleVideoStats } from "./cli/videostats.js";
11
11
  import { handleDbImport } from "./cli/db-import.js";
12
12
  import { handleWebserver } from "./cli/webserver.js";
13
13
  import { handleRefresh } from "./cli/refresh.js";
14
+ import {
15
+ handleTag,
16
+ handleDiscover,
17
+ handleScore,
18
+ handleScoreAll,
19
+ } from "./cli/tag.js";
14
20
 
15
21
  async function main() {
16
22
  const parsed = parseArgs();
@@ -36,6 +42,14 @@ async function main() {
36
42
  return handleDbImport(parsed);
37
43
  case "refresh":
38
44
  return handleRefresh(parsed);
45
+ case "tag":
46
+ return handleTag(parsed);
47
+ case "tag-discover":
48
+ return handleDiscover(parsed);
49
+ case "tag-score":
50
+ return handleScore(parsed);
51
+ case "tag-score-all":
52
+ return handleScoreAll(parsed);
39
53
  }
40
54
 
41
55
  const {
package/src/npm-main.js CHANGED
@@ -7,6 +7,7 @@ import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
7
7
  import { handleOpen } from "./cli/open.js";
8
8
  import { handleComments } from "./cli/comments.js";
9
9
  import { handleRefresh } from "./cli/refresh.js";
10
+ import { handleTag } from "./cli/tag.js";
10
11
 
11
12
  function exitUnsupportedCommand(command) {
12
13
  console.error(
@@ -36,6 +37,8 @@ async function main() {
36
37
  return handleComments(parsed);
37
38
  case "refresh":
38
39
  return handleRefresh(parsed);
40
+ case "tag":
41
+ return handleTag(parsed);
39
42
  }
40
43
 
41
44
  const {
@@ -6,7 +6,7 @@ import { extractFollowAndFollowers } from "./modules/follow-extractor.js";
6
6
  import { extractVideoLocation, setScraperProxy } from "../lib/scrape.js";
7
7
  import {
8
8
  DEFAULT_TARGET_LOCATIONS_CSV,
9
- findFirstMatchingLocation,
9
+ findBestMatchingLocation,
10
10
  isLocationInList,
11
11
  normalizeLocation,
12
12
  normalizeLocationList,
@@ -152,13 +152,13 @@ async function processExplore(page, username, options, log) {
152
152
  locationDecision = `众数 (${entries[0][1]}次)`;
153
153
  }
154
154
  } else {
155
- // explore 模式:优先命中目标国家,不匹配则回退众数
156
- const matchedTargetLocation = findFirstMatchingLocation(
157
- normalizedLocations,
155
+ // explore 模式:取频率最高的目标国家,不匹配则回退众数
156
+ const bestTargetLocation = findBestMatchingLocation(
157
+ entries,
158
158
  locationList,
159
159
  );
160
- if (matchedTargetLocation) {
161
- locationCreated = matchedTargetLocation;
160
+ if (bestTargetLocation) {
161
+ locationCreated = bestTargetLocation;
162
162
  locationDecision = "命中目标国家";
163
163
  } else if (entries.length > 0) {
164
164
  locationCreated = entries[0][0];