tt-help-cli-ycl 1.3.52 → 1.3.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.52",
3
+ "version": "1.3.55",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,128 @@
1
+ /**
2
+ * 测试工具:分析 TikTok view-source HTML 的三种情况
3
+ * 1. 正常用户(有 SSR 数据)
4
+ * 2. 空壳 HTML(11182 字节,无 SSR — 需要重试)
5
+ * 3. 异常用户(有 SSR 但 userInfo 为空,statusCode=10202 — 重试无效)
6
+ */
7
+
8
+ import { TikTokScraper } from "../src/lib/tiktok-scraper.mjs";
9
+ import fs from "fs";
10
+
11
+ const testUsers = [
12
+ { id: "nike", type: "正常用户" },
13
+ { id: "galb508", type: "异常用户(可能被封/删除)" },
14
+ { id: "notexist_user_xxxxxx12345", type: "不存在的用户" },
15
+ ];
16
+
17
/**
 * Sample the view-source HTML of one TikTok profile several times and print a verdict.
 *
 * Every round records the response size, whether the SSR marker
 * `__UNIVERSAL_DATA_FOR_REHYDRATION__` is present and, when it is, selected fields of
 * the `webapp.user-detail` entry of `__DEFAULT_SCOPE__`. Responses without the marker
 * are checked for two empty-shell fingerprints instead.
 *
 * A dedicated single-slot scraper is created per call and is always closed, even when
 * initialisation or a fetch throws, so a failed run does not leave a browser behind.
 *
 * @param {string} uniqueId - TikTok handle, interpolated after "@" in the profile URL.
 * @param {string} typeLabel - Human-readable label, only used in log output.
 * @param {number} [rounds=3] - Number of fetches to perform.
 * @returns {Promise<object[]>} One analysis record per round.
 */
async function analyzeUser(uniqueId, typeLabel, rounds = 3) {
  console.log(`\n${"=".repeat(60)}`);
  console.log(`分析 @${uniqueId} (${typeLabel})`);
  console.log("=".repeat(60));

  const SSR_MARKER = "__UNIVERSAL_DATA_FOR_REHYDRATION__";
  const scraper = new TikTokScraper({ poolSize: 1 });
  const samples = [];

  try {
    await scraper.init();
    const slot = scraper._pickSlot();
    const profileUrl = `https://www.tiktok.com/@${uniqueId}`;

    // Repeated sampling: the same profile can answer differently between requests.
    for (let i = 0; i < rounds; i++) {
      const rawHtml = await scraper._fetchViewSource(profileUrl, slot);
      const byteLen = Buffer.byteLength(rawHtml, "utf8");
      const hasSSR = rawHtml.includes(SSR_MARKER);

      const analysis = {
        round: i + 1,
        size: rawHtml.length,
        byteLen,
        hasSSR,
      };

      if (hasSSR) {
        // SSR payload present: pull out the JSON between the marker's tag end and </script>.
        try {
          const idx = rawHtml.indexOf(SSR_MARKER);
          const sIdx = rawHtml.indexOf(">", idx) + 1;
          const eIdx = rawHtml.indexOf("</script>", sIdx);
          const data = JSON.parse(rawHtml.substring(sIdx, eIdx));
          const ud = data.__DEFAULT_SCOPE__?.["webapp.user-detail"];

          analysis.scopeKeys = data.__DEFAULT_SCOPE__
            ? Object.keys(data.__DEFAULT_SCOPE__)
            : [];
          analysis.hasUserInfo = !!(ud && ud.userInfo);
          analysis.statusCode = ud?.statusCode;
          analysis.statusMsg = ud?.statusMsg;
          analysis.needFix = ud?.needFix;
          analysis.udKeys = ud ? Object.keys(ud) : [];
        } catch (e) {
          // Keep the sample and record why the payload could not be read.
          analysis.parseError = e.message;
        }
      } else {
        // Empty-shell HTML: record the fingerprints that identify it.
        analysis.hasEmptyTitle = rawHtml.includes(
          '<title data-rh="true"></title>',
        );
        analysis.hasEmotionStyle = rawHtml.includes('data-emotion="tiktok"');
      }

      samples.push(analysis);
      console.log(
        ` 第 ${i + 1} 次: ${rawHtml.length} 字符, ${byteLen} 字节, SSR: ${hasSSR ? "✓" : "✗"}`,
      );
      if (hasSSR && analysis.statusCode !== undefined) {
        console.log(
          ` statusCode: ${analysis.statusCode}, hasUserInfo: ${analysis.hasUserInfo}, udKeys: [${analysis.udKeys.join(", ")}]`,
        );
      }
    }

    // Summary across all rounds.
    const total = samples.length;
    const shellCount = samples.filter((s) => !s.hasSSR).length;
    const hasDataCount = samples.filter((s) => s.hasUserInfo).length;
    const statusCode10202 = samples.filter((s) => s.statusCode === 10202).length;

    console.log("\n 总结:");
    console.log(` 空壳 HTML 次数: ${shellCount}/${total}`);
    console.log(` 有 userInfo 次数: ${hasDataCount}/${total}`);
    console.log(` statusCode=10202 次数: ${statusCode10202}/${total}`);

    // Verdict: every round an empty shell > any round with data > any 10202 > unknown.
    if (total > 0 && shellCount === total) {
      console.log(" → 判定: 持续空壳(可能是并发限流,重试可能有效)");
    } else if (hasDataCount > 0) {
      console.log(" → 判定: 正常用户(有完整数据)");
    } else if (statusCode10202 > 0) {
      console.log(" → 判定: 异常用户(statusCode=10202,重试无效)");
    } else {
      console.log(" → 判定: 无法确定");
    }
  } finally {
    // Always release the browser, including when init() or a fetch failed.
    await scraper.close();
  }

  return samples;
}
107
+
108
/**
 * Entry point of the analysis script.
 *
 * Runs analyzeUser for every entry of testUsers one after another (not in parallel)
 * and writes the collected samples, keyed by user id, to
 * ./test-html-analysis-result.json as pretty-printed JSON.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("TikTok view-source HTML 分析工具");
  console.log("测试三种情况的 HTML 特征差异\n");

  const results = {};
  for (const user of testUsers) {
    const samples = await analyzeUser(user.id, user.type);
    results[user.id] = samples;
  }

  // Persist the raw samples for later inspection.
  const serialized = JSON.stringify(results, null, 2);
  fs.writeFileSync("./test-html-analysis-result.json", serialized);
  console.log("\n\n结果已保存到 test-html-analysis-result.json");
}
124
+
125
+ main().catch((err) => {
126
+ console.error("测试失败:", err);
127
+ process.exit(1);
128
+ });
package/src/cli/attach.js CHANGED
@@ -244,10 +244,16 @@ export async function handleAttach(options) {
244
244
  );
245
245
  failCount++;
246
246
  } else if (info) {
247
- successTasks.push({ uniqueId, info });
247
+ // info 可能是 { error: true, statusCode: xxx } 表示 TikTok 给了明确响应
248
+ if (info.error) {
249
+ // 有 statusCode 说明 TikTok 已给出明确响应,提交到后端记录,不算错误
250
+ successTasks.push({ uniqueId, info });
251
+ } else {
252
+ successTasks.push({ uniqueId, info });
253
+ }
248
254
  } else {
249
- attachLog(` - @${uniqueId} 无用户信息`);
250
- successTasks.push({ uniqueId, info: {} });
255
+ attachLog(` @${uniqueId} 未获取到用户信息`);
256
+ failCount++;
251
257
  }
252
258
  } else {
253
259
  attachLog(
@@ -263,26 +269,32 @@ export async function handleAttach(options) {
263
269
  const batchRet = await apiPost(`${serverUrl}/api/user-info-batch`, {
264
270
  updates: successTasks,
265
271
  });
266
- if (batchRet && batchRet.results) {
267
- const nicknameMap = {};
268
- for (const { uniqueId, info } of successTasks) {
269
- if (info?.nickname) {
270
- nicknameMap[uniqueId] = info.nickname;
271
- }
272
- }
272
+ if (batchRet && batchRet.results && Array.isArray(batchRet.results)) {
273
273
  for (const r of batchRet.results) {
274
274
  if (r.ok) {
275
275
  successCount++;
276
- const nickname = nicknameMap[r.uniqueId] || r.uniqueId;
277
- attachLog(` ✓ @${r.uniqueId} (${nickname}) 已提交更新`);
276
+ // 查找对应的 info 判断是否有 statusCode
277
+ const task = successTasks.find(
278
+ (t) => t.uniqueId === r.uniqueId,
279
+ );
280
+ if (task && task.info && task.info.error) {
281
+ attachLog(
282
+ ` ⚠ @${r.uniqueId} 已记录 (statusCode=${task.info.statusCode})`,
283
+ );
284
+ } else {
285
+ attachLog(` ✓ @${r.uniqueId} 已提交更新`);
286
+ }
278
287
  } else {
279
288
  failCount++;
280
289
  attachLog(` ✗ @${r.uniqueId} 提交失败: ${r.error}`);
281
290
  }
282
291
  }
283
292
  } else {
293
+ // 后端返回格式异常,降级处理
294
+ attachLog(
295
+ ` ⚠ 后端响应格式异常 (batchRet=${JSON.stringify(batchRet).slice(0, 200)}), 降级为批量成功`,
296
+ );
284
297
  successCount = successTasks.length;
285
- attachLog(` ✓ 批量提交完成 (${successTasks.length} 条)`);
286
298
  }
287
299
  } catch (err) {
288
300
  failCount += successTasks.length;
package/src/cli/info.js CHANGED
@@ -1,25 +1,38 @@
1
- import { TikTokScraper } from '../lib/tiktok-scraper.mjs';
2
- import { isProfileUrl, isVideoUrl, extractUniqueId, normalizeUsername } from '../lib/url.js';
1
+ import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
2
+ import {
3
+ isProfileUrl,
4
+ isVideoUrl,
5
+ extractUniqueId,
6
+ normalizeUsername,
7
+ } from "../lib/url.js";
3
8
 
4
9
  async function handleInfo(options) {
5
10
  const { infoUrls, infoOnlyVideo } = options;
6
11
 
7
12
  if (!infoUrls || infoUrls.length === 0) {
8
- console.error('用法: tt-help info <URL> [URL2 URL3...] [--onlyvideo]');
9
- console.error('');
10
- console.error('参数:');
11
- console.error(' <URL> TikTok 主页或视频 URL,支持多个 URL 同时查询');
12
- console.error(' --onlyvideo 只返回视频信息(不返回用户信息)');
13
- console.error('');
14
- console.error('默认行为:');
15
- console.error(' 主页 URL → 返回用户信息(bio、region、粉丝数等)');
16
- console.error(' 视频 URL → 返回用户信息 + 视频信息');
17
- console.error(' 视频 URL + --onlyvideo → 只返回视频信息');
18
- console.error('');
19
- console.error('示例:');
20
- console.error(' tt-help info https://www.tiktok.com/@nike');
21
- console.error(' tt-help info https://www.tiktok.com/@nike/video/7234567890');
22
- console.error(' tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@apple');
13
+ console.error("用法: tt-help info <URL> [URL2 URL3...] [--onlyvideo]");
14
+ console.error("");
15
+ console.error("参数:");
16
+ console.error(
17
+ " <URL> TikTok 主页或视频 URL,支持多个 URL 同时查询",
18
+ );
19
+ console.error(" --onlyvideo 只返回视频信息(不返回用户信息)");
20
+ console.error("");
21
+ console.error("默认行为:");
22
+ console.error(
23
+ " 主页 URL → 返回用户信息(bio、region、粉丝数等)",
24
+ );
25
+ console.error(" 视频 URL → 返回用户信息 + 视频信息");
26
+ console.error(" 视频 URL + --onlyvideo → 只返回视频信息");
27
+ console.error("");
28
+ console.error("示例:");
29
+ console.error(" tt-help info https://www.tiktok.com/@nike");
30
+ console.error(
31
+ " tt-help info https://www.tiktok.com/@nike/video/7234567890",
32
+ );
33
+ console.error(
34
+ " tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@apple",
35
+ );
23
36
  process.exit(1);
24
37
  }
25
38
 
@@ -34,28 +47,35 @@ async function handleInfo(options) {
34
47
  const uniqueId = extractUniqueId(url);
35
48
  const normalized = normalizeUsername(uniqueId);
36
49
  const user = await scraper.getUserInfo(normalized);
37
- if (user) {
38
- result[normalized] = { user };
39
- console.error(`用户: @${user.uniqueId} (${user.nickname})`);
50
+ if (!user || user.error) {
51
+ const code = user?.statusCode;
52
+ console.error(
53
+ `无法获取用户 @${uniqueId} 的信息${code !== undefined ? ` (statusCode=${code})` : ""}`,
54
+ );
55
+ continue;
40
56
  }
57
+ result[normalized] = { user };
58
+ console.error(`用户: @${user.uniqueId} (${user.nickname})`);
41
59
  } else if (isVideoUrl(url)) {
42
60
  const uniqueId = extractUniqueId(url);
43
61
  const normalized = normalizeUsername(uniqueId);
44
62
 
45
63
  if (infoOnlyVideo) {
46
64
  const video = await scraper.getVideoInfo(url);
47
- if (video) {
48
- const key = normalized + '/video/' + video.id;
49
- result[key] = { video };
50
- console.error(`视频: ${video.id}`);
65
+ if (!video) {
66
+ console.error(`无法获取视频信息: ${url}`);
67
+ continue;
51
68
  }
69
+ const key = normalized + "/video/" + video.id;
70
+ result[key] = { video };
71
+ console.error(`视频: ${video.id}`);
52
72
  } else {
53
73
  const [user, video] = await Promise.all([
54
74
  scraper.getUserInfo(normalized),
55
75
  scraper.getVideoInfo(url),
56
76
  ]);
57
77
  const entry = {};
58
- if (user) {
78
+ if (user && !user.error) {
59
79
  entry.user = user;
60
80
  console.error(`用户: @${user.uniqueId} (${user.nickname})`);
61
81
  }
@@ -63,10 +83,12 @@ async function handleInfo(options) {
63
83
  entry.video = video;
64
84
  console.error(`视频: ${video.id}`);
65
85
  }
66
- if (user || video) {
67
- const key = normalized + '/video/' + (video ? video.id : 'unknown');
68
- result[key] = entry;
86
+ if ((!user || user.error) && !video) {
87
+ console.error(`无法获取信息: ${url}`);
88
+ continue;
69
89
  }
90
+ const key = normalized + "/video/" + (video ? video.id : "unknown");
91
+ result[key] = entry;
70
92
  }
71
93
  } else {
72
94
  console.error(`无法识别 URL: ${url}`);
package/src/cli/open.js CHANGED
@@ -33,13 +33,15 @@ export async function handleOpen(parsed) {
33
33
  if (!openPort) {
34
34
  console.error("用法: tt-help open <端口>");
35
35
  console.error("示例: tt-help open 9222");
36
- console.error('运行 "tt-help open --list" 查看所有内置配置');
36
+ console.error("");
37
+ console.error("可用端口: 9222 - 9231 (共 10 个)");
38
+ console.error('运行 "tt-help open --list" 查看所有配置');
37
39
  process.exit(1);
38
40
  }
39
41
 
40
42
  const port = parseInt(openPort);
41
- if (isNaN(port) || port < 1 || port > 65535) {
42
- console.error(`端口 ${openPort} 无效,请输入 1-65535 之间的端口号`);
43
+ if (isNaN(port) || port < BASE_PORT || port >= BASE_PORT + TOTAL_ACCOUNTS) {
44
+ console.error(`端口 ${openPort} 不在有效范围内 (9222 - 9231)`);
43
45
  process.exit(1);
44
46
  }
45
47
 
@@ -1,20 +1,65 @@
1
/**
 * Decide whether a failed profile fetch is worth retrying.
 *
 * - No SSR marker (`__UNIVERSAL_DATA_FOR_REHYDRATION__`): empty-shell HTML, treated as
 *   throttling, so a retry may help.
 * - SSR payload whose `webapp.user-detail` entry carries a `statusCode` key (whatever
 *   its value, e.g. 0, 10202, 10221): TikTok gave an explicit answer, do not retry.
 * - Marker present but the payload cannot be located or parsed, or it has no
 *   `statusCode`: retry.
 *
 * @param {unknown} rawHtml - HTML text returned by the view-source fetch.
 * @returns {boolean} `true` when a retry may help; `false` otherwise, including for
 *   empty or non-string input.
 */
export function isRetryableFailure(rawHtml) {
  const SSR_MARKER = "__UNIVERSAL_DATA_FOR_REHYDRATION__";
  if (!rawHtml || typeof rawHtml !== "string") return false;

  const markerIdx = rawHtml.indexOf(SSR_MARKER);
  if (markerIdx < 0) return true;

  // Test the raw indexOf result before adding 1: "indexOf(...) + 1" is never negative,
  // so a missing ">" would otherwise silently become offset 0 and unrelated text that
  // precedes the marker would be parsed as the payload.
  const tagEnd = rawHtml.indexOf(">", markerIdx);
  if (tagEnd < 0) return true;
  const jsonStart = tagEnd + 1;
  const jsonEnd = rawHtml.indexOf("</script>", jsonStart);
  if (jsonEnd < 0) return true;

  let data;
  try {
    data = JSON.parse(rawHtml.substring(jsonStart, jsonEnd));
  } catch {
    // Truncated or malformed payload: no explicit answer was received, so allow a retry.
    return true;
  }

  const ud = data?.__DEFAULT_SCOPE__?.["webapp.user-detail"];
  const hasExplicitStatus =
    ud !== null && typeof ud === "object" && "statusCode" in ud;
  return !hasExplicitStatus;
}
25
+
1
26
  function parseSSR(rawHtml) {
2
- if (!rawHtml.includes('__UNIVERSAL_DATA_FOR_REHYDRATION__')) return null;
3
- const dataStart = rawHtml.indexOf('__UNIVERSAL_DATA_FOR_REHYDRATION__');
4
- // 从该字符串后面找 <script 标签的 >,确保找到的是正确行的 >
5
- const scriptStart = rawHtml.lastIndexOf('<script', dataStart);
6
- const sIdx = (scriptStart >= 0 ? rawHtml.indexOf('>', scriptStart) : rawHtml.indexOf('>', dataStart)) + 1;
7
- const eIdx = rawHtml.indexOf('</script>', sIdx);
8
- if (sIdx < 0 || eIdx < 0) return null;
27
+ if (!rawHtml || typeof rawHtml !== "string") {
28
+ return null;
29
+ }
30
+ if (!rawHtml.includes("__UNIVERSAL_DATA_FOR_REHYDRATION__")) {
31
+ return null;
32
+ }
33
+ const dataStart = rawHtml.indexOf("__UNIVERSAL_DATA_FOR_REHYDRATION__");
34
+ const scriptStart = rawHtml.lastIndexOf("<script", dataStart);
35
+ const sIdx =
36
+ (scriptStart >= 0
37
+ ? rawHtml.indexOf(">", scriptStart)
38
+ : rawHtml.indexOf(">", dataStart)) + 1;
39
+ const eIdx = rawHtml.indexOf("</script>", sIdx);
40
+ if (sIdx < 0 || eIdx < 0) {
41
+ return null;
42
+ }
9
43
  const jsonStr = rawHtml.substring(sIdx, eIdx);
10
- return JSON.parse(jsonStr);
44
+ try {
45
+ return JSON.parse(jsonStr);
46
+ } catch (e) {
47
+ return null;
48
+ }
11
49
  }
12
50
 
13
51
  export function parseUserInfo(rawHtml) {
14
52
  const data = parseSSR(rawHtml);
15
53
  if (!data) return null;
16
- const ud = data.__DEFAULT_SCOPE__['webapp.user-detail'];
17
- if (!ud || !ud.userInfo) return null;
54
+ const scopeKeys = data.__DEFAULT_SCOPE__
55
+ ? Object.keys(data.__DEFAULT_SCOPE__)
56
+ : [];
57
+ const ud =
58
+ data.__DEFAULT_SCOPE__ && data.__DEFAULT_SCOPE__["webapp.user-detail"];
59
+ if (!ud || !ud.userInfo) {
60
+ const code = ud?.statusCode;
61
+ return { error: true, statusCode: code };
62
+ }
18
63
  const u = ud.userInfo.user;
19
64
  const s = ud.userInfo.stats;
20
65
  return {
@@ -24,8 +69,8 @@ export function parseUserInfo(rawHtml) {
24
69
  verified: u.verified,
25
70
  privateAccount: u.privateAccount,
26
71
  language: u.language,
27
- bio: u.signature || '',
28
- avatar: u.avatarLarger || u.avatarMedium || u.avatarThumb || '',
72
+ bio: u.signature || "",
73
+ avatar: u.avatarLarger || u.avatarMedium || u.avatarThumb || "",
29
74
  followerCount: s.followerCount,
30
75
  followingCount: s.followingCount,
31
76
  heartCount: s.heartCount,
@@ -35,20 +80,30 @@ export function parseUserInfo(rawHtml) {
35
80
  secUid: u.secUid,
36
81
  ttSeller: u.ttSeller || false,
37
82
  locationCreated: u.locationCreated || null,
83
+ statusCode: 0,
38
84
  };
39
85
  }
40
86
 
41
87
  export function parseVideoInfo(rawHtml) {
42
88
  const data = parseSSR(rawHtml);
43
89
  if (!data) return null;
44
- const vd = data.__DEFAULT_SCOPE__['webapp.video-detail'];
45
- if (!vd || !vd.itemInfo || !vd.itemInfo.itemStruct) return null;
90
+ const scopeKeys = data.__DEFAULT_SCOPE__
91
+ ? Object.keys(data.__DEFAULT_SCOPE__)
92
+ : [];
93
+ const vd =
94
+ data.__DEFAULT_SCOPE__ && data.__DEFAULT_SCOPE__["webapp.video-detail"];
95
+ if (!vd || !vd.itemInfo || !vd.itemInfo.itemStruct) {
96
+ console.error(
97
+ `[parseVideoInfo] webapp.video-detail 不存在, scope keys: ${JSON.stringify(scopeKeys)}`,
98
+ );
99
+ return null;
100
+ }
46
101
  const item = vd.itemInfo.itemStruct;
47
102
  const author = item.author || {};
48
103
  const stats = item.stats || {};
49
104
  return {
50
105
  id: item.id,
51
- desc: item.desc || '',
106
+ desc: item.desc || "",
52
107
  createTime: item.createTime || null,
53
108
  locationCreated: item.locationCreated || null,
54
109
  author: {
package/src/lib/scrape.js CHANGED
@@ -57,7 +57,7 @@ export async function extractUserData(url) {
57
57
  const uniqueId = extractUniqueId(url);
58
58
  if (!uniqueId) throw new Error(`无法从URL提取用户名: ${url}`);
59
59
  const user = await scraper.getUserInfo(normalizeUsername(uniqueId));
60
- if (!user) return null;
60
+ if (!user) throw new Error('无法解析用户信息');
61
61
  return mapUserInfo(user);
62
62
  }
63
63
 
@@ -70,7 +70,7 @@ export async function extractVideoLocation(videoUrl) {
70
70
  export async function processUrl(url) {
71
71
  if (isProfileUrl(url)) {
72
72
  const profileData = await extractUserData(url);
73
- return profileData ? [profileData] : [];
73
+ return [profileData];
74
74
  }
75
75
 
76
76
  if (isVideoUrl(url)) {
@@ -82,7 +82,6 @@ export async function processUrl(url) {
82
82
  extractVideoLocation(url),
83
83
  ]);
84
84
 
85
- if (!profileData) return [];
86
85
  return [{ ...profileData, locationCreated }];
87
86
  }
88
87
 
@@ -1,31 +1,21 @@
1
- import os from "os";
2
- import path from "path";
3
- import fs from "fs";
4
1
  import { chromium } from "playwright";
5
2
  import { detectBrowser } from "./browser/launch.js";
6
- import { parseUserInfo, parseVideoInfo } from "./parse-ssr.mjs";
3
+ import {
4
+ parseUserInfo,
5
+ parseVideoInfo,
6
+ isRetryableFailure,
7
+ } from "./parse-ssr.mjs";
7
8
 
8
9
  const DEFAULT_POOL_SIZE = 3;
9
10
  const DEFAULT_WAF_TTL = 120000;
10
11
  const DEFAULT_WARM_URL = "https://www.tiktok.com/@nike";
11
12
  const BROWSER_CLOSE_TIMEOUT = 5000;
12
13
  const DEFAULT_MAX_REQUESTS_PER_PAGE = 50;
13
- const FALLBACK_PROFILE_PORT = 9999;
14
14
 
15
15
  function delay(ms) {
16
16
  return new Promise((r) => setTimeout(r, ms));
17
17
  }
18
18
 
19
- function getFallbackProfileDir() {
20
- const profile = `p${FALLBACK_PROFILE_PORT}`;
21
- return path.join(
22
- os.homedir(),
23
- "Library",
24
- "Application Support",
25
- `Microsoft Edge For Testing_${profile}`,
26
- );
27
- }
28
-
29
19
  class PageSlot {
30
20
  constructor(page) {
31
21
  this.page = page;
@@ -61,49 +51,6 @@ class PromiseQueue {
61
51
  }
62
52
  }
63
53
 
64
- function createLaunchOptions(executablePath) {
65
- return {
66
- headless: true,
67
- executablePath,
68
- handleSIGINT: false,
69
- handleSIGTERM: false,
70
- handleSIGHUP: false,
71
- args: [
72
- "--no-sandbox",
73
- "--disable-setuid-sandbox",
74
- "--disable-dev-shm-usage",
75
- ],
76
- };
77
- }
78
-
79
- async function initContext(executablePath, poolSize, userDataDir) {
80
- let context;
81
- let browser = null;
82
- const slots = [];
83
-
84
- if (userDataDir) {
85
- context = await chromium.launchPersistentContext(
86
- userDataDir,
87
- createLaunchOptions(executablePath),
88
- );
89
- const existing = context.pages();
90
- if (existing.length > 0) {
91
- slots.push(new PageSlot(existing[0]));
92
- }
93
- for (let i = slots.length; i < poolSize; i++) {
94
- slots.push(new PageSlot(await context.newPage()));
95
- }
96
- } else {
97
- browser = await chromium.launch(createLaunchOptions(executablePath));
98
- context = await browser.newContext();
99
- for (let i = 0; i < poolSize; i++) {
100
- slots.push(new PageSlot(await context.newPage()));
101
- }
102
- }
103
-
104
- return { browser, context, slots };
105
- }
106
-
107
54
  export class TikTokScraper {
108
55
  constructor({
109
56
  poolSize = DEFAULT_POOL_SIZE,
@@ -121,11 +68,6 @@ export class TikTokScraper {
121
68
  this.slotIdx = 0;
122
69
  this.lastWarmTime = 0;
123
70
  this.warmPromise = null;
124
- // 登录态 pool(init 时直接启动)
125
- this.authBrowser = null;
126
- this.authContext = null;
127
- this.authSlots = [];
128
- this.authSlotIdx = 0;
129
71
  }
130
72
 
131
73
  async init() {
@@ -135,73 +77,49 @@ export class TikTokScraper {
135
77
  "未找到本地浏览器(Chrome/Edge),请先安装浏览器或执行 npx playwright install",
136
78
  );
137
79
  }
138
- const { browser, context, slots } = await initContext(
80
+ this.browser = await chromium.launch({
81
+ headless: true,
139
82
  executablePath,
140
- this.poolSize,
141
- null,
142
- );
143
- this.browser = browser;
144
- this.context = context;
145
- this.slots = slots;
146
-
147
- // 启动登录态 pool(1 个 slot)
148
- // profile 不存在则跳过,不影响主流程
149
- const fallbackDir = getFallbackProfileDir();
150
- if (fs.existsSync(fallbackDir)) {
151
- const {
152
- browser: authBrowser,
153
- context: authContext,
154
- slots: authSlots,
155
- } = await initContext(executablePath, 1, fallbackDir);
156
- this.authBrowser = authBrowser;
157
- this.authContext = authContext;
158
- this.authSlots = authSlots;
159
- } else {
160
- console.error(
161
- `[TikTokScraper] 登录态 profile 不存在 (${fallbackDir}),跳过登录态 pool。请先运行 tt-help open 9999 登录 TikTok`,
162
- );
83
+ handleSIGINT: false,
84
+ handleSIGTERM: false,
85
+ handleSIGHUP: false,
86
+ args: [
87
+ "--no-sandbox",
88
+ "--disable-setuid-sandbox",
89
+ "--disable-dev-shm-usage",
90
+ ],
91
+ });
92
+ this.context = await this.browser.newContext();
93
+ for (let i = 0; i < this.poolSize; i++) {
94
+ this.slots.push(new PageSlot(await this.context.newPage()));
163
95
  }
164
-
165
96
  await this.warmWaf();
166
97
  }
167
98
 
168
99
  async close() {
169
- const closeAll = async (browser) => {
170
- if (browser) {
171
- let closeTimedOut = false;
172
- const closePromise = browser.close().catch((error) => {
173
- console.error(
174
- `[TikTokScraper] browser.close() failed: ${error.message}`,
175
- );
176
- });
177
- await Promise.race([
178
- closePromise,
179
- delay(BROWSER_CLOSE_TIMEOUT).then(() => {
180
- closeTimedOut = true;
181
- }),
182
- ]);
183
- if (closeTimedOut) {
184
- console.error(
185
- `[TikTokScraper] browser.close() 超时 ${BROWSER_CLOSE_TIMEOUT}ms,跳过等待并继续退出`,
186
- );
187
- }
100
+ if (this.browser) {
101
+ const browser = this.browser;
102
+ let closeTimedOut = false;
103
+ const closePromise = browser.close().catch((error) => {
104
+ console.error(
105
+ `[TikTokScraper] browser.close() failed: ${error.message}`,
106
+ );
107
+ });
108
+ await Promise.race([
109
+ closePromise,
110
+ delay(BROWSER_CLOSE_TIMEOUT).then(() => {
111
+ closeTimedOut = true;
112
+ }),
113
+ ]);
114
+ if (closeTimedOut) {
115
+ console.error(
116
+ `[TikTokScraper] browser.close() 超时 ${BROWSER_CLOSE_TIMEOUT}ms,跳过等待并继续退出`,
117
+ );
188
118
  }
189
- };
190
-
191
- // 无登录态的 browser
192
- await closeAll(this.browser);
193
- // 登录态的 context(launchPersistentContext 返回的是 context 当 browser)
194
- if (this.authContext) {
195
- try {
196
- await this.authContext.close();
197
- } catch {}
119
+ this.browser = null;
120
+ this.context = null;
121
+ this.slots = [];
198
122
  }
199
- this.browser = null;
200
- this.context = null;
201
- this.slots = [];
202
- this.authBrowser = null;
203
- this.authContext = null;
204
- this.authSlots = [];
205
123
  }
206
124
 
207
125
  async restart() {
@@ -211,7 +129,7 @@ export class TikTokScraper {
211
129
 
212
130
  isAlive() {
213
131
  try {
214
- return !!(this.context && (!this.browser || this.browser.isConnected()));
132
+ return !!(this.browser && this.browser.isConnected());
215
133
  } catch {
216
134
  return false;
217
135
  }
@@ -238,11 +156,17 @@ export class TikTokScraper {
238
156
  return this.warmPromise;
239
157
  }
240
158
 
241
- isWarmExpired() {
159
+ _needWarm() {
242
160
  return Date.now() - this.lastWarmTime > this.wafTtl;
243
161
  }
244
162
 
245
- async _ensurePage(slot, context) {
163
+ _pickSlot() {
164
+ const slot = this.slots[this.slotIdx % this.poolSize];
165
+ this.slotIdx++;
166
+ return slot;
167
+ }
168
+
169
+ async _ensurePage(slot) {
246
170
  try {
247
171
  if (
248
172
  !slot.page.isClosed() &&
@@ -252,111 +176,88 @@ export class TikTokScraper {
252
176
  }
253
177
  } catch {}
254
178
  await slot.page?.close().catch(() => {});
255
- slot.page = await context.newPage();
179
+ slot.page = await this.context.newPage();
256
180
  slot.requestCount = 0;
257
181
  return slot.page;
258
182
  }
259
183
 
260
- async _fetchViewSource(url, ctx) {
261
- const slots = ctx === this.authContext ? this.authSlots : this.slots;
262
- const slotIdx = ctx === this.authContext ? this.authSlotIdx : this.slotIdx;
263
- const slot = slots[slotIdx % slots.length];
264
- if (ctx === this.authContext) {
265
- this.authSlotIdx++;
266
- } else {
267
- this.slotIdx++;
268
- }
269
-
270
- try {
271
- return await slot.lock.run(async () => {
272
- const page = await this._ensurePage(slot, ctx);
273
-
274
- if (ctx === this.context && this.isWarmExpired()) {
275
- await this.warmWaf();
276
- }
184
+ async _fetchViewSource(url, slot) {
185
+ const page = await this._ensurePage(slot);
277
186
 
278
- await page.goto("view-source:" + url, {
279
- waitUntil: "domcontentloaded",
280
- timeout: 15000,
281
- });
282
-
283
- const content = await page.evaluate(() => {
284
- const rows = document.querySelectorAll("tr");
285
- let content = "";
286
- rows.forEach((r) => {
287
- const lc = r.querySelector(".line-content");
288
- if (lc) content += lc.textContent + "\n";
289
- });
290
- return content;
291
- });
292
-
293
- // 导航到 about:blank 释放当前页面的 DOM 和 JS 堆
294
- await page
295
- .goto("about:blank", {
296
- waitUntil: "domcontentloaded",
297
- timeout: 5000,
298
- })
299
- .catch(() => {});
300
-
301
- slot.requestCount += 1;
187
+ await page.goto("view-source:" + url, {
188
+ waitUntil: "domcontentloaded",
189
+ timeout: 15000,
190
+ });
302
191
 
303
- return content;
192
+ const content = await page.evaluate(() => {
193
+ const rows = document.querySelectorAll("tr");
194
+ let content = "";
195
+ rows.forEach((r) => {
196
+ const lc = r.querySelector(".line-content");
197
+ if (lc) content += lc.textContent + "\n";
304
198
  });
305
- } catch (e) {
306
- console.error(`[TikTokScraper] _fetchViewSource failed: ${e.message}`);
307
- return null;
308
- }
309
- }
310
-
311
- async getUserInfo(username) {
312
- const normalizedUsername = username.startsWith("@")
313
- ? username.slice(1)
314
- : username;
315
- const url = `https://www.tiktok.com/@${normalizedUsername}`;
316
-
317
- // 第一趟:无登录态
318
- let rawHtml = await this._fetchViewSource(url, this.context);
319
- let result = rawHtml ? parseUserInfo(rawHtml) : null;
199
+ return content;
200
+ });
320
201
 
321
- // 解析失败:先尝试 warmWaf 后重试
322
- if (!result) {
323
- try {
324
- await this.warmWaf();
325
- } catch {}
326
- rawHtml = await this._fetchViewSource(url, this.context);
327
- result = rawHtml ? parseUserInfo(rawHtml) : null;
328
- }
202
+ // 导航到 about:blank 释放当前页面的 DOM 和 JS 堆
203
+ await page
204
+ .goto("about:blank", {
205
+ waitUntil: "domcontentloaded",
206
+ timeout: 5000,
207
+ })
208
+ .catch(() => {});
329
209
 
330
- // 仍然失败:使用登录态 pool
331
- if (!result && this.authContext) {
332
- rawHtml = await this._fetchViewSource(url, this.authContext);
333
- result = rawHtml ? parseUserInfo(rawHtml) : null;
334
- }
210
+ slot.requestCount += 1;
335
211
 
336
- return result || null;
212
+ return content;
337
213
  }
338
214
 
339
- async getVideoInfo(videoUrl) {
340
- // 第一趟:无登录态
341
- let rawHtml = await this._fetchViewSource(videoUrl, this.context);
342
- let result = rawHtml ? parseVideoInfo(rawHtml) : null;
343
-
344
- // 解析失败:先尝试 warmWaf 后重试
345
- if (!result) {
346
- try {
347
- await this.warmWaf();
348
- } catch {}
349
- rawHtml = await this._fetchViewSource(videoUrl, this.context);
350
- result = rawHtml ? parseVideoInfo(rawHtml) : null;
351
- }
352
-
353
- // 仍然失败:使用登录态 pool
354
- if (!result && this.authContext) {
355
- rawHtml = await this._fetchViewSource(videoUrl, this.authContext);
356
- result = rawHtml ? parseVideoInfo(rawHtml) : null;
357
- }
215
+ async getUserInfo(uniqueId, maxRetries = 3) {
216
+ const slot = this._pickSlot();
217
+ return slot.lock.run(async () => {
218
+ let rawHtml = await this._fetchViewSource(
219
+ `https://www.tiktok.com/@${uniqueId}`,
220
+ slot,
221
+ );
222
+ let result = parseUserInfo(rawHtml);
223
+ for (let attempt = 1; !result && attempt <= maxRetries; attempt++) {
224
+ // 检查是否值得重试:用户异常/不存在则跳过重试
225
+ if (!isRetryableFailure(rawHtml)) {
226
+ break;
227
+ }
228
+ try {
229
+ await this.warmWaf();
230
+ } catch {}
231
+ await delay(500 * attempt);
232
+ rawHtml = await this._fetchViewSource(
233
+ `https://www.tiktok.com/@${uniqueId}`,
234
+ slot,
235
+ );
236
+ result = parseUserInfo(rawHtml);
237
+ }
238
+ return result || null;
239
+ });
240
+ }
358
241
 
359
- return result || null;
242
+ async getVideoInfo(videoUrl, maxRetries = 3) {
243
+ const slot = this._pickSlot();
244
+ return slot.lock.run(async () => {
245
+ let rawHtml = await this._fetchViewSource(videoUrl, slot);
246
+ let result = parseVideoInfo(rawHtml);
247
+ for (let attempt = 1; !result && attempt <= maxRetries; attempt++) {
248
+ // 检查是否值得重试
249
+ if (!isRetryableFailure(rawHtml)) {
250
+ break;
251
+ }
252
+ try {
253
+ await this.warmWaf();
254
+ } catch {}
255
+ await delay(500 * attempt);
256
+ rawHtml = await this._fetchViewSource(videoUrl, slot);
257
+ result = parseVideoInfo(rawHtml);
258
+ }
259
+ return result || null;
260
+ });
360
261
  }
361
262
 
362
263
  async getUserAndVideo(videoUrl) {
@@ -114,9 +114,21 @@ function initUserDb(filePath) {
114
114
  updated_at INTEGER,
115
115
  region TEXT,
116
116
  signature TEXT,
117
- sec_uid TEXT
117
+ sec_uid TEXT,
118
+ status_code INTEGER
118
119
  )
119
120
  `);
121
+
122
+ // 迁移:为已存在的 jobs 表添加 status_code 列
123
+ const existingJobColumns = new Set(
124
+ db
125
+ .prepare("PRAGMA table_info(jobs)")
126
+ .all()
127
+ .map((c) => c.name),
128
+ );
129
+ if (!existingJobColumns.has("status_code")) {
130
+ db.exec(`ALTER TABLE jobs ADD COLUMN status_code INTEGER`);
131
+ }
120
132
  db.exec(`
121
133
  CREATE TABLE IF NOT EXISTS raw_jobs (
122
134
  unique_id TEXT PRIMARY KEY,
@@ -146,9 +158,21 @@ function initUserDb(filePath) {
146
158
  updated_at INTEGER,
147
159
  region TEXT,
148
160
  signature TEXT,
149
- sec_uid TEXT
161
+ sec_uid TEXT,
162
+ status_code INTEGER
150
163
  )
151
164
  `);
165
+
166
+ // 迁移:为已存在的 raw_jobs 表添加 status_code 列
167
+ const existingRawJobColumns = new Set(
168
+ db
169
+ .prepare("PRAGMA table_info(raw_jobs)")
170
+ .all()
171
+ .map((c) => c.name),
172
+ );
173
+ if (!existingRawJobColumns.has("status_code")) {
174
+ db.exec(`ALTER TABLE raw_jobs ADD COLUMN status_code INTEGER`);
175
+ }
152
176
  db.exec(`
153
177
  CREATE TABLE IF NOT EXISTS videos (
154
178
  id TEXT PRIMARY KEY,
@@ -1065,9 +1089,7 @@ function restoreRawJobsByFilter({ search, location }) {
1065
1089
 
1066
1090
  const count =
1067
1091
  db
1068
- .prepare(
1069
- `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`,
1070
- )
1092
+ .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
1071
1093
  .get(...args)?.c || 0;
1072
1094
 
1073
1095
  if (!count) {
@@ -1293,6 +1315,7 @@ const jobBooleanColumns = new Set([
1293
1315
  "processed",
1294
1316
  "tt_seller",
1295
1317
  "verified",
1318
+ "error",
1296
1319
  ]);
1297
1320
 
1298
1321
  const videoBooleanColumns = new Set(["tt_seller"]);
@@ -1324,17 +1347,20 @@ const writableJobColumns = new Set([
1324
1347
  "region",
1325
1348
  "signature",
1326
1349
  "sec_uid",
1350
+ "status_code",
1327
1351
  ]);
1328
1352
 
1329
1353
  function normalizeJobValue(column, value) {
1354
+ if (value === undefined || value === null) return null;
1330
1355
  if (column === "sources") {
1331
1356
  if (!Array.isArray(value)) return JSON.stringify([]);
1332
1357
  return JSON.stringify([...new Set(value)]);
1333
1358
  }
1334
1359
  if (jobBooleanColumns.has(column)) {
1335
- if (value === undefined || value === null || value === "") return null;
1336
1360
  return value ? 1 : 0;
1337
1361
  }
1362
+ // 防御:如果值是对象或数组,转为 JSON 字符串
1363
+ if (typeof value === "object") return JSON.stringify(value);
1338
1364
  return value;
1339
1365
  }
1340
1366
 
@@ -1812,6 +1838,11 @@ export function createStore(filePath) {
1812
1838
  const args = [];
1813
1839
  if (!loggedIn) {
1814
1840
  where.push("COALESCE(tt_seller, 0) != 1");
1841
+ // 未登录用户只能领取 statusCode 为空的任务(209002 只能被登录用户领取)
1842
+ where.push("status_code IS NULL");
1843
+ } else {
1844
+ // 登录用户可以领取 statusCode 为空 或 statusCode=209002 的任务
1845
+ where.push("status_code IS NULL OR status_code = 209002");
1815
1846
  }
1816
1847
  if (requireVideo) {
1817
1848
  where.push("COALESCE(video_count, 0) > 0");
@@ -2644,7 +2675,9 @@ export function createStore(filePath) {
2644
2675
  u.ttSeller === null || u.ttSeller === undefined || u.ttSeller === "";
2645
2676
  if (!ttSellerEmpty) return false;
2646
2677
  if (
2647
- updateCount === null || updateCount === undefined || updateCount <= 0
2678
+ updateCount === null ||
2679
+ updateCount === undefined ||
2680
+ updateCount <= 0
2648
2681
  ) {
2649
2682
  if (hasCountryFilter) {
2650
2683
  const loc = (u.guessedLocation || "").toUpperCase();
@@ -2686,7 +2719,19 @@ export function createStore(filePath) {
2686
2719
  function batchUpdateUserInfo(updates) {
2687
2720
  if (db) {
2688
2721
  const txn = db.transaction((items) =>
2689
- items.map((item) => updateJobInfo(item.uniqueId, item.info, true)),
2722
+ items.map((item) => {
2723
+ // 处理 { error: true, statusCode: xxx } 的情况
2724
+ const info = item.info;
2725
+ if (info && info.error && info.statusCode !== undefined) {
2726
+ // 只更新 status_code,不更新其他字段
2727
+ return updateJobInfo(
2728
+ item.uniqueId,
2729
+ { statusCode: info.statusCode },
2730
+ true,
2731
+ );
2732
+ }
2733
+ return updateJobInfo(item.uniqueId, info, true);
2734
+ }),
2690
2735
  );
2691
2736
  return txn(updates).map((result, index) =>
2692
2737
  result.error
@@ -2706,15 +2751,20 @@ export function createStore(filePath) {
2706
2751
  results.push({ uniqueId: item.uniqueId, error: "user not found" });
2707
2752
  continue;
2708
2753
  }
2709
- if (item.info) {
2710
- for (const key of Object.keys(item.info)) {
2711
- if (key === "uniqueId" || key === "sources") continue;
2754
+ const info = item.info;
2755
+ if (info && info.error && info.statusCode !== undefined) {
2756
+ // 只更新 status_code
2757
+ user.statusCode = info.statusCode;
2758
+ } else {
2759
+ for (const key of Object.keys(info)) {
2760
+ if (key === "uniqueId" || key === "sources" || key === "error")
2761
+ continue;
2712
2762
  if (
2713
- item.info[key] !== undefined &&
2714
- item.info[key] !== null &&
2715
- item.info[key] !== ""
2763
+ info[key] !== undefined &&
2764
+ info[key] !== null &&
2765
+ info[key] !== ""
2716
2766
  ) {
2717
- user[key] = item.info[key];
2767
+ user[key] = info[key];
2718
2768
  }
2719
2769
  }
2720
2770
  }
@@ -1150,6 +1150,7 @@
1150
1150
  <th>猜测国家</th>
1151
1151
  <th>来源</th>
1152
1152
  <th>状态</th>
1153
+ <th>StatusCode</th>
1153
1154
  <th>处理端</th>
1154
1155
  <th>领取时间</th>
1155
1156
  <th>完成时间</th>
@@ -1530,6 +1531,9 @@
1530
1531
  const claimer = u.claimedBy || '-';
1531
1532
  const claimTime = u.claimedAt ? formatTime(u.claimedAt) : '-';
1532
1533
  const procTime = u.processedAt ? formatTime(u.processedAt) : '-';
1534
+ const statusCodeDisplay = u.statusCode != null && u.statusCode !== 0
1535
+ ? `<span class="tag error" style="font-size:10px">${u.statusCode}</span>`
1536
+ : '';
1533
1537
  return `<tr${rowClass}>
1534
1538
  <td class="user-id" data-label="用户名">@${u.uniqueId}</td>
1535
1539
  <td data-label="昵称">${nick}</td>
@@ -1539,6 +1543,7 @@
1539
1543
  <td data-label="猜测国家">${guessedLoc}</td>
1540
1544
  <td data-label="来源">${sources || '-'}</td>
1541
1545
  <td data-label="状态">${statusTag} ${extraTags.join(' ')}</td>
1546
+ <td data-label="StatusCode">${statusCodeDisplay}</td>
1542
1547
  <td data-label="处理端" style="font-size:11px;color:#888">${claimer}</td>
1543
1548
  <td data-label="领取时间" style="font-size:11px;color:#888">${claimTime}</td>
1544
1549
  <td data-label="完成时间" style="font-size:11px;color:#888">${procTime}</td>