tt-help-cli-ycl 1.3.84 → 1.3.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/test-refill-order.mjs +218 -0
- package/src/cli/tag.js +736 -0
- package/src/lib/args.js +182 -6
- package/src/lib/constants.js +43 -0
- package/src/lib/parse-ssr.mjs +1 -0
- package/src/lib/tag-discover.js +150 -0
- package/src/lib/tag-fetcher.js +296 -0
- package/src/lib/target-locations.js +18 -0
- package/src/main.js +14 -0
- package/src/npm-main.js +14 -0
- package/src/scraper/explore-core.js +6 -6
- package/src/watch/data-store.js +344 -49
- package/src/watch/server.js +178 -1
- package/src/watch/tag-service.js +339 -0
package/src/lib/args.js
CHANGED
|
@@ -714,6 +714,186 @@ function parseCommentsArgs(args) {
|
|
|
714
714
|
};
|
|
715
715
|
}
|
|
716
716
|
|
|
717
|
+
/**
 * Parses the arguments that follow the `tag` command.
 *
 * Supported shapes:
 *   tag <name> [...] [options]               -> subcommand "tag"
 *   tag discover <country> [...] [options]   -> subcommand "tag-discover"
 *   tag score <name> [country ...] [options] -> subcommand "tag-score"
 *   tag score-all [options]                  -> subcommand "tag-score-all"
 *
 * Dash-prefixed arguments that match no known option are ignored.
 *
 * @param {string[]} args - Arguments after the `tag` keyword.
 * @returns {object} Parsed result: `subcommand`, one sub-command specific
 *   payload (`tagTags`, `tagDiscover`, `tagScore` or `tagScoreAll`) and the
 *   neutral defaults for the fields the other commands fill in.
 */
function parseTagArgs(args) {
  const tags = [];
  const discoverCountries = [];
  let outputFile = null;
  let authorsOnly = false;
  let videosOnly = false;
  let enrich = null;
  let locations = null;
  let noFilter = false;
  let serverUrl = null;
  let discover = null;
  let discoverCount = 4;
  let discoverPrompt = null;
  let isDiscover = false;
  let isScore = false;
  let isScoreAll = false;
  let scoreTag = null;
  let scoreCountries = null;

  // True when args[index] exists and is not an option.
  const isPositional = (index) =>
    index < args.length && !args[index].startsWith("-");

  // Collects the run of consecutive positional arguments starting at `start`
  // as upper-cased country codes.
  const readCountryRun = (start) => {
    const run = [];
    for (let j = start; isPositional(j); j++) {
      run.push(args[j].trim().toUpperCase());
    }
    return run;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "-o" || arg === "--output") {
      outputFile = args[++i];
    } else if (arg === "-s" || arg === "--server") {
      serverUrl = args[++i];
    } else if (arg === "--authors-only") {
      authorsOnly = true;
    } else if (arg === "--videos-only") {
      videosOnly = true;
    } else if (arg === "--enrich") {
      // The mode is optional; a bare --enrich is recorded as `true`.
      const next = args[i + 1];
      if (next === "users" || next === "videos") {
        enrich = next;
        i++;
      } else {
        enrich = true;
      }
    } else if (arg === "--locations") {
      locations = args[++i];
    } else if (arg === "--no-filter") {
      noFilter = true;
    } else if (arg === "--discover") {
      // Legacy --discover flag: used on the plain `tag` command, without a
      // sub-command. The count is optional.
      const count = Number.parseInt(args[i + 1], 10);
      if (!Number.isNaN(count) && count > 0) {
        discover = count;
        i++;
      } else {
        discover = true;
      }
    } else if (arg === "--count") {
      discoverCount = Number.parseInt(args[++i], 10) || 4;
    } else if (arg === "--countries") {
      // Guard against `--countries` being the last argument (no value).
      const csv = args[++i];
      if (typeof csv === "string") {
        scoreCountries = csv
          .split(",")
          .map((s) => s.trim().toUpperCase())
          .filter(Boolean);
      }
    } else if (arg === "-p" || arg === "--prompt") {
      discoverPrompt = args[++i];
    } else if (!arg.startsWith("-")) {
      const keyword = arg.replace("#", "").trim().toLowerCase();
      if (keyword === "discover") {
        isDiscover = true;
        const run = readCountryRun(i + 1);
        discoverCountries.push(...run);
        // Skip only what this run consumed, not the accumulated total.
        i += run.length;
      } else if (keyword === "score") {
        isScore = true;
        // The first positional argument after `score` is the tag name.
        if (isPositional(i + 1)) {
          scoreTag = args[i + 1].replace("#", "").trim().toLowerCase();
          i++;
        }
        // Any further positional arguments are countries.
        const run = readCountryRun(i + 1);
        if (run.length > 0) {
          scoreCountries = [...(scoreCountries ?? []), ...run];
          // Countries supplied earlier through --countries must not be
          // counted here, otherwise unrelated arguments get skipped.
          i += run.length;
        }
      } else if (keyword === "score-all") {
        isScoreAll = true;
      } else {
        tags.push(keyword);
      }
    }
  }

  // Fields every parse result carries; the tag commands leave them neutral.
  const commonDefaults = {
    urls: [],
    outputFormat: "json",
    exploreCount: 0,
    showConfig: false,
    showHelp: false,
    customProxy: null,
    configAction: null,
    configValue: null,
    pipeMode: false,
    filterStr: null,
  };

  if (isDiscover) {
    return {
      subcommand: "tag-discover",
      tagDiscover: {
        countries: discoverCountries,
        count: discoverCount,
        prompt: discoverPrompt,
        serverUrl,
      },
      ...commonDefaults,
    };
  }

  if (isScore) {
    return {
      subcommand: "tag-score",
      tagScore: {
        tag: scoreTag,
        countries: scoreCountries,
        serverUrl,
      },
      ...commonDefaults,
    };
  }

  if (isScoreAll) {
    return {
      subcommand: "tag-score-all",
      tagScoreAll: {
        countries: scoreCountries,
        serverUrl,
      },
      ...commonDefaults,
    };
  }

  return {
    subcommand: "tag",
    tagTags: {
      tags,
      outputFile,
      authorsOnly,
      videosOnly,
      enrich,
      locations,
      noFilter,
      serverUrl,
      discover,
    },
    ...commonDefaults,
  };
}
|
|
896
|
+
|
|
717
897
|
export function parseArgs() {
|
|
718
898
|
const args = process.argv.slice(2);
|
|
719
899
|
|
|
@@ -780,12 +960,8 @@ export function parseArgs() {
|
|
|
780
960
|
return parseCommentsArgs(args.slice(1));
|
|
781
961
|
}
|
|
782
962
|
|
|
783
|
-
if (args.length > 0 && args[0] === "
|
|
784
|
-
return
|
|
785
|
-
}
|
|
786
|
-
|
|
787
|
-
if (args.length > 0 && args[0] === "db-import") {
|
|
788
|
-
return parseDbImportArgs(args.slice(1));
|
|
963
|
+
if (args.length > 0 && args[0] === "tag") {
|
|
964
|
+
return parseTagArgs(args.slice(1));
|
|
789
965
|
}
|
|
790
966
|
|
|
791
967
|
if (args.length > 0 && args[0] === "refresh") {
|
package/src/lib/constants.js
CHANGED
|
@@ -208,6 +208,46 @@ const HELP_TEXT = [
|
|
|
208
208
|
" POST /api/tiktok/lookup 同时获取视频和作者信息 { videoUrl: string }",
|
|
209
209
|
" 示例: tt-help webserver -p 3000",
|
|
210
210
|
"",
|
|
211
|
+
" tag <标签名> [...] [选项]",
|
|
212
|
+
" 抓取标签页视频和作者(旧版 CLI 模式)",
|
|
213
|
+
" 选项:",
|
|
214
|
+
" -s, --server <URL> 推送到 watch 服务端",
|
|
215
|
+
" --enrich [users|videos] 补充国家/地区信息(默认 videos)",
|
|
216
|
+
` --locations <国家代码> 目标国家,逗号分隔(默认 ${DEFAULT_TARGET_LOCATIONS_CSV})`,
|
|
217
|
+
" --no-filter 不过滤国家",
|
|
218
|
+
" --discover [数量] LLM 自动发现标签 + 记录有效标签",
|
|
219
|
+
" -o, --output <file> 输出到 JSON 文件",
|
|
220
|
+
" --authors-only 只输出作者列表",
|
|
221
|
+
" --videos-only 只输出视频列表",
|
|
222
|
+
" 示例: tt-help tag ventas --enrich -s http://127.0.0.1:3001",
|
|
223
|
+
"",
|
|
224
|
+
" tag discover <国家> [国家...] [选项]",
|
|
225
|
+
" LLM 生成对应语言的 TikTok 电商标签,存入 tags 表",
|
|
226
|
+
" 选项:",
|
|
227
|
+
" --count <N> 每个国家生成标签数(默认 4)",
|
|
228
|
+
" -p, --prompt <文本> 用户自定义领域提示",
|
|
229
|
+
" -s, --server <URL> 服务端地址(默认 http://127.0.0.1:3000)",
|
|
230
|
+
" 示例: tt-help tag discover ES",
|
|
231
|
+
" tt-help tag discover ES FR DE --count 5",
|
|
232
|
+
' tt-help tag discover DE -p "手工首饰卖家"',
|
|
233
|
+
"",
|
|
234
|
+
" tag score <标签名> [选项]",
|
|
235
|
+
" 客户端本地打分:抓取标签页 → 查作者国家 → 算分 → 推送用户 → 上报服务端",
|
|
236
|
+
" 选项:",
|
|
237
|
+
" --countries <CSV> 目标国家,逗号分隔(默认 13 个欧洲国家)",
|
|
238
|
+
" -s, --server <URL> 服务端地址(默认 http://127.0.0.1:3000)",
|
|
239
|
+
" 示例: tt-help tag score ventas",
|
|
240
|
+
" tt-help tag score ventas --countries ES",
|
|
241
|
+
"",
|
|
242
|
+
" tag score-all [选项]",
|
|
243
|
+
" 自动循环打分:从服务端 tags 表取 new 标签,逐个本地打分并上报",
|
|
244
|
+
" enrich 浏览器实例在整个循环中复用",
|
|
245
|
+
" 选项:",
|
|
246
|
+
" --countries <CSV> 目标国家,逗号分隔(默认 13 个欧洲国家)",
|
|
247
|
+
" -s, --server <URL> 服务端地址(默认 http://127.0.0.1:3000)",
|
|
248
|
+
" 示例: tt-help tag score-all",
|
|
249
|
+
" tt-help tag score-all --countries ES -s http://127.0.0.1:3001",
|
|
250
|
+
"",
|
|
211
251
|
" config [show|set|unset|reset]",
|
|
212
252
|
" config 查看当前配置",
|
|
213
253
|
" config set <key> <value> 设置配置(key: proxy, server, browser, userId, maxFollowing, maxFollowers, maxVideos, maxComments)",
|
|
@@ -225,6 +265,9 @@ const HELP_TEXT = [
|
|
|
225
265
|
" tt-help attach -p 5 -i 10",
|
|
226
266
|
" tt-help watch -o data/result.db",
|
|
227
267
|
" tt-help videostats data/result.db -p 3",
|
|
268
|
+
" tt-help tag discover ES FR --count 5",
|
|
269
|
+
" tt-help tag score ventas --countries ES",
|
|
270
|
+
" tt-help tag score-all --countries ES,FR -s http://127.0.0.1:3001",
|
|
228
271
|
];
|
|
229
272
|
|
|
230
273
|
const PUBLIC_HELP_HIDDEN_HEADERS = new Set([
|
package/src/lib/parse-ssr.mjs
CHANGED
|
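[extraction note: the hunk for parse-ssr.mjs (+1 -0) is not present here; the 150-line hunk below matches package/src/lib/tag-discover.js (+150 -0) from the file list]
@@ -0,0 +1,150 @@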
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
import { resolve, dirname } from "path";
import { fileURLToPath } from "url";
|
|
4
|
+
|
|
5
|
+
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// JSON file holding the recorded tags: data/productive-tags.json, located two
// directory levels above this module.
const TAGS_FILE = resolve(__dirname, "../../data/productive-tags.json");
|
|
13
|
+
|
|
14
|
+
/**
 * Reads the tag store from TAGS_FILE.
 *
 * Best-effort: a missing, unreadable or malformed file yields an empty store
 * instead of an error. A parsed value is only accepted when it carries a
 * `tags` array, because the callers in this module index into `tags`.
 *
 * @returns {{ tags: Array, lastUpdated: (string|null) }} The stored data, or
 *   an empty store.
 */
function loadTags() {
  const emptyStore = { tags: [], lastUpdated: null };
  if (!existsSync(TAGS_FILE)) return emptyStore;

  try {
    const parsed = JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
    if (parsed && Array.isArray(parsed.tags)) return parsed;
  } catch {
    // Unreadable or corrupt file: deliberately treated as "no data yet".
  }
  return emptyStore;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Writes the tag store to TAGS_FILE as pretty-printed JSON, creating the
 * parent directory when it does not exist yet.
 *
 * Uses the module-level `mkdirSync` import: `require` is not available in an
 * ES module, so the previous inline `require("fs")` failed as soon as the
 * data directory was missing.
 *
 * @param {object} data - The store to persist.
 */
function saveTags(data) {
  // `recursive: true` makes this a no-op when the directory already exists.
  mkdirSync(dirname(TAGS_FILE), { recursive: true });
  writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
}
|
|
31
|
+
|
|
32
|
+
/**
 * Returns the `tags` list of the store read by loadTags().
 *
 * @returns {Array} The recorded tag entries.
 */
export function getProductiveTags() {
  const { tags } = loadTags();
  return tags;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Records a tag in the store and persists the store.
 *
 * An existing entry (matched by `tag`) gets the country added if it is new,
 * its `userCount` increased and `lastUsed` refreshed. Otherwise a new entry
 * is appended. One timestamp is taken per call so `firstSeen`, `lastUsed` and
 * `lastUpdated` written by the same call always agree.
 *
 * @param {string} tag - Tag name used as the entry key.
 * @param {string} country - Country to associate with the tag.
 * @param {number} userCount - Amount added to the entry's user count.
 */
export function recordProductiveTag(tag, country, userCount) {
  const data = loadTags();
  const now = new Date().toISOString();

  const entry = data.tags.find((t) => t.tag === tag);
  if (entry) {
    if (!entry.countries.includes(country)) {
      entry.countries.push(country);
    }
    entry.userCount += userCount;
    entry.lastUsed = now;
  } else {
    data.tags.push({
      tag,
      countries: [country],
      userCount,
      firstSeen: now,
      lastUsed: now,
    });
  }

  data.lastUpdated = now;
  saveTags(data);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Sends one user prompt to the chat-completions endpoint and returns the text
 * of the first choice.
 *
 * NOTE(review): the bearer token from the APIKEY environment variable is sent
 * to a hard-coded address over plain HTTP, and the HTTP status is not
 * checked — confirm both are intended.
 *
 * @param {string} prompt - Prompt sent as the single user message.
 * @returns {Promise<string>} The reply text, or "" when the response carries
 *   no message content.
 */
async function callLLM(prompt) {
  const apiKey = process.env.APIKEY || "";
  const { fetch } = await import("undici");

  const payload = {
    model: "zc-fast",
    messages: [{ role: "user", content: prompt }],
    max_tokens: 1024,
    temperature: 0.7,
  };
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  };

  const response = await fetch(
    "http://82.156.52.214:18000/v1/chat/completions",
    { method: "POST", headers, body: JSON.stringify(payload) },
  );

  const result = await response.json();
  const firstChoice = result.choices?.[0];
  return firstChoice?.message?.content || "";
}
|
|
83
|
+
|
|
84
|
+
/**
 * Canonicalises one hashtag candidate: surrounding whitespace removed,
 * leading `#` characters stripped, lower-cased.
 *
 * Whitespace is trimmed before the `#` is stripped so that " #tag" does not
 * keep its `#`. Non-string input yields "" so callers can filter it out.
 *
 * @param {*} t - Raw candidate.
 * @returns {string} The normalised tag, possibly "".
 */
function normalizeTag(t) {
  if (typeof t !== "string") return "";
  return t.trim().replace(/^#+/, "").trim().toLowerCase();
}

/**
 * Extracts tag names from a model reply.
 *
 * Tried in order:
 *   1. the whole reply as JSON (an array, or an object with a `tags` array);
 *   2. the outermost `[...]` slice of the reply as JSON, which covers replies
 *      wrapped in Markdown code fences or surrounding prose;
 *   3. a line/comma split that strips list markers and keeps only tokens made
 *      of `a-z`, `0-9` and `_`.
 * Tags shorter than two characters are dropped in every path.
 *
 * @param {string} content - Raw reply text.
 * @returns {string[]} Normalised tags; may contain duplicates.
 */
function parseTagsFromResponse(content) {
  const text = typeof content === "string" ? content : "";
  const isUsable = (t) => t && t.length >= 2;

  const candidates = [text];
  const open = text.indexOf("[");
  const close = text.lastIndexOf("]");
  if (open !== -1 && close > open) {
    candidates.push(text.slice(open, close + 1));
  }

  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      const list = Array.isArray(parsed) ? parsed : parsed?.tags;
      if (Array.isArray(list)) {
        const found = list.map(normalizeTag).filter(isUsable);
        // An empty result falls through so the later strategies still run.
        if (found.length > 0) return found;
      }
    } catch {
      // Not JSON: try the next candidate, then the line-based fallback.
    }
  }

  const tags = [];
  for (const line of text.split(/[\n,]+/)) {
    // Strip list markers such as "1. " or "- " before normalising.
    const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
    if (isUsable(cleaned) && /^[a-z0-9_]+$/.test(cleaned)) {
      tags.push(cleaned);
    }
  }
  return tags;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Asks the LLM for TikTok hashtags likely used by sellers in the given
 * countries and returns the de-duplicated result.
 *
 * Previously recorded tags whose countries overlap the request are listed in
 * the prompt so the model avoids repeating them. That hint is only emitted
 * when at least one such tag exists. Progress lines go to stderr.
 *
 * @param {string[]|string} countries - Country codes, as an array or a
 *   comma-separated string.
 * @param {object} [options]
 * @param {string} [options.language="auto"] - Language the tags should be in;
 *   "auto" adds no language hint to the prompt.
 * @param {number} [options.count=10] - Number of tags to request and the
 *   maximum number returned.
 * @returns {Promise<string[]>} Up to `count` unique tags.
 */
export async function discoverTags(countries, options = {}) {
  const { language = "auto", count = 10 } = options;

  const countryStr = Array.isArray(countries)
    ? countries.join(", ")
    : countries;
  // Exact-match lookup set; a string argument is split on commas so that it
  // is not matched by substring.
  const wanted = new Set(
    Array.isArray(countries)
      ? countries
      : String(countries ?? "")
          .split(",")
          .map((c) => c.trim())
          .filter(Boolean),
  );

  const langHint =
    language === "auto" ? "" : `Tags should be in ${language} language.`;

  const knownTags = (getProductiveTags() ?? [])
    .filter(
      (t) => Array.isArray(t.countries) && t.countries.some((c) => wanted.has(c)),
    )
    .map((t) => `#${t.tag}`);
  const historyHint =
    knownTags.length > 0
      ? `Previously productive tags for these countries: ${knownTags.join(", ")}. Generate new ones, don't repeat these.`
      : "";

  const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.

Requirements:
- Focus on tags that sellers/merchants actually use to promote their products
- Include local language commerce tags (sell, shop, store, online, vendor, etc. in the local language)
- Mix broad commerce tags with country-specific tags
${langHint}
${historyHint}

Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;

  process.stderr.write(
    ` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
  );
  const content = await callLLM(prompt);
  const tags = parseTagsFromResponse(content);

  const unique = [...new Set(tags)].slice(0, count);
  process.stderr.write(
    ` [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
  );
  return unique;
}
|
|
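[extraction note: file header missing; this 296-line hunk matches package/src/lib/tag-fetcher.js (+296 -0) from the file list]
@@ -0,0 +1,296 @@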
|
|
|
1
|
+
import { chromium } from "playwright";
|
|
2
|
+
import { detectBrowser } from "./browser/launch.js";
|
|
3
|
+
import { getAntiDetectScript } from "./browser/anti-detect.js";
|
|
4
|
+
import { TikTokScraper } from "./tiktok-scraper.mjs";
|
|
5
|
+
|
|
6
|
+
// Base URL of a TikTok tag page; the URL-encoded tag name is appended.
const TAG_URL = "https://www.tiktok.com/tag";

// User-agent string applied to the browser context.
const USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

// Pause after each scroll step, in milliseconds.
const SCROLL_INTERVAL = 3000;

// Scrolling stops after this many consecutive rounds without new videos.
const MAX_STALE_ROUNDS = 3;
|
|
11
|
+
|
|
12
|
+
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
15
|
+
|
|
16
|
+
/**
 * Thin wrapper around detectBrowser() from ./browser/launch.js.
 *
 * @returns {*} Whatever detectBrowser() returns; fetchTagData uses it as the
 *   browser executable path.
 */
function findBrowser() {
  const detected = detectBrowser();
  return detected;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Launches a headless Chromium instance.
 *
 * When `browserPath` is given it is tried first. If that launch fails, a
 * second launch is attempted without an explicit executable path. Without a
 * `browserPath` a failed launch is reported as an Error whose `cause` is the
 * underlying launch error, so the real reason is not lost.
 *
 * @param {string} [browserPath] - Path of the browser executable to use.
 * @returns {Promise<object>} The launched browser.
 * @throws {Error} When no launch attempt succeeds.
 */
async function launchBrowser(browserPath) {
  const opts = {
    headless: true,
    args: [
      "--no-sandbox",
      "--disable-blink-features=AutomationControlled",
      "--disable-dev-shm-usage",
    ],
  };
  if (browserPath) opts.executablePath = browserPath;

  try {
    return await chromium.launch(opts);
  } catch (err) {
    if (!browserPath) {
      throw new Error("无法启动浏览器", { cause: err });
    }
    // The explicit executable failed: retry without an executable path.
    opts.executablePath = undefined;
    return await chromium.launch(opts);
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Flattens one raw item into the flat video record used by this module.
 * Missing fields fall back to "", 0 or false.
 *
 * @param {object} item - Raw item from an item-list response.
 * @returns {Promise<object>} The flattened record.
 */
async function extractItemData(item) {
  const { author, stats, music, video } = item;

  return {
    id: item.id || "",
    desc: (item.desc || "").trim(),
    authorUniqueId: author?.uniqueId || "",
    authorId: author?.id || "",
    authorNickname: author?.nickname || "",
    authorSecUid: author?.secUid || "",
    createTime: item.createTime || 0,
    playCount: stats?.playCount || 0,
    diggCount: stats?.diggCount || 0,
    shareCount: stats?.shareCount || 0,
    commentCount: stats?.commentCount || 0,
    musicTitle: music?.title || "",
    isAd: item.isAd || false,
    duration: video?.duration || 0,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Collects the videos and authors listed on a TikTok tag page.
 *
 * Opens the tag page in a headless browser, listens to the page's
 * challenge-detail and item-list API responses, and scrolls until no new
 * videos arrive for MAX_STALE_ROUNDS consecutive rounds or `timeout` elapses.
 * The browser is always closed before returning or throwing.
 *
 * @param {string} tag - Tag name, without the leading `#`.
 * @param {object} [options]
 * @param {number} [options.timeout=300000] - Maximum scrolling time in ms.
 * @param {string} [options.browserPath] - Browser executable path; detected
 *   automatically when omitted.
 * @param {string} [options.locale='en-US'] - Locale of the browser context.
 * @param {Function} [options.onProgress] - Called with `{ videos, authors }`
 *   (running counts) after each processed item-list response.
 * @returns {Promise<{ tag: string, challengeId: string, totalPosts: number,
 *   videoCount: number, uniqueAuthorCount: number, videos: Array,
 *   uniqueAuthors: string[] }>}
 * @throws {Error} When no browser is found, the page does not answer with
 *   HTTP 200, or the page shows TikTok's error text.
 */
export async function fetchTagData(tag, options = {}) {
  const {
    timeout = 300000,
    browserPath: customBrowserPath,
    locale = "en-US",
    onProgress,
  } = options;

  const browserPath = customBrowserPath || findBrowser();
  if (!browserPath) {
    throw new Error(
      "未找到可用的浏览器，请设置 browserPath 或安装 Chrome/Edge",
    );
  }

  const browser = await launchBrowser(browserPath);

  try {
    const context = await browser.newContext({
      viewport: { width: 1280, height: 900 },
      userAgent: USER_AGENT,
      locale,
    });
    await context.addInitScript(getAntiDetectScript());
    const page = await context.newPage();

    let challengeInfo = null;
    const rawVideos = [];
    const authors = new Set();

    // Harvest data from the page's own API traffic instead of the DOM.
    page.on("response", async (apiResp) => {
      try {
        const url = apiResp.url();
        const ct = apiResp.headers()["content-type"] || "";

        if (url.includes("/api/challenge/detail/") && ct.includes("json")) {
          const body = await apiResp.json();
          if (body?.challengeInfo?.challenge) {
            challengeInfo = body.challengeInfo.challenge;
          }
        }

        if (url.includes("/api/challenge/item_list/") && ct.includes("json")) {
          const body = await apiResp.json();
          if (!body?.itemList) return;
          for (const item of body.itemList) {
            const uid = item.author?.uniqueId || "";
            if (uid) authors.add(uid);
            rawVideos.push(await extractItemData(item));
          }
          if (onProgress) {
            onProgress({ videos: rawVideos.length, authors: authors.size });
          }
        }
      } catch {
        // Best-effort: a response that cannot be read is skipped.
      }
    });

    const tagUrl = `${TAG_URL}/${encodeURIComponent(tag)}`;
    const navResponse = await page.goto(tagUrl, {
      waitUntil: "domcontentloaded",
      timeout: 30000,
    });

    // page.goto can resolve to null; report that as a failed load (status 0)
    // rather than failing with a TypeError.
    const status = navResponse?.status() ?? 0;
    if (status !== 200) {
      throw new Error(`标签页返回 HTTP ${status}`);
    }

    await page.waitForTimeout(3000);

    const pageError = await page.evaluate(() => {
      const text = document.body?.innerText || "";
      if (text.includes("Something went wrong")) return "page_error";
      return null;
    });
    if (pageError) {
      throw new Error("标签页加载失败，TikTok 返回了错误页面");
    }

    let lastCount = 0;
    let staleRounds = 0;
    const startTime = Date.now();

    // Scroll until the video count stops growing or the time budget is spent.
    while (staleRounds < MAX_STALE_ROUNDS) {
      if (Date.now() - startTime > timeout) break;

      await page.evaluate(() => window.scrollBy(0, 3000));
      await sleep(SCROLL_INTERVAL);

      if (rawVideos.length === lastCount) {
        staleRounds++;
      } else {
        staleRounds = 0;
        lastCount = rawVideos.length;
      }
    }

    // Keep the first occurrence of each video id.
    const seen = new Set();
    const uniqueVideos = rawVideos.filter((v) =>
      seen.has(v.id) ? false : (seen.add(v.id), true),
    );

    const totalPosts = challengeInfo?.stats?.videoCount || 0;

    return {
      tag,
      challengeId: challengeInfo?.id || "",
      totalPosts,
      videoCount: uniqueVideos.length,
      uniqueAuthorCount: authors.size,
      videos: uniqueVideos,
      uniqueAuthors: [...authors],
    };
  } finally {
    await browser.close();
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Adds a `locationCreated` value to each video by querying a TikTokScraper.
 *
 * In "users" mode each distinct author is looked up once and the result is
 * applied to all of that author's videos (`locationMap` is keyed by author
 * uniqueId). In any other mode each video is looked up by its URL
 * (`locationMap` is keyed by video id). A failed lookup yields `null`.
 *
 * The video objects passed in are updated in place; the returned `videos`
 * array is a new array holding those same objects.
 *
 * Progress is reported once per finished lookup. An exception thrown by
 * `onProgress` no longer overwrites a successfully fetched location or
 * inflates the `done` counter.
 *
 * @param {Array} videos - The `videos` array returned by fetchTagData.
 * @param {object} [options]
 * @param {string} [options.mode='videos'] - 'users': look up author info;
 *   'videos': look up video info.
 * @param {number} [options.poolSize=3] - Pool size handed to a newly created
 *   TikTokScraper.
 * @param {number} [options.maxRetries=3] - Retry count handed to each lookup.
 * @param {Function} [options.onProgress] - Called with
 *   `{ done, total, current, locationCreated }`.
 * @param {object} [options.existingScraper] - Scraper to reuse. When given it
 *   is neither initialised nor closed here.
 * @returns {Promise<{ videos: Array, locationMap: Record<string, string|null> }>}
 */
export async function enrichVideosWithLocation(videos, options = {}) {
  const {
    mode = "videos",
    poolSize = 3,
    maxRetries = 3,
    onProgress,
    existingScraper,
  } = options;

  const scraper = existingScraper || new TikTokScraper({ poolSize });
  const ownsScraper = !existingScraper;
  if (ownsScraper) await scraper.init();

  try {
    const enriched = [...videos];
    const locationMap = {};
    let done = 0;

    // Runs one lookup; any failure means "location unknown".
    const lookupLocation = async (fetchInfo) => {
      try {
        const info = await fetchInfo();
        return info?.locationCreated || null;
      } catch {
        return null;
      }
    };

    // Reports one finished lookup. Kept outside the lookup's try/catch so a
    // throwing callback cannot be mistaken for a failed lookup.
    const reportProgress = (total, current, locationCreated) => {
      done++;
      if (onProgress) onProgress({ done, total, current, locationCreated });
    };

    if (mode === "users") {
      const uniqueAuthors = [
        ...new Set(videos.map((v) => v.authorUniqueId).filter(Boolean)),
      ];
      const total = uniqueAuthors.length;

      await Promise.allSettled(
        uniqueAuthors.map(async (uniqueId) => {
          const location = await lookupLocation(() =>
            scraper.getUserInfo(uniqueId, maxRetries),
          );
          locationMap[uniqueId] = location;
          reportProgress(total, uniqueId, location);
        }),
      );

      for (const v of enriched) {
        v.locationCreated = locationMap[v.authorUniqueId] || null;
      }
    } else {
      const total = videos.length;

      await Promise.allSettled(
        enriched.map(async (v) => {
          const videoUrl = `https://www.tiktok.com/@${v.authorUniqueId}/video/${v.id}`;
          const location = await lookupLocation(() =>
            scraper.getVideoInfo(videoUrl, maxRetries),
          );
          v.locationCreated = location;
          locationMap[v.id] = location;
          reportProgress(total, videoUrl, location);
        }),
      );
    }

    return { videos: enriched, locationMap };
  } finally {
    if (ownsScraper) await scraper.close();
  }
}
|