tt-help-cli-ycl 1.3.83 → 1.3.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/test-refill-order.mjs +218 -0
- package/src/cli/attach.js +3 -34
- package/src/cli/auto.js +3 -18
- package/src/cli/comments.js +13 -57
- package/src/cli/explore.js +255 -266
- package/src/cli/refresh.js +6 -21
- package/src/cli/tag.js +712 -0
- package/src/lib/api-client.js +101 -0
- package/src/lib/args.js +182 -6
- package/src/lib/constants.js +43 -0
- package/src/lib/parse-ssr.mjs +1 -0
- package/src/lib/tag-discover.js +124 -0
- package/src/lib/tag-fetcher.js +296 -0
- package/src/lib/target-locations.js +18 -0
- package/src/main.js +14 -0
- package/src/npm-main.js +3 -0
- package/src/scraper/explore-core.js +6 -6
- package/src/watch/data-store.js +304 -49
- package/src/watch/public/app.js +95 -0
- package/src/watch/public/index.html +15 -0
- package/src/watch/public/style.css +107 -0
- package/src/watch/server.js +185 -0
- package/src/watch/tag-service.js +334 -0
package/src/cli/tag.js
ADDED
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
import { writeFileSync } from "fs";
|
|
2
|
+
import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
3
|
+
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
4
|
+
import {
|
|
5
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
6
|
+
isLocationInList,
|
|
7
|
+
} from "../lib/target-locations.js";
|
|
8
|
+
import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Push tag-discovered authors to the server's raw-user endpoint.
 *
 * Builds one user record per author, POSTs `{ users }` as JSON to
 * `${serverUrl}/api/raw-users`, writes the `added` / `skipped` counts from
 * the response to stderr and returns the parsed response body.
 *
 * @param {string} serverUrl - Base URL of the server; the path is appended as-is.
 * @param {string[]} filteredAuthors - Author uniqueIds to push.
 * @param {Array<object>} videos - Videos used to look up each author's
 *   `locationCreated` (matched on `authorUniqueId`).
 * @returns {Promise<object>} Parsed JSON response (this code reads `added` and `skipped`).
 */
async function pushToServer(serverUrl, filteredAuthors, videos) {
  // Index locations once instead of scanning `videos` for every author.
  // The first video that actually carries a location wins, so an author is
  // not pushed with a null location just because an earlier video lacked one.
  const locationByAuthor = new Map();
  for (const video of videos ?? []) {
    const authorId = video.authorUniqueId;
    if (video.locationCreated && !locationByAuthor.has(authorId)) {
      locationByAuthor.set(authorId, video.locationCreated);
    }
  }

  const users = filteredAuthors.map((author) => ({
    uniqueId: author,
    sources: ["tag"],
    locationCreated: locationByAuthor.get(author) ?? null,
  }));

  const res = await fetch(`${serverUrl}/api/raw-users`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ users }),
  });
  const data = await res.json();
  process.stderr.write(
    ` 已推送 ${data.added} 个用户到 jobs_base (来源: tag, 跳过: ${data.skipped})\n`,
  );
  return data;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Shared scoring logic (used by handleScore and handleScoreAll).
 *
 * The score is the sum of three parts, capped at 100 and rounded:
 *   - density:  (matchedAuthors / authorCount) * 50, or 0 when authorCount <= 0
 *   - absolute: min(matchedAuthors / 10, 1) * 30
 *   - videos:   min(filteredVideoCount / 20, 1) * 20, or 0 when there are none
 *
 * Status is "productive" at 70 and above, "dead" below 10, otherwise "scored".
 *
 * @param {number} authorCount - Number of authors seen for the tag.
 * @param {number} matchedAuthors - Authors that matched the target countries.
 * @param {number} filteredVideoCount - Videos that matched the target countries.
 * @returns {{score: number, status: string}}
 */
function calcScore(authorCount, matchedAuthors, filteredVideoCount) {
  let density = 0;
  if (authorCount > 0) {
    density = (matchedAuthors / authorCount) * 50;
  }

  const absolute = Math.min(matchedAuthors / 10, 1) * 30;

  let bonus = 0;
  if (filteredVideoCount > 0) {
    bonus = Math.min(filteredVideoCount / 20, 1) * 20;
  }

  const capped = Math.min(density + absolute + bonus, 100);
  const score = Math.round(capped);

  // Everything between the two thresholds is "scored".
  let status = "scored";
  if (score >= 70) {
    status = "productive";
  } else if (score < 10) {
    status = "dead";
  }

  return { score, status };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Shared filter + score step (used by handleScore and handleScoreAll).
 *
 * Keeps the videos whose `locationCreated` passes `isLocationInList` for
 * `targetCountries`, then writes onto `result` (mutated in place):
 *   - `matchedAuthors`:   number of distinct non-empty `authorUniqueId`s kept
 *   - `matchedCountries`: `[{ c: <location>, n: <video count> }, ...]`
 *   - `score` / `status`: from `calcScore(result.authorCount, matchedAuthors, kept videos)`
 *
 * @param {Array<object>} videos - Videos carrying `locationCreated` / `authorUniqueId`.
 * @param {string[]} targetCountries - Locations to keep.
 * @param {object} result - Score record; `authorCount` is read, the fields above are set.
 * @returns {{filtered: Array<object>, matchedAuthorSet: Set<string>}}
 */
function applyFilterAndScore(videos, targetCountries, result) {
  const filtered = videos.filter((video) =>
    isLocationInList(video.locationCreated, targetCountries),
  );

  // One pass over the kept videos collects both the authors and the tallies.
  const matchedAuthorSet = new Set();
  const perCountry = {};
  for (const video of filtered) {
    if (video.authorUniqueId) {
      matchedAuthorSet.add(video.authorUniqueId);
    }
    const location = video.locationCreated;
    if (location) {
      perCountry[location] = (perCountry[location] || 0) + 1;
    }
  }

  result.matchedAuthors = matchedAuthorSet.size;
  result.matchedCountries = Object.entries(perCountry).map(
    ([location, videoCount]) => ({ c: location, n: videoCount }),
  );

  const scored = calcScore(
    result.authorCount,
    result.matchedAuthors,
    filtered.length,
  );
  result.score = scored.score;
  result.status = scored.status;

  return { filtered, matchedAuthorSet };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Process a single tag for `handleTag`: fetch its videos, optionally enrich
 * them with location data and filter by target locations, optionally push the
 * resulting authors to the server and record the tag, while reporting
 * progress on stderr.
 *
 * Any error thrown along the way is caught and returned as `{ tag, error }`.
 *
 * @param {string} tag - Tag name (printed with a leading `#`).
 * @param {number} index - Zero-based position of this tag in the batch.
 * @param {number} total - Batch size; a `[i/total]` prefix is shown when > 1.
 * @param {object} options
 * @param {boolean|string} options.enrich - Falsy to skip enrichment; `true`
 *   means "videos" mode, any other truthy value is passed through as the mode.
 * @param {string[]} options.targetLocations - Locations kept by the filter.
 * @param {boolean} options.noFilter - When truthy, enrich without filtering.
 * @param {string} [options.serverUrl] - When set, authors are pushed there.
 * @param {boolean} options.recordTags - When truthy and the push added users,
 *   `recordProductiveTag` is called once per distinct video location.
 * @returns {Promise<object>} `{ tag, totalPosts, videoCount, authorCount,
 *   authors, videos }` on success, `{ tag, error }` on failure.
 */
async function processTag(
  tag,
  index,
  total,
  { enrich, targetLocations, noFilter, serverUrl, recordTags },
) {
  const prefix = total > 1 ? `[${index + 1}/${total}]` : "";
  const emit = (text) => process.stderr.write(text);
  // Carriage-return status line that is rewritten as counts change.
  const statusLine = (videoTotal, authorTotal) =>
    `\r${prefix} #${tag}: ${videoTotal} 视频, ${authorTotal} 作者`;

  emit(`${prefix} 正在获取 #${tag} ... `);

  try {
    const result = await fetchTagData(tag, {
      onProgress: ({ videos, authors }) => {
        emit(statusLine(videos, authors));
      },
    });
    emit(statusLine(result.videoCount, result.uniqueAuthorCount));

    let videos = result.videos;
    let filteredAuthors = result.uniqueAuthors;

    if (enrich) {
      const enrichMode = enrich === true ? "videos" : enrich;
      const scope = noFilter
        ? "不过滤"
        : `目标: ${targetLocations.join(",")}`;
      emit(`\n 正在补充国家信息 (${enrichMode} 模式,${scope})...\n`);

      const enriched = await enrichVideosWithLocation(videos, {
        mode: enrichMode,
        onProgress: ({ done, total, current, locationCreated }) => {
          const label = enrichMode === "users" ? `@${current}` : current;
          const shownLocation = locationCreated || "-";
          // Tick entries whose location is one of the targets.
          const mark =
            locationCreated &&
            isLocationInList(locationCreated, targetLocations)
              ? " ✓"
              : "";
          emit(`\r [${done}/${total}] ${label} → ${shownLocation}${mark}`);
        },
      });
      videos = enriched.videos;
      emit("\n");

      if (!noFilter) {
        const before = videos.length;
        videos = videos.filter((video) =>
          isLocationInList(video.locationCreated, targetLocations),
        );
        const authorIds = videos
          .map((video) => video.authorUniqueId)
          .filter(Boolean);
        filteredAuthors = [...new Set(authorIds)];
        emit(
          ` 过滤后: ${before} → ${videos.length} 视频, ${filteredAuthors.length} 作者\n`,
        );
      }
    }

    if (serverUrl && filteredAuthors.length > 0) {
      const pushResult = await pushToServer(serverUrl, filteredAuthors, videos);
      if (recordTags && pushResult.added > 0) {
        const locations = videos
          .map((video) => video.locationCreated)
          .filter(Boolean);
        const countries = [...new Set(locations)];
        countries.forEach((country) => {
          recordProductiveTag(tag, country, pushResult.added);
        });
        emit(
          ` 已记录标签 #${tag} (${countries.join(",")}, ${pushResult.added} 用户)\n`,
        );
      }
    }

    emit(`${statusLine(videos.length, filteredAuthors.length)}\n`);

    return {
      tag,
      totalPosts: result.totalPosts,
      videoCount: videos.length,
      authorCount: filteredAuthors.length,
      authors: filteredAuthors,
      videos,
    };
  } catch (err) {
    emit(`\r${prefix} #${tag}: 失败 - ${err.message}\n`);
    return { tag, error: err.message };
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * CLI handler for `tt-help tag discover`: asks the server to generate tags
 * for each requested country and prints one summary line per country.
 *
 * Issues one GET to `${serverUrl}/api/tags/discover` per country (in order)
 * with `country`, `count` and, when given, `prompt` as query parameters.
 * Success lines go to stdout; server-reported and request errors go to
 * stderr and do not stop the remaining countries.
 *
 * Prints usage and exits with code 1 when no country is given.
 *
 * @param {object} parsed - Parsed CLI arguments; reads `parsed.tagDiscover`
 *   (`countries`, `count` defaulting to 4, `prompt`, `serverUrl` defaulting
 *   to http://127.0.0.1:3000).
 * @returns {Promise<void>}
 */
export async function handleDiscover(parsed) {
  const settings = parsed.tagDiscover || {};
  const { countries, count = 4, prompt, serverUrl } = settings;

  const hasCountries = Boolean(countries) && countries.length !== 0;
  if (!hasCountries) {
    const usage = [
      "用法: tt-help tag discover <国家> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
      "",
      "示例:",
      " tt-help tag discover ES # 为西班牙生成 4 个标签",
      " tt-help tag discover ES FR --count 5 # 各生成 5 个",
      ' tt-help tag discover DE -p "卖手工首饰" # 带用户提示',
      " tt-help tag discover ES -s http://127.0.0.1:3001 # 指定服务端",
    ];
    for (const line of usage) {
      console.error(line);
    }
    process.exit(1);
  }

  const baseUrl = serverUrl || "http://127.0.0.1:3000";

  for (const country of countries) {
    const query = new URLSearchParams();
    query.set("country", country);
    query.set("count", String(count));
    if (prompt) {
      query.set("prompt", prompt);
    }

    try {
      const response = await fetch(`${baseUrl}/api/tags/discover?${query}`);
      const payload = await response.json();
      if (!payload.error) {
        const tagList = (payload.tags || []).join(", ");
        console.log(
          `${country}: 新增 ${payload.added}/${payload.total} 个标签: ${tagList}`,
        );
      } else {
        console.error(`${country}: 错误 - ${payload.error}`);
      }
    } catch (e) {
      console.error(`${country}: 请求失败 - ${e.message}`);
    }
  }
}
|
|
229
|
+
|
|
230
|
+
/**
 * CLI handler for `tt-help tag score`: scores one tag locally and reports it.
 *
 * Steps, as logged on stderr:
 *   1. fetch the tag's videos with `fetchTagData`;
 *   2. enrich them with location data (`enrichVideosWithLocation`, "videos" mode);
 *   3. filter by target countries and compute the score (`applyFilterAndScore`);
 *   4. push the matched authors (`pushToServer`) and POST the result
 *      (`reportToServer`).
 *
 * When no videos are found the tag is reported as "dead" and the function
 * returns. When a step throws, the error message is stored on the result,
 * the result is reported, and the function returns. Only a fully completed
 * run prints the result object to stdout as JSON.
 *
 * Prints usage and exits with code 1 when no tag is given.
 *
 * @param {object} parsed - Parsed CLI arguments; reads `parsed.tagScore`
 *   (`tag`, optional `countries` array, optional `serverUrl` defaulting to
 *   http://127.0.0.1:3000).
 * @returns {Promise<void>}
 */
export async function handleScore(parsed) {
  const { tagScore } = parsed;
  const { tag, countries, serverUrl } = tagScore || {};

  if (!tag) {
    const usage = [
      "用法: tt-help tag score <tag名称> [--countries <CSV>] [-s <服务端>]",
      "",
      "示例:",
      " tt-help tag score ventas # 打分单个标签",
      " tt-help tag score ventas --countries ES,FR # 指定目标国家",
      " tt-help tag score ventas -s http://127.0.0.1:3001 # 指定服务端",
    ];
    for (const line of usage) {
      console.error(line);
    }
    process.exit(1);
  }

  const baseUrl = serverUrl || "http://127.0.0.1:3000";
  // An empty list would match no video at all and get the tag reported as
  // dead, so it is treated like "not provided".
  const targetCountries =
    countries && countries.length > 0
      ? countries
      : [
          "ES",
          "FR",
          "DE",
          "PT",
          "IT",
          "NL",
          "BE",
          "AT",
          "IE",
          "PL",
          "CZ",
          "GR",
          "HU",
        ];

  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
  const startTime = Date.now();

  log("");
  log("========================================");
  log(` 标签打分: #${tag}`);
  log(` 目标国家: ${targetCountries.join(", ")}`);
  log(` 服务端: ${baseUrl}`);
  log(" 模式: 客户端本地打分(Playwright → enrich → 算分 → 上报)");
  log("========================================");
  log("");

  // Starts as "error" so an early failure is reported as such.
  const result = {
    tag,
    status: "error",
    score: 0,
    totalPosts: 0,
    authorCount: 0,
    matchedAuthors: 0,
    matchedCountries: [],
    pushedUsers: 0,
    error: null,
  };

  try {
    // Step 1/4: fetch the tag page videos.
    log("Step 1/4: 打开 TikTok 标签页抓取视频...");
    const tagResult = await fetchTagData(tag, {
      onProgress: ({ videos, authors }) => {
        process.stderr.write(`\r 已抓取: ${videos} 视频, ${authors} 作者`);
      },
    });
    log(
      `\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者`,
    );
    result.totalPosts = tagResult.totalPosts || 0;
    result.authorCount = tagResult.uniqueAuthorCount || 0;

    let videos = tagResult.videos;
    if (!videos || videos.length === 0) {
      log(" ⚠️ 没有视频,标记为 dead");
      result.status = "dead";
      result.error = "no videos found";
      await reportToServer(baseUrl, result);
      return;
    }

    // Step 2/4: look up the location of each video.
    log(`Step 2/4: 补充国家信息 (${videos.length} 个视频)...`);
    const enriched = await enrichVideosWithLocation(videos, {
      mode: "videos",
      onProgress: ({ done, total, current, locationCreated }) => {
        // Only refresh the progress line every 10 items and at the end.
        if (done % 10 === 0 || done === total) {
          process.stderr.write(
            `\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}`,
          );
        }
      },
    });
    videos = enriched.videos;
    const withLoc = videos.filter((v) => v.locationCreated).length;
    log(`\r 完成: ${withLoc}/${videos.length} 个视频有国家信息`);

    // Step 3/4: filter + score.
    log("Step 3/4: 过滤目标国家 + 计算分数...");
    const { filtered, matchedAuthorSet } = applyFilterAndScore(
      videos,
      targetCountries,
      result,
    );

    log(
      ` 算分: ${result.score}/100 → ${result.status} (匹配 ${result.matchedAuthors}/${result.authorCount} 作者)`,
    );
    if (result.matchedCountries.length > 0) {
      log(
        ` 国家: ${result.matchedCountries.map((c) => `${c.c}:${c.n}`).join(", ")}`,
      );
    }

    // Step 4/4: push users + report the result.
    log("Step 4/4: 推送用户到服务端 + 上报打分结果...");
    if (result.matchedAuthors > 0) {
      // Hand over only the target-country videos: the location recorded for
      // an author must come from a video that actually matched, not from an
      // unrelated video by the same author.
      const pushResult = await pushToServer(
        baseUrl,
        [...matchedAuthorSet],
        filtered,
      );
      result.pushedUsers = pushResult.added || 0;
    }
    await reportToServer(baseUrl, result);
  } catch (e) {
    log(`❌ 错误: ${e.message}`);
    result.error = e.message;
    // reportToServer handles its own request failures, so no extra guard here.
    await reportToServer(baseUrl, result);
    return;
  }

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
  const icon =
    result.status === "productive"
      ? "🟢"
      : result.status === "scored"
        ? "🟡"
        : result.status === "dead"
          ? "🔴"
          : "⚪";
  log("");
  log("----------------------------------------");
  log(` ${icon} 打分完成 (${elapsed}s)`);
  log(` 状态: ${result.status} 分数: ${result.score}/100`);
  log(
    ` 视频作者: ${result.authorCount} 匹配: ${result.matchedAuthors} 推送: ${result.pushedUsers}`,
  );
  log("----------------------------------------");

  console.log(JSON.stringify(result, null, 2));
}
|
|
389
|
+
|
|
390
|
+
/**
 * POST a tag score result to `${baseUrl}/api/tags/score-result` as JSON.
 *
 * Never throws for request problems: a response whose `ok` field is falsy,
 * or any error raised while sending/parsing, is written to stderr as a
 * warning instead.
 *
 * @param {string} baseUrl - Base URL of the server.
 * @param {object} result - Score record; sent as the JSON request body.
 * @returns {Promise<void>}
 */
async function reportToServer(baseUrl, result) {
  try {
    const request = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(result),
    };
    const response = await fetch(`${baseUrl}/api/tags/score-result`, request);
    const reply = await response.json();
    if (reply.ok) {
      return;
    }
    process.stderr.write(` ⚠️ 上报失败: ${reply.error}\n`);
  } catch (e) {
    process.stderr.write(` ⚠️ 上报请求失败: ${e.message}\n`);
  }
}
|
|
403
|
+
|
|
404
|
+
/**
 * CLI handler that scores every tag the server lists with `status=new`, one
 * at a time, until none are left.
 *
 * Per tag: claim it (`POST /api/tags/claim`), fetch its videos
 * (`fetchTagData`), enrich them with location data using one shared
 * `TikTokScraper` instance, filter/score (`applyFilterAndScore`), push the
 * matched authors (`pushToServer`) and report the result (`reportToServer`).
 * A failure while processing one tag is logged and reported, then the loop
 * moves on. The scraper is closed in a `finally` block.
 *
 * Tags whose claim is rejected are remembered and stepped over, so a tag
 * that keeps being listed as new cannot stall the loop.
 *
 * @param {object} parsed - Parsed CLI arguments; reads `parsed.tagScoreAll`
 *   (optional `countries` array, optional `serverUrl` defaulting to
 *   http://127.0.0.1:3000).
 * @returns {Promise<void>}
 */
export async function handleScoreAll(parsed) {
  const { tagScoreAll } = parsed;
  const { countries, serverUrl } = tagScoreAll || {};

  const baseUrl = serverUrl || "http://127.0.0.1:3000";
  // An empty list would match no video at all and get every tag reported as
  // dead, so it is treated like "not provided".
  const targetCountries =
    countries && countries.length > 0
      ? countries
      : [
          "ES",
          "FR",
          "DE",
          "PT",
          "IT",
          "NL",
          "BE",
          "AT",
          "IE",
          "PL",
          "CZ",
          "GR",
          "HU",
        ];

  const log = (...args) => process.stderr.write(args.join(" ") + "\n");

  log("");
  log("========================================");
  log(" 自动循环打分模式(客户端本地执行)");
  log(` 目标国家: ${targetCountries.join(", ")}`);
  log(` 服务端: ${baseUrl}`);
  log(" 流程: 从服务端拉 tag → 本地 Playwright 抓取 → enrich → 算分 → 上报");
  log(" 每个标签约 1-2 分钟");
  log("========================================");
  log("");

  let totalScored = 0;
  let totalNew = null;
  // Tags whose claim was rejected. The status=new query may keep returning
  // them, so they are skipped instead of being requested again and again.
  const unclaimable = new Set();

  // Reuse one TikTokScraper so enrichment does not start/stop it per tag.
  const enrichScraper = new TikTokScraper({ poolSize: 3 });
  await enrichScraper.init();
  log("✅ TikTokScraper 已就绪 (enrich 复用)");
  log("");

  try {
    while (true) {
      // Ask once how many tags are pending (used for progress output only).
      if (totalNew === null) {
        try {
          const statsRes = await fetch(
            `${baseUrl}/api/tags?status=new&limit=1000`,
          );
          const statsData = await statsRes.json();
          totalNew = statsData.total || 0;
          log(`📋 待打分标签: ${totalNew} 个`);
          log("");
        } catch (e) {
          log(`⚠️ 无法连接服务端: ${e.message}`);
          break;
        }
      }

      // Request one more tag than has been skipped, so at least one
      // not-yet-skipped tag is in the reply whenever such a tag exists.
      const tagsRes = await fetch(
        `${baseUrl}/api/tags?status=new&limit=${unclaimable.size + 1}`,
      );
      const tagsData = await tagsRes.json();
      const nextEntry = (tagsData.tags || []).find(
        (entry) => !unclaimable.has(entry.tag),
      );
      if (!nextEntry) {
        log("");
        log("========================================");
        log(` 🎉 全部完成! 共打分 ${totalScored} 个标签`);
        if (unclaimable.size > 0) {
          log(` ⚠️ ${unclaimable.size} 个标签无法锁定,已跳过`);
        }
        log("========================================");
        break;
      }

      const tag = nextEntry.tag;
      const startTime = Date.now();

      log(`[${totalScored + 1}/${totalNew || "?"}] 正在打分 #${tag} ...`);

      // Starts as "error" so an early failure is reported as such.
      const result = {
        tag,
        status: "error",
        score: 0,
        totalPosts: 0,
        authorCount: 0,
        matchedAuthors: 0,
        matchedCountries: [],
        pushedUsers: 0,
        error: null,
      };

      try {
        // Lock the tag before working on it.
        const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({ tag }),
        });
        const claimData = await claimRes.json();
        if (!claimData.ok) {
          log(` ⚠️ 无法锁定 (${claimData.error}),跳过`);
          unclaimable.add(tag);
          continue;
        }

        // Fetch the tag page videos.
        log(` 抓取 TikTok 标签页...`);
        const tagResult = await fetchTagData(tag, {
          onProgress: ({ videos, authors }) => {
            process.stderr.write(`\r 抓取中: ${videos} 视频, ${authors} 作者`);
          },
        });
        log(
          `\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者`,
        );

        result.totalPosts = tagResult.totalPosts || 0;
        result.authorCount = tagResult.uniqueAuthorCount || 0;
        let videos = tagResult.videos;

        if (!videos || videos.length === 0) {
          log(" ⚠️ 无视频,标记 dead");
          result.status = "dead";
          result.error = "no videos found";
          await reportToServer(baseUrl, result);
          totalScored++;
          continue;
        }

        // Enrich: look up the location of each video.
        log(` 补充国家信息...`);
        const enriched = await enrichVideosWithLocation(videos, {
          mode: "videos",
          existingScraper: enrichScraper,
          onProgress: ({ done, total, current, locationCreated }) => {
            // Only refresh the progress line every 10 items and at the end.
            if (done % 10 === 0 || done === total) {
              process.stderr.write(
                `\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}`,
              );
            }
          },
        });
        videos = enriched.videos;
        const withLoc = videos.filter((v) => v.locationCreated).length;
        log(` 完成: ${withLoc}/${videos.length} 个视频有国家信息`);

        // Filter + score (shared helper).
        const { filtered, matchedAuthorSet } = applyFilterAndScore(
          videos,
          targetCountries,
          result,
        );

        // Push users. Only the target-country videos are handed over: the
        // location recorded for an author must come from a video that
        // actually matched, not from an unrelated video by the same author.
        if (result.matchedAuthors > 0) {
          const pushResult = await pushToServer(
            baseUrl,
            [...matchedAuthorSet],
            filtered,
          );
          result.pushedUsers = pushResult.added || 0;
        }

        // Report the result.
        await reportToServer(baseUrl, result);

        totalScored++;
        const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
        const icon =
          result.status === "productive"
            ? "🟢"
            : result.status === "scored"
              ? "🟡"
              : result.status === "dead"
                ? "🔴"
                : "⚪";
        const mc = result.matchedCountries
          .map((c) => `${c.c}:${c.n}`)
          .join(" ");
        log(
          ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)`,
        );
        if (mc) log(` 国家: ${mc}`);
        log(` 剩余: ~${Math.max(0, (totalNew || 0) - totalScored)} 个`);
        log("");
      } catch (e) {
        log(` ❌ 失败: ${e.message}`);
        result.error = e.message;
        // reportToServer handles its own request failures, so no extra guard here.
        await reportToServer(baseUrl, result);
        totalScored++;
      }
    }
  } finally {
    await enrichScraper.close();
    log("✅ TikTokScraper 已关闭");
  }
}
|
|
599
|
+
|
|
600
|
+
/**
 * CLI handler for `tt-help tag`: processes one or more tags and emits the
 * collected results as JSON.
 *
 * Tags come from `tagTags.tags` and, with `--discover`, from `discoverTags`
 * (duplicates removed). Each tag is handled sequentially by `processTag`.
 * Output per tag is `{ tag, error }` on failure, `{ tag, authors }` with
 * `authorsOnly`, `{ tag, videos }` with `videosOnly`, otherwise the full
 * record. A single result is emitted as an object, several as an array;
 * the JSON goes to `outputFile` when given, else to stdout.
 *
 * Prints usage and exits with code 1 when `tagTags` is missing, and exits
 * with code 1 when there is no tag to process.
 *
 * @param {object} parsed - Parsed CLI arguments; reads `parsed.tagTags`
 *   (`tags`, `outputFile`, `authorsOnly`, `videosOnly`, `enrich`,
 *   `locations` as a comma-separated string, `noFilter`, `serverUrl`,
 *   `discover`).
 * @returns {Promise<void>}
 */
export async function handleTag(parsed) {
  const { tagTags } = parsed;

  if (!tagTags || tagTags.length === 0) {
    const usage = [
      "用法: tt-help tag <tag名称> [...] [-s <服务端>] [--enrich] [--locations <国家>] [--no-filter] [--discover]",
      "",
      "选项:",
      " -o, --output <file> 输出到 JSON 文件",
      " -s, --server <url> 推送到 watch 服务端",
      " --enrich [users|videos] 补充国家/地区信息(默认 videos)",
      " --locations <国家代码> 目标国家,逗号分隔(默认欧洲13国)",
      " --no-filter 不过滤国家",
      " --discover [数量] LLM 自动发现标签 + 记录有效标签",
      " --authors-only 只输出作者列表",
      " --videos-only 只输出视频列表",
      "",
      "示例:",
      " tt-help tag ventas --enrich -s http://127.0.0.1:3000",
      " tt-help tag --discover --enrich -s http://127.0.0.1:3000",
      " tt-help tag --discover 20 --locations ES,FR -s http://127.0.0.1:3000",
    ];
    for (const line of usage) {
      console.error(line);
    }
    process.exit(1);
  }

  const {
    tags,
    outputFile,
    authorsOnly,
    videosOnly,
    enrich,
    locations,
    noFilter,
    serverUrl,
    discover,
  } = tagTags;

  // Normalise "--locations es, fr" into ["ES", "FR"]; otherwise use defaults.
  let targetLocations;
  if (locations) {
    targetLocations = locations
      .split(",")
      .map((code) => code.trim().toUpperCase())
      .filter(Boolean);
  } else {
    targetLocations = DEFAULT_TARGET_LOCATIONS;
  }

  // Discovery implies enrichment.
  const autoEnrich = enrich || !!discover;

  let finalTags = tags || [];

  if (discover) {
    const discoverCount = typeof discover === "number" ? discover : 10;
    const generatedTags = await discoverTags(targetLocations, {
      count: discoverCount,
    });
    finalTags = [...new Set([...finalTags, ...generatedTags])];
    process.stderr.write(` 共 ${finalTags.length} 个标签待处理\n\n`);
  }

  if (finalTags.length === 0) {
    console.error("没有标签可处理,请提供标签或使用 --discover");
    process.exit(1);
  }

  const allResults = [];

  for (const [position, tagName] of finalTags.entries()) {
    const result = await processTag(tagName, position, finalTags.length, {
      enrich: autoEnrich,
      targetLocations,
      noFilter,
      serverUrl,
      recordTags: !!discover,
    });

    let entry;
    if (result.error) {
      entry = { tag: result.tag, error: result.error };
    } else if (authorsOnly) {
      entry = { tag: result.tag, authors: result.authors };
    } else if (videosOnly) {
      entry = { tag: result.tag, videos: result.videos };
    } else {
      entry = {
        tag: result.tag,
        totalPosts: result.totalPosts,
        videoCount: result.videoCount,
        authorCount: result.authorCount,
        authors: result.authors,
        videos: result.videos,
      };
    }
    allResults.push(entry);
  }

  // A lone result is emitted unwrapped.
  const payload = allResults.length === 1 ? allResults[0] : allResults;
  const json = JSON.stringify(payload, null, 2);

  if (outputFile) {
    writeFileSync(outputFile, json, "utf-8");
    process.stderr.write(`\n已保存到 ${outputFile}\n`);
  } else {
    console.log(json);
  }
}
|