tt-help-cli-ycl 1.3.85 → 1.3.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.85",
3
+ "version": "1.3.87",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -6,6 +6,10 @@ import {
6
6
  isLocationInList,
7
7
  } from "../lib/target-locations.js";
8
8
  import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
9
+ import { server as cfgServer } from "../lib/constants.js";
10
+
11
+ const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
12
+ const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
9
13
 
10
14
  async function pushToServer(serverUrl, filteredAuthors, videos) {
11
15
  const users = filteredAuthors.map((author) => {
@@ -182,14 +186,26 @@ async function processTag(
182
186
 
183
187
  export async function handleDiscover(parsed) {
184
188
  const { tagDiscover } = parsed;
185
- const { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
189
+ let { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
190
+
191
+ // 支持 'all' 展开为全部目标国家
192
+ if (
193
+ countries &&
194
+ countries.length === 1 &&
195
+ countries[0].toUpperCase() === "ALL"
196
+ ) {
197
+ countries = ALL_COUNTRIES;
198
+ }
186
199
 
187
200
  if (!countries || countries.length === 0) {
188
201
  console.error(
189
- "用法: tt-help tag discover <国家> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
202
+ "用法: tt-help tag discover <国家|all> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
190
203
  );
191
204
  console.error("");
192
205
  console.error("示例:");
206
+ console.error(
207
+ " tt-help tag discover all --count 10 # 为全部 13 个国家各生成 10 个标签",
208
+ );
193
209
  console.error(
194
210
  " tt-help tag discover ES # 为西班牙生成 4 个标签",
195
211
  );
@@ -249,7 +265,7 @@ export async function handleScore(parsed) {
249
265
  process.exit(1);
250
266
  }
251
267
 
252
- const baseUrl = serverUrl || "http://127.0.0.1:3000";
268
+ const baseUrl = serverUrl || DEFAULT_SERVER;
253
269
  const targetCountries = countries || [
254
270
  "ES",
255
271
  "FR",
@@ -403,9 +419,9 @@ async function reportToServer(baseUrl, result) {
403
419
 
404
420
  export async function handleScoreAll(parsed) {
405
421
  const { tagScoreAll } = parsed;
406
- const { countries, serverUrl } = tagScoreAll || {};
422
+ let { countries, serverUrl, autoDiscover } = tagScoreAll || {};
407
423
 
408
- const baseUrl = serverUrl || "http://127.0.0.1:3000";
424
+ const baseUrl = serverUrl || DEFAULT_SERVER;
409
425
  const targetCountries = countries || [
410
426
  "ES",
411
427
  "FR",
@@ -429,13 +445,15 @@ export async function handleScoreAll(parsed) {
429
445
  log(" 自动循环打分模式(客户端本地执行)");
430
446
  log(` 目标国家: ${targetCountries.join(", ")}`);
431
447
  log(` 服务端: ${baseUrl}`);
448
+ if (autoDiscover) log(` 自动发现: 开启(无任务时自动生成标签)`);
432
449
  log(" 流程: 从服务端拉 tag → 本地 Playwright 抓取 → enrich → 算分 → 上报");
433
450
  log(" 每个标签约 1-2 分钟");
434
451
  log("========================================");
435
452
  log("");
436
453
 
437
454
  let totalScored = 0;
438
- let totalNew = null;
455
+ let lastDiscoverTime = 0;
456
+ const DISCOVER_COOLDOWN = 5 * 60 * 1000; // 5 分钟冷却
439
457
 
440
458
  // 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
441
459
  const enrichScraper = new TikTokScraper({ poolSize: 3 });
@@ -445,37 +463,42 @@ export async function handleScoreAll(parsed) {
445
463
 
446
464
  try {
447
465
  while (true) {
448
- // 查剩余数量
449
- if (totalNew === null) {
450
- try {
451
- const statsRes = await fetch(
452
- `${baseUrl}/api/tags?status=new&limit=1000`,
453
- );
454
- const statsData = await statsRes.json();
455
- totalNew = statsData.total || 0;
456
- log(`📋 待打分标签: ${totalNew} 个`);
457
- log("");
458
- } catch (e) {
459
- log(`⚠️ 无法连接服务端: ${e.message}`);
460
- break;
461
- }
462
- }
463
-
464
466
  // 从服务端取下一个 new 标签
465
467
  const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
466
468
  const tagsData = await tagsRes.json();
467
469
  if (!tagsData.tags || tagsData.tags.length === 0) {
468
- log("");
469
- log("========================================");
470
- log(` 🎉 全部完成! 共打分 ${totalScored} 个标签`);
471
- log("========================================");
472
- break;
470
+ // 自动发现:无任务时自动生成标签
471
+ if (autoDiscover && Date.now() - lastDiscoverTime > DISCOVER_COOLDOWN) {
472
+ log(
473
+ `🔍 无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
474
+ );
475
+ for (const country of targetCountries) {
476
+ try {
477
+ const discRes = await fetch(
478
+ `${baseUrl}/api/tags/discover?country=${country}&count=5`,
479
+ );
480
+ const discData = await discRes.json();
481
+ if (discData.inserted) {
482
+ log(` ${country}: 新增 ${discData.inserted} 个`);
483
+ }
484
+ } catch (e) {
485
+ log(` ${country}: 请求失败 (${e.message})`);
486
+ }
487
+ }
488
+ lastDiscoverTime = Date.now();
489
+ // 等 3 秒让服务端处理完
490
+ await new Promise((r) => setTimeout(r, 3000));
491
+ continue;
492
+ }
493
+ log(`⏳ 暂无待打分标签,10 秒后重试...`);
494
+ await new Promise((r) => setTimeout(r, 10000));
495
+ continue;
473
496
  }
474
497
 
475
- const tag = tagsData.tags[0].tag;
498
+ const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
476
499
  const startTime = Date.now();
477
500
 
478
- log(`[${totalScored + 1}/${totalNew || "?"}] 正在打分 #${tag} ...`);
501
+ log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
479
502
 
480
503
  const result = {
481
504
  tag,
@@ -498,7 +521,16 @@ export async function handleScoreAll(parsed) {
498
521
  });
499
522
  const claimData = await claimRes.json();
500
523
  if (!claimData.ok) {
501
- log(` ⚠️ 无法锁定 (${claimData.error}),跳过`);
524
+ // already claimed: 其他机器抢先了,跳过不标 dead
525
+ if (claimData.error && claimData.error.includes("already claimed")) {
526
+ log(` ⏭️ 已被其他客户端锁定,跳过`);
527
+ continue;
528
+ }
529
+ log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
530
+ result.error = claimData.error;
531
+ result.status = "dead";
532
+ await reportToServer(baseUrl, result);
533
+ totalScored++;
502
534
  continue;
503
535
  }
504
536
 
@@ -527,7 +559,6 @@ export async function handleScoreAll(parsed) {
527
559
  }
528
560
 
529
561
  // enrich: 逐个视频查 view-source 获取国家
530
- log(` 补充国家信息...`);
531
562
  const enriched = await enrichVideosWithLocation(videos, {
532
563
  mode: "videos",
533
564
  existingScraper: enrichScraper,
@@ -540,8 +571,6 @@ export async function handleScoreAll(parsed) {
540
571
  },
541
572
  });
542
573
  videos = enriched.videos;
543
- const withLoc = videos.filter((v) => v.locationCreated).length;
544
- log(` 完成: ${withLoc}/${videos.length} 个视频有国家信息`);
545
574
 
546
575
  // 过滤 + 算分 (共用函数)
547
576
  const { matchedAuthorSet } = applyFilterAndScore(
@@ -577,10 +606,8 @@ export async function handleScoreAll(parsed) {
577
606
  .map((c) => `${c.c}:${c.n}`)
578
607
  .join(" ");
579
608
  log(
580
- ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)`,
609
+ ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}`,
581
610
  );
582
- if (mc) log(` 国家: ${mc}`);
583
- log(` 剩余: ~${Math.max(0, (totalNew || 0) - totalScored)} 个`);
584
611
  log("");
585
612
  } catch (e) {
586
613
  log(` ❌ 失败: ${e.message}`);
package/src/lib/args.js CHANGED
@@ -727,6 +727,7 @@ function parseTagArgs(args) {
727
727
  let discoverCountries = [];
728
728
  let discoverCount = 4;
729
729
  let discoverPrompt = null;
730
+ let autoDiscover = false;
730
731
  let isDiscover = false;
731
732
  let isScore = false;
732
733
  let isScoreAll = false;
@@ -767,6 +768,8 @@ function parseTagArgs(args) {
767
768
  }
768
769
  } else if (arg === "--count") {
769
770
  discoverCount = parseInt(args[++i]) || 4;
771
+ } else if (arg === "--auto-discover") {
772
+ autoDiscover = true;
770
773
  } else if (arg === "--countries") {
771
774
  scoreCountries = args[++i]
772
775
  .split(",")
@@ -854,6 +857,7 @@ function parseTagArgs(args) {
854
857
  tagScoreAll: {
855
858
  countries: scoreCountries,
856
859
  serverUrl,
860
+ autoDiscover,
857
861
  },
858
862
  urls: [],
859
863
  outputFormat: "json",
@@ -1,14 +1,20 @@
1
- import { readFileSync, writeFileSync, existsSync } from 'fs';
2
- import { resolve, dirname } from 'path';
3
- import { fileURLToPath } from 'url';
1
+ import { readFileSync, writeFileSync, existsSync } from "fs";
2
+ import { resolve, dirname } from "path";
3
+ import { fileURLToPath } from "url";
4
4
 
5
5
  const __dirname = dirname(fileURLToPath(import.meta.url));
6
- const TAGS_FILE = resolve(__dirname, '..', '..', 'data', 'productive-tags.json');
6
+ const TAGS_FILE = resolve(
7
+ __dirname,
8
+ "..",
9
+ "..",
10
+ "data",
11
+ "productive-tags.json",
12
+ );
7
13
 
8
14
  function loadTags() {
9
15
  try {
10
16
  if (existsSync(TAGS_FILE)) {
11
- return JSON.parse(readFileSync(TAGS_FILE, 'utf-8'));
17
+ return JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
12
18
  }
13
19
  } catch {}
14
20
  return { tags: [], lastUpdated: null };
@@ -17,10 +23,10 @@ function loadTags() {
17
23
  function saveTags(data) {
18
24
  const dir = dirname(TAGS_FILE);
19
25
  if (!existsSync(dir)) {
20
- const { mkdirSync } = require('fs');
26
+ const { mkdirSync } = require("fs");
21
27
  mkdirSync(dir, { recursive: true });
22
28
  }
23
- writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), 'utf-8');
29
+ writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
24
30
  }
25
31
 
26
32
  export function getProductiveTags() {
@@ -29,7 +35,7 @@ export function getProductiveTags() {
29
35
 
30
36
  export function recordProductiveTag(tag, country, userCount) {
31
37
  const data = loadTags();
32
- const existing = data.tags.find(t => t.tag === tag);
38
+ const existing = data.tags.find((t) => t.tag === tag);
33
39
  if (existing) {
34
40
  if (!existing.countries.includes(country)) {
35
41
  existing.countries.push(country);
@@ -50,39 +56,50 @@ export function recordProductiveTag(tag, country, userCount) {
50
56
  }
51
57
 
52
58
  async function callLLM(prompt) {
53
- const apiKey = process.env.APIKEY || '';
54
- const { fetch } = await import('undici');
55
-
56
- const response = await fetch('http://82.156.52.214:18000/v1/chat/completions', {
57
- method: 'POST',
58
- headers: {
59
- 'Content-Type': 'application/json',
60
- Authorization: `Bearer ${apiKey}`,
59
+ const apiKey = process.env.APIKEY || "";
60
+ const { fetch } = await import("undici");
61
+
62
+ const response = await fetch(
63
+ "http://82.156.52.214:18000/v1/chat/completions",
64
+ {
65
+ method: "POST",
66
+ headers: {
67
+ "Content-Type": "application/json",
68
+ Authorization: `Bearer ${apiKey}`,
69
+ },
70
+ body: JSON.stringify({
71
+ model: "zc-fast",
72
+ messages: [{ role: "user", content: prompt }],
73
+ max_tokens: 1024,
74
+ temperature: 0.7,
75
+ }),
61
76
  },
62
- body: JSON.stringify({
63
- model: 'zc-fast',
64
- messages: [{ role: 'user', content: prompt }],
65
- max_tokens: 1024,
66
- temperature: 0.7,
67
- }),
68
- });
77
+ );
69
78
 
70
79
  const result = await response.json();
71
- const content = result.choices?.[0]?.message?.content || '';
80
+ const content = result.choices?.[0]?.message?.content || "";
72
81
  return content;
73
82
  }
74
83
 
84
+ function normalizeTag(t) {
85
+ return t.replace(/^#+/, "").trim().toLowerCase();
86
+ }
87
+
75
88
  function parseTagsFromResponse(content) {
76
89
  try {
77
90
  const parsed = JSON.parse(content);
78
- if (Array.isArray(parsed)) return parsed;
79
- if (Array.isArray(parsed.tags)) return parsed.tags;
91
+ if (Array.isArray(parsed)) {
92
+ return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
93
+ }
94
+ if (Array.isArray(parsed.tags)) {
95
+ return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
96
+ }
80
97
  } catch {}
81
98
 
82
99
  const lines = content.split(/[\n,]+/);
83
100
  const tags = [];
84
101
  for (const line of lines) {
85
- const cleaned = line.replace(/^[-\d.\s#]+/, '').trim().toLowerCase();
102
+ const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
86
103
  if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
87
104
  tags.push(cleaned);
88
105
  }
@@ -91,17 +108,22 @@ function parseTagsFromResponse(content) {
91
108
  }
92
109
 
93
110
  export async function discoverTags(countries, options = {}) {
94
- const { language = 'auto', count = 10 } = options;
111
+ const { language = "auto", count = 10 } = options;
95
112
 
96
113
  const productiveTags = getProductiveTags();
97
- const countryStr = Array.isArray(countries) ? countries.join(', ') : countries;
98
- const langHint = language === 'auto'
99
- ? ''
100
- : `Tags should be in ${language} language.`;
101
-
102
- const historyHint = productiveTags.length > 0
103
- ? `Previously productive tags for these countries: ${productiveTags.filter(t => t.countries.some(c => countries.includes(c))).map(t => `#${t.tag}`).join(', ')}. Generate new ones, don't repeat these.`
104
- : '';
114
+ const countryStr = Array.isArray(countries)
115
+ ? countries.join(", ")
116
+ : countries;
117
+ const langHint =
118
+ language === "auto" ? "" : `Tags should be in ${language} language.`;
119
+
120
+ const historyHint =
121
+ productiveTags.length > 0
122
+ ? `Previously productive tags for these countries: ${productiveTags
123
+ .filter((t) => t.countries.some((c) => countries.includes(c)))
124
+ .map((t) => `#${t.tag}`)
125
+ .join(", ")}. Generate new ones, don't repeat these.`
126
+ : "";
105
127
 
106
128
  const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
107
129
 
@@ -114,11 +136,15 @@ ${historyHint}
114
136
 
115
137
  Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
116
138
 
117
- process.stderr.write(` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`);
139
+ process.stderr.write(
140
+ ` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
141
+ );
118
142
  const content = await callLLM(prompt);
119
143
  const tags = parseTagsFromResponse(content);
120
144
 
121
145
  const unique = [...new Set(tags)].slice(0, count);
122
- process.stderr.write(` [LLM] 生成 ${unique.length} 个标签: ${unique.join(', ')}\n`);
146
+ process.stderr.write(
147
+ ` [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
148
+ );
123
149
  return unique;
124
150
  }
package/src/npm-main.js CHANGED
@@ -7,7 +7,12 @@ import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
7
7
  import { handleOpen } from "./cli/open.js";
8
8
  import { handleComments } from "./cli/comments.js";
9
9
  import { handleRefresh } from "./cli/refresh.js";
10
- import { handleTag } from "./cli/tag.js";
10
+ import {
11
+ handleTag,
12
+ handleDiscover,
13
+ handleScore,
14
+ handleScoreAll,
15
+ } from "./cli/tag.js";
11
16
 
12
17
  function exitUnsupportedCommand(command) {
13
18
  console.error(
@@ -39,6 +44,12 @@ async function main() {
39
44
  return handleRefresh(parsed);
40
45
  case "tag":
41
46
  return handleTag(parsed);
47
+ case "tag-discover":
48
+ return handleDiscover(parsed);
49
+ case "tag-score":
50
+ return handleScore(parsed);
51
+ case "tag-score-all":
52
+ return handleScoreAll(parsed);
42
53
  }
43
54
 
44
55
  const {
@@ -1344,6 +1344,11 @@ function getRawJobsPageFromDb({
1344
1344
 
1345
1345
  function insertTag(tag, countries, source = "llm") {
1346
1346
  if (!db) return { inserted: false, error: "db not ready" };
1347
+ // 防止存入带 # 前缀的 tag
1348
+ const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
1349
+ if (!normalized || normalized.length < 2) {
1350
+ return { inserted: false, error: "invalid tag" };
1351
+ }
1347
1352
  try {
1348
1353
  const result = db
1349
1354
  .prepare(
@@ -1352,8 +1357,8 @@ function insertTag(tag, countries, source = "llm") {
1352
1357
  VALUES (?, ?, ?)
1353
1358
  `,
1354
1359
  )
1355
- .run(tag, JSON.stringify(countries), source);
1356
- return { inserted: result.changes > 0, tag };
1360
+ .run(normalized, JSON.stringify(countries), source);
1361
+ return { inserted: result.changes > 0, tag: normalized };
1357
1362
  } catch (e) {
1358
1363
  return { inserted: false, error: e.message };
1359
1364
  }
@@ -1415,12 +1420,19 @@ function getDeadTags(country) {
1415
1420
 
1416
1421
  function claimTag(tag) {
1417
1422
  if (!db) return { ok: false, error: "db not ready" };
1418
- const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1419
- if (!row) return { ok: false, error: "tag not found" };
1420
- if (row.status !== "new")
1421
- return { ok: false, error: `tag status is ${row.status}, not new` };
1422
- db.prepare("UPDATE tags SET status = 'scoring' WHERE tag = ?").run(tag);
1423
- return { ok: true, tag, previousStatus: row.status };
1423
+ // 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
1424
+ const result = db
1425
+ .prepare(
1426
+ "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
1427
+ )
1428
+ .run(tag);
1429
+ if (result.changes === 0) {
1430
+ // 检查是否不存在 vs 已被别人锁定
1431
+ const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1432
+ if (!row) return { ok: false, error: "tag not found" };
1433
+ return { ok: false, error: `tag status is ${row.status}, already claimed` };
1434
+ }
1435
+ return { ok: true, tag };
1424
1436
  }
1425
1437
 
1426
1438
  function reportTagScore(tag, fields) {
@@ -1503,6 +1515,68 @@ function rawQuery(sql, params = []) {
1503
1515
  }
1504
1516
  }
1505
1517
 
1518
+ // 清理 tags 表中以 # 开头的脏数据
1519
+ function normalizeTags() {
1520
+ if (!db) return { ok: false, error: "db not ready" };
1521
+ const dirtyRows = db
1522
+ .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
1523
+ .all();
1524
+ const fixed = [];
1525
+ const merged = [];
1526
+ const skipped = [];
1527
+
1528
+ for (const row of dirtyRows) {
1529
+ const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
1530
+ if (!cleanTag || cleanTag.length < 2) {
1531
+ db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1532
+ skipped.push({
1533
+ dirty: row.tag,
1534
+ reason: "empty after normalize, deleted",
1535
+ });
1536
+ continue;
1537
+ }
1538
+
1539
+ // 检查 cleanTag 是否已存在
1540
+ const existing = db
1541
+ .prepare("SELECT * FROM tags WHERE tag = ?")
1542
+ .get(cleanTag);
1543
+ if (existing) {
1544
+ // 合并:保留已有 clean 版本,合并 countries
1545
+ const oldCountries = JSON.parse(row.countries || "[]");
1546
+ const existCountries = JSON.parse(existing.countries || "[]");
1547
+ const mergedCountries = [
1548
+ ...new Set([...existCountries, ...oldCountries]),
1549
+ ];
1550
+ db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
1551
+ JSON.stringify(mergedCountries),
1552
+ cleanTag,
1553
+ );
1554
+ // 删除脏数据
1555
+ db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1556
+ merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1557
+ } else {
1558
+ // 直接重命名
1559
+ db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
1560
+ fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1561
+ }
1562
+ }
1563
+
1564
+ return {
1565
+ ok: true,
1566
+ fixed: fixed.length,
1567
+ merged: merged.length,
1568
+ skipped: skipped.length,
1569
+ details: { fixed, merged, skipped },
1570
+ };
1571
+ }
1572
+
1573
+ function clearTags() {
1574
+ if (!db) return { ok: false, error: "db not ready" };
1575
+ const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
1576
+ db.exec("DELETE FROM tags");
1577
+ return { ok: true, deleted: count };
1578
+ }
1579
+
1506
1580
  function getUsersPageFromDb({
1507
1581
  status,
1508
1582
  search,
@@ -4759,6 +4833,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4759
4833
  claimTag,
4760
4834
  reportTagScore,
4761
4835
  getAllTags,
4836
+ normalizeTags,
4837
+ clearTags,
4762
4838
  data,
4763
4839
  };
4764
4840
 
@@ -994,6 +994,20 @@ export function startWatchServer(
994
994
  return;
995
995
  }
996
996
 
997
+ // POST /api/tags/normalize — 清理以 # 开头的脏 tag
998
+ if (req.method === "POST" && routePath === "/api/tags/normalize") {
999
+ const result = store.normalizeTags();
1000
+ sendJSON(res, 200, result);
1001
+ return;
1002
+ }
1003
+
1004
+ // POST /api/tags/clear — 清空 tags 表
1005
+ if (req.method === "POST" && routePath === "/api/tags/clear") {
1006
+ const result = store.clearTags();
1007
+ sendJSON(res, 200, result);
1008
+ return;
1009
+ }
1010
+
997
1011
  // POST /api/tags/claim { tag } — 锁定 tag 状态为 scoring(防并发冲突)
998
1012
  if (req.method === "POST" && routePath === "/api/tags/claim") {
999
1013
  try {
@@ -53,20 +53,25 @@ async function callLLM(prompt) {
53
53
  return result.choices?.[0]?.message?.content || "";
54
54
  }
55
55
 
56
+ function normalizeTag(t) {
57
+ return t.replace(/^#+/, "").trim().toLowerCase();
58
+ }
59
+
56
60
  function parseTagsFromResponse(content) {
57
61
  try {
58
62
  const parsed = JSON.parse(content);
59
- if (Array.isArray(parsed)) return parsed;
60
- if (Array.isArray(parsed.tags)) return parsed.tags;
63
+ if (Array.isArray(parsed)) {
64
+ return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
65
+ }
66
+ if (Array.isArray(parsed.tags)) {
67
+ return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
68
+ }
61
69
  } catch {}
62
70
 
63
71
  const lines = content.split(/[\n,]+/);
64
72
  const tags = [];
65
73
  for (const line of lines) {
66
- const cleaned = line
67
- .replace(/^[-\d.\s#]+/, "")
68
- .trim()
69
- .toLowerCase();
74
+ const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
70
75
  if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
71
76
  tags.push(cleaned);
72
77
  }
@@ -165,7 +170,7 @@ export async function discoverTagsForCountry(
165
170
  const inserted = [];
166
171
  for (const tag of unique) {
167
172
  const result = store.insertTag(tag, [country], "llm");
168
- if (result.inserted) inserted.push(tag);
173
+ if (result.inserted) inserted.push(result.tag);
169
174
  }
170
175
 
171
176
  return {