tt-help-cli-ycl 1.3.88 → 1.3.91

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.88",
3
+ "version": "1.3.91",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/auto.js CHANGED
@@ -232,6 +232,13 @@ export async function handleAuto(options) {
232
232
  displayName: Array.isArray(f) ? f[1] : null,
233
233
  guessedLocation,
234
234
  })),
235
+ discoveredRecommended: (result.discoveredRecommended || []).map(
236
+ (f) => ({
237
+ handle: Array.isArray(f) ? f[0] : f,
238
+ displayName: Array.isArray(f) ? f[1] : null,
239
+ guessedLocation,
240
+ }),
241
+ ),
235
242
  };
236
243
  await apiPost(`${serverUrl}/api/job/${username}`, payload);
237
244
  console.error(" 已提交");
@@ -143,7 +143,9 @@ export async function handleExplore(options) {
143
143
  console.error(`CDP 端口: ${cdpOptions.port}, 用户编号: ${userId}`);
144
144
  console.error(`浏览器配置: ${path.basename(cdpOptions.userDataDir)}`);
145
145
 
146
- const { apiGet, apiPost } = createApiClient({ meta: { port: cdpOptions.port } });
146
+ const { apiGet, apiPost } = createApiClient({
147
+ meta: { port: cdpOptions.port },
148
+ });
147
149
 
148
150
  await apiGet(`${serverUrl}/api/stats`);
149
151
 
@@ -508,7 +510,8 @@ export async function handleExplore(options) {
508
510
  if (result.hasFollowData && result.keepFollow) {
509
511
  const totalFollows =
510
512
  (result.discoveredFollowing || []).length +
511
- (result.discoveredFollowers || []).length;
513
+ (result.discoveredFollowers || []).length +
514
+ (result.discoveredRecommended || []).length;
512
515
  if (totalFollows > 0) {
513
516
  lastFollowSuccessTime = Date.now();
514
517
  }
@@ -528,6 +531,13 @@ export async function handleExplore(options) {
528
531
  displayName: Array.isArray(f) ? f[1] : null,
529
532
  guessedLocation,
530
533
  })),
534
+ discoveredRecommended: (result.discoveredRecommended || []).map(
535
+ (f) => ({
536
+ handle: Array.isArray(f) ? f[0] : f,
537
+ displayName: Array.isArray(f) ? f[1] : null,
538
+ guessedLocation,
539
+ }),
540
+ ),
531
541
  processed: result.processed,
532
542
  hasFollowData: result.hasFollowData,
533
543
  keepFollow: result.keepFollow,
@@ -155,7 +155,9 @@ export async function handleRefresh(options) {
155
155
  );
156
156
  }
157
157
 
158
- const { apiGet, apiPost } = createApiClient({ meta: { port: cdpOptions.port } });
158
+ const { apiGet, apiPost } = createApiClient({
159
+ meta: { port: cdpOptions.port },
160
+ });
159
161
 
160
162
  // 连接服务器验证
161
163
  await apiGet(`${serverUrl}/api/stats`);
@@ -545,6 +547,13 @@ export async function handleRefresh(options) {
545
547
  displayName: Array.isArray(f) ? f[1] : null,
546
548
  guessedLocation,
547
549
  })),
550
+ discoveredRecommended: (result.discoveredRecommended || []).map(
551
+ (f) => ({
552
+ handle: Array.isArray(f) ? f[0] : f,
553
+ displayName: Array.isArray(f) ? f[1] : null,
554
+ guessedLocation,
555
+ }),
556
+ ),
548
557
  processed: result.processed,
549
558
  hasFollowData: result.hasFollowData,
550
559
  keepFollow: result.keepFollow,
package/src/cli/tag.js CHANGED
@@ -5,7 +5,7 @@ import {
5
5
  DEFAULT_TARGET_LOCATIONS,
6
6
  isLocationInList,
7
7
  } from "../lib/target-locations.js";
8
- import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
8
+ import { discoverTags } from "../lib/tag-discover.js";
9
9
  import { server as cfgServer } from "../lib/constants.js";
10
10
 
11
11
  const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
@@ -157,9 +157,18 @@ async function processTag(
157
157
  const countries = [
158
158
  ...new Set(videos.map((v) => v.locationCreated).filter(Boolean)),
159
159
  ];
160
- for (const c of countries) {
161
- recordProductiveTag(tag, c, pushResult.added);
162
- }
160
+ // 通过 API 上报到服务端,由服务端写入数据库
161
+ try {
162
+ await fetch(`${serverUrl}/api/tags/productive`, {
163
+ method: "POST",
164
+ headers: { "Content-Type": "application/json" },
165
+ body: JSON.stringify({
166
+ tag,
167
+ countries,
168
+ pushedUsers: pushResult.added,
169
+ }),
170
+ });
171
+ } catch {}
163
172
  process.stderr.write(
164
173
  ` 已记录标签 #${tag} (${countries.join(",")}, ${pushResult.added} 用户)\n`,
165
174
  );
@@ -221,7 +230,7 @@ export async function handleDiscover(parsed) {
221
230
  process.exit(1);
222
231
  }
223
232
 
224
- const baseUrl = serverUrl || "http://127.0.0.1:3000";
233
+ const baseUrl = serverUrl || DEFAULT_SERVER;
225
234
 
226
235
  for (const country of countries) {
227
236
  const params = new URLSearchParams({ country, count: String(count) });
@@ -452,8 +461,8 @@ export async function handleScoreAll(parsed) {
452
461
  log("");
453
462
 
454
463
  let totalScored = 0;
455
- let lastDiscoverTime = 0;
456
- const DISCOVER_COOLDOWN = 5 * 60 * 1000; // 5 分钟冷却
464
+ let emptyRounds = 0; // 连续无任务的轮数
465
+ const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
457
466
 
458
467
  // 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
459
468
  const enrichScraper = new TikTokScraper({ poolSize: 3 });
@@ -463,56 +472,64 @@ export async function handleScoreAll(parsed) {
463
472
 
464
473
  try {
465
474
  while (true) {
466
- // 从服务端取下一个 new 标签
467
- const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
468
- const tagsData = await tagsRes.json();
469
- if (!tagsData.tags || tagsData.tags.length === 0) {
470
- // 自动发现:无任务时自动生成标签
471
- if (autoDiscover && Date.now() - lastDiscoverTime > DISCOVER_COOLDOWN) {
472
- log(
473
- `🔍 无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
474
- );
475
- for (const country of targetCountries) {
476
- try {
477
- const discRes = await fetch(
478
- `${baseUrl}/api/tags/discover?country=${country}&count=5`,
479
- );
480
- const discData = await discRes.json();
481
- if (discData.inserted) {
482
- log(` ${country}: 新增 ${discData.inserted} 个`);
475
+ try {
476
+ // 从服务端取下一个 new 标签
477
+ const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
478
+ const tagsData = await tagsRes.json();
479
+ if (!tagsData.tags || tagsData.tags.length === 0) {
480
+ emptyRounds++;
481
+
482
+ // 自动发现:连续 N 轮无任务时自动生成标签
483
+ if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
484
+ log(
485
+ `🔍 连续 ${emptyRounds} 轮无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
486
+ );
487
+ for (const country of targetCountries) {
488
+ try {
489
+ const discRes = await fetch(
490
+ `${baseUrl}/api/tags/discover?country=${country}&count=5`,
491
+ );
492
+ const discData = await discRes.json();
493
+ if (discData.inserted) {
494
+ log(` ${country}: 新增 ${discData.inserted} 个`);
495
+ }
496
+ } catch (e) {
497
+ log(` ${country}: 请求失败 (${e.message})`);
483
498
  }
484
- } catch (e) {
485
- log(` ${country}: 请求失败 (${e.message})`);
486
499
  }
500
+ emptyRounds = 0; // 重置计数器
501
+ // 等 3 秒让服务端处理完
502
+ await new Promise((r) => setTimeout(r, 3000));
503
+ continue;
487
504
  }
488
- lastDiscoverTime = Date.now();
489
- // 3 秒让服务端处理完
490
- await new Promise((r) => setTimeout(r, 3000));
505
+ log(`⏳ 暂无待打分标签(连续 ${emptyRounds} 轮),10 秒后重试...`);
506
+ await new Promise((r) => setTimeout(r, 10000));
491
507
  continue;
492
508
  }
493
- log(`⏳ 暂无待打分标签,10 秒后重试...`);
494
- await new Promise((r) => setTimeout(r, 10000));
495
- continue;
496
- }
497
-
498
- const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
499
- const startTime = Date.now();
500
509
 
501
- log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
510
+ // 有任务了,重置计数器
511
+ emptyRounds = 0;
512
+
513
+ const tag = tagsData.tags[0].tag
514
+ .replace(/^#+/, "")
515
+ .trim()
516
+ .toLowerCase();
517
+ const startTime = Date.now();
518
+
519
+ log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
520
+
521
+ const result = {
522
+ tag,
523
+ status: "error",
524
+ score: 0,
525
+ totalPosts: 0,
526
+ authorCount: 0,
527
+ matchedAuthors: 0,
528
+ matchedCountries: [],
529
+ pushedUsers: 0,
530
+ error: null,
531
+ };
502
532
 
503
- const result = {
504
- tag,
505
- status: "error",
506
- score: 0,
507
- totalPosts: 0,
508
- authorCount: 0,
509
- matchedAuthors: 0,
510
- matchedCountries: [],
511
- pushedUsers: 0,
512
- error: null,
513
- };
514
-
515
- try {
516
533
  // 锁定 tag
517
534
  const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
518
535
  method: "POST",
@@ -610,10 +627,28 @@ export async function handleScoreAll(parsed) {
610
627
  );
611
628
  log("");
612
629
  } catch (e) {
630
+ // 区分网络错误和业务错误
631
+ const isNetworkError =
632
+ e.code === "ECONNREFUSED" ||
633
+ e.code === "ENOTFOUND" ||
634
+ e.code === "ECONNRESET" ||
635
+ (e.message &&
636
+ (e.message.includes("ECONNREFUSED") ||
637
+ e.message.includes("fetch failed") ||
638
+ e.message.includes("network")));
639
+ if (isNetworkError) {
640
+ log(` ⚠️ 服务端连接失败 (${e.message}),15 秒后重试...`);
641
+ await new Promise((r) => setTimeout(r, 15000));
642
+ continue;
643
+ }
613
644
  log(` ❌ 失败: ${e.message}`);
614
- result.error = e.message;
615
645
  try {
616
- await reportToServer(baseUrl, result);
646
+ await reportToServer(baseUrl, {
647
+ tag: "",
648
+ status: "error",
649
+ score: 0,
650
+ error: e.message,
651
+ });
617
652
  } catch {}
618
653
  totalScored++;
619
654
  }
@@ -684,6 +719,7 @@ export async function handleTag(parsed) {
684
719
  const discoverCount = typeof discover === "number" ? discover : 10;
685
720
  const generatedTags = await discoverTags(targetLocations, {
686
721
  count: discoverCount,
722
+ serverUrl,
687
723
  });
688
724
  finalTags = [...new Set([...finalTags, ...generatedTags])];
689
725
  process.stderr.write(` 共 ${finalTags.length} 个标签待处理\n\n`);
package/src/lib/tag-discover.js CHANGED
@@ -1,150 +1,113 @@
1
- import { readFileSync, writeFileSync, existsSync } from "fs";
2
- import { resolve, dirname } from "path";
3
- import { fileURLToPath } from "url";
4
-
5
- const __dirname = dirname(fileURLToPath(import.meta.url));
6
- const TAGS_FILE = resolve(
7
- __dirname,
8
- "..",
9
- "..",
10
- "data",
11
- "productive-tags.json",
12
- );
13
-
14
- function loadTags() {
15
- try {
16
- if (existsSync(TAGS_FILE)) {
17
- return JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
18
- }
19
- } catch {}
20
- return { tags: [], lastUpdated: null };
1
+ /**
2
+ * Tag 发现(CLI 模式)
3
+ *
4
+ * 使用 tag-service 的公共函数(LLM 调用、prompt 组装、解析)。
5
+ * 历史 tag 数据通过 API 从服务端获取,不再读写 productive-tags.json。
6
+ */
7
+ import {
8
+ COUNTRY_LANG,
9
+ getLang,
10
+ callLLM,
11
+ normalizeTag,
12
+ parseTagsFromResponse,
13
+ buildDiscoverPrompt,
14
+ } from "../watch/tag-service.js";
15
+
16
+ const DEFAULT_SERVER = "http://127.0.0.1:3000";
17
+
18
+ /**
19
+ * 从服务端获取某国的历史 tag(正样本 + 负样本 + 全部已存在)
20
+ */
21
+ async function fetchTagHistory(serverUrl, country) {
22
+ const baseUrl = serverUrl || DEFAULT_SERVER;
23
+
24
+ const productivePromise = fetch(
25
+ `${baseUrl}/api/tags/history?country=${country}&type=productive`,
26
+ )
27
+ .then((r) => r.json())
28
+ .then((data) => data.tags || [])
29
+ .catch(() => []);
30
+
31
+ const deadPromise = fetch(
32
+ `${baseUrl}/api/tags/history?country=${country}&type=dead`,
33
+ )
34
+ .then((r) => r.json())
35
+ .then((data) => data.tags || [])
36
+ .catch(() => []);
37
+
38
+ // 获取所有已存在的 tag(防止重复生成)
39
+ const allPromise = fetch(
40
+ `${baseUrl}/api/tags/history?country=${country}&type=all`,
41
+ )
42
+ .then((r) => r.json())
43
+ .then((data) => data.tags || [])
44
+ .catch(() => []);
45
+
46
+ const [productive, dead, allExisting] = await Promise.all([
47
+ productivePromise,
48
+ deadPromise,
49
+ allPromise,
50
+ ]);
51
+ return { productive, dead, allExisting: allExisting.map((t) => t.tag) };
21
52
  }
22
53
 
23
- function saveTags(data) {
24
- const dir = dirname(TAGS_FILE);
25
- if (!existsSync(dir)) {
26
- const { mkdirSync } = require("fs");
27
- mkdirSync(dir, { recursive: true });
54
+ /**
55
+ * 为单个国家生成 tag(CLI 模式,通过 API 获取历史数据)
56
+ */
57
+ async function discoverTagsForCountryCli(
58
+ country,
59
+ count = 4,
60
+ userPrompt = null,
61
+ serverUrl = null,
62
+ ) {
63
+ if (!COUNTRY_LANG[country]) {
64
+ return { country, error: `不支持的国家代码: ${country}` };
28
65
  }
29
- writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
30
- }
31
-
32
- export function getProductiveTags() {
33
- return loadTags().tags;
34
- }
35
-
36
- export function recordProductiveTag(tag, country, userCount) {
37
- const data = loadTags();
38
- const existing = data.tags.find((t) => t.tag === tag);
39
- if (existing) {
40
- if (!existing.countries.includes(country)) {
41
- existing.countries.push(country);
42
- }
43
- existing.userCount += userCount;
44
- existing.lastUsed = new Date().toISOString();
45
- } else {
46
- data.tags.push({
47
- tag,
48
- countries: [country],
49
- userCount,
50
- firstSeen: new Date().toISOString(),
51
- lastUsed: new Date().toISOString(),
52
- });
53
- }
54
- data.lastUpdated = new Date().toISOString();
55
- saveTags(data);
56
- }
57
66
 
58
- async function callLLM(prompt) {
59
- const apiKey = process.env.APIKEY || "";
60
- const { fetch } = await import("undici");
61
-
62
- const response = await fetch(
63
- "http://82.156.52.214:18000/v1/chat/completions",
64
- {
65
- method: "POST",
66
- headers: {
67
- "Content-Type": "application/json",
68
- Authorization: `Bearer ${apiKey}`,
69
- },
70
- body: JSON.stringify({
71
- model: "zc-fast",
72
- messages: [{ role: "user", content: prompt }],
73
- max_tokens: 1024,
74
- temperature: 0.7,
75
- }),
76
- },
77
- );
78
-
79
- const result = await response.json();
80
- const content = result.choices?.[0]?.message?.content || "";
81
- return content;
82
- }
67
+ // 从服务端获取历史 tag
68
+ const history = await fetchTagHistory(serverUrl, country);
83
69
 
84
- function normalizeTag(t) {
85
- return t.replace(/^#+/, "").trim().toLowerCase();
86
- }
87
-
88
- function parseTagsFromResponse(content) {
89
- try {
90
- const parsed = JSON.parse(content);
91
- if (Array.isArray(parsed)) {
92
- return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
93
- }
94
- if (Array.isArray(parsed.tags)) {
95
- return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
96
- }
97
- } catch {}
98
-
99
- const lines = content.split(/[\n,]+/);
100
- const tags = [];
101
- for (const line of lines) {
102
- const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
103
- if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
104
- tags.push(cleaned);
105
- }
106
- }
107
- return tags;
108
- }
109
-
110
- export async function discoverTags(countries, options = {}) {
111
- const { language = "auto", count = 10 } = options;
112
-
113
- const productiveTags = getProductiveTags();
114
- const countryStr = Array.isArray(countries)
115
- ? countries.join(", ")
116
- : countries;
117
- const langHint =
118
- language === "auto" ? "" : `Tags should be in ${language} language.`;
119
-
120
- const historyHint =
121
- productiveTags.length > 0
122
- ? `Previously productive tags for these countries: ${productiveTags
123
- .filter((t) => t.countries.some((c) => countries.includes(c)))
124
- .map((t) => `#${t.tag}`)
125
- .join(", ")}. Generate new ones, don't repeat these.`
126
- : "";
127
-
128
- const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
129
-
130
- Requirements:
131
- - Focus on tags that sellers/merchants actually use to promote their products
132
- - Include local language commerce tags (sell, shop, store, online, vendor, etc. in the local language)
133
- - Mix broad commerce tags with country-specific tags
134
- ${langHint}
135
- ${historyHint}
136
-
137
- Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
70
+ // 使用统一的 prompt 组装
71
+ const prompt = buildDiscoverPrompt(country, count, history, userPrompt);
138
72
 
139
73
  process.stderr.write(
140
- ` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
74
+ ` [LLM] 正在生成 ${count} 个标签 (国家: ${country}, 语言: ${getLang(country)})...\n`,
141
75
  );
142
76
  const content = await callLLM(prompt);
143
77
  const tags = parseTagsFromResponse(content);
144
-
145
78
  const unique = [...new Set(tags)].slice(0, count);
79
+
146
80
  process.stderr.write(
147
81
  ` [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
148
82
  );
149
83
  return unique;
150
84
  }
85
+
86
+ /**
87
+ * 批量为多个国家生成 tag(兼容旧接口)
88
+ * @param {string|string[]} countries - 国家代码或数组
89
+ * @param {object} options
90
+ * @param {number} [options.count=10] - 每个国家生成的 tag 数量
91
+ * @param {string} [options.serverUrl] - 服务端地址
92
+ * @param {string} [options.prompt] - 用户自定义提示
93
+ */
94
+ export async function discoverTags(countries, options = {}) {
95
+ const { count = 10, serverUrl, prompt: userPrompt } = options;
96
+
97
+ const countryList = Array.isArray(countries) ? countries : [countries];
98
+ const allTags = [];
99
+
100
+ for (const country of countryList) {
101
+ const tags = await discoverTagsForCountryCli(
102
+ country,
103
+ count,
104
+ userPrompt,
105
+ serverUrl,
106
+ );
107
+ allTags.push(...tags);
108
+ }
109
+
110
+ return allTags;
111
+ }
112
+
113
+ export { discoverTagsForCountryCli };
@@ -35,6 +35,7 @@ async function processExplore(page, username, options, log) {
35
35
  discoveredGuessAuthors: [],
36
36
  discoveredFollowing: [],
37
37
  discoveredFollowers: [],
38
+ discoveredRecommended: [],
38
39
  collectedVideos: 0,
39
40
  processed: false,
40
41
  hasFollowData: false,
@@ -205,19 +206,18 @@ async function processExplore(page, username, options, log) {
205
206
  log(
206
207
  ` 商家用户,关注采集: ${effectiveMaxFollowing}, 粉丝采集: ${effectiveMaxFollowers}`,
207
208
  );
208
- const { following, followers } = await extractFollowAndFollowers(
209
- page,
210
- {
209
+ const { following, followers, recommended } =
210
+ await extractFollowAndFollowers(page, {
211
211
  maxFollowing: effectiveMaxFollowing,
212
212
  maxFollowers: effectiveMaxFollowers,
213
213
  log,
214
- },
215
- );
214
+ });
216
215
  result.discoveredFollowing = following || [];
217
216
  result.discoveredFollowers = followers || [];
217
+ result.discoveredRecommended = recommended || [];
218
218
  result.hasFollowData = true;
219
219
  log(
220
- ` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`,
220
+ ` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}, 推荐: ${result.discoveredRecommended.length}`,
221
221
  );
222
222
  } catch (e) {
223
223
  log(` 关注/粉丝提取失败: ${e.message}`);
@@ -2,7 +2,7 @@ import { delay, getDelayConfig } from "./page-helpers.js";
2
2
  import { scrollAndCollect } from "./scroll-collector.js";
3
3
  import { extractUniqueId, toProfileUrl } from "../../lib/url.js";
4
4
 
5
- const FILTER_WORDS = ["主页", "已关注", "粉丝", "推荐"];
5
+ const FILTER_WORDS = ["主页", "已关注", "粉丝"];
6
6
 
7
7
  const FOLLOW_TRIGGER_SELECTORS = [
8
8
  "[data-e2e=following]",
@@ -11,6 +11,8 @@ const FOLLOW_TRIGGER_SELECTORS = [
11
11
  '[data-e2e*="following"]',
12
12
  ];
13
13
 
14
+ const RECOMMEND_TAB_TEXTS = ["推荐", "Suggested", "Recommended"];
15
+
14
16
  async function waitForFollowTrigger(page, timeout = 15000) {
15
17
  await page
16
18
  .waitForFunction(
@@ -187,7 +189,7 @@ async function closeFollowModal(page) {
187
189
 
188
190
  function createUserCollectFn() {
189
191
  return (container) => {
190
- const FILTER_WORDS = ["主页", "已关注", "粉丝", "推荐"];
192
+ const FILTER_WORDS = ["主页", "已关注", "粉丝"];
191
193
  const modal = document.querySelector("[class*=eyhy6180]");
192
194
  const root = modal || document;
193
195
  const users = [];
@@ -239,12 +241,55 @@ async function extractFollowAndFollowers(page, options = {}) {
239
241
  const followers = await extractUsersFromModal(page, maxFollowers);
240
242
  log(` 粉丝: ${followers.length}`);
241
243
 
244
+ // ===== 3. 采集推荐 =====
245
+ let recommended = [];
246
+ if (following.length > 0 || followers.length > 0) {
247
+ try {
248
+ await delay(500, 1500);
249
+ await clickRecommendTab(page);
250
+ await delay(500, 1500);
251
+ recommended = await scrollAndCollect(page, {
252
+ container: "[class*=DivUserListContainer]",
253
+ findScrollable: false,
254
+ collectFn: createUserCollectFn(),
255
+ uniqueKey: (u) => u.handle,
256
+ maxItems: 50,
257
+ staleThreshold: 2,
258
+ });
259
+ if (log) log(` 推荐: ${recommended.length}`);
260
+ } catch (e) {
261
+ if (log) log(` 推荐采集失败: ${e.message}`);
262
+ }
263
+ }
264
+
242
265
  await closeFollowModal(page);
243
266
 
244
267
  return {
245
268
  following: following.map((u) => [u.handle, u.displayName]),
246
269
  followers: followers.map((u) => [u.handle, u.displayName]),
270
+ recommended: recommended.map((u) => [u.handle, u.displayName]),
247
271
  };
248
272
  }
249
273
 
274
+ async function clickRecommendTab(page) {
275
+ await page.evaluate(() => {
276
+ const tabs = document.querySelectorAll("[class*=DivTabItem]");
277
+ for (const tab of tabs) {
278
+ const text = (tab.textContent || "").trim();
279
+ if (
280
+ text.includes("推荐") ||
281
+ text.includes("Suggested") ||
282
+ text.includes("Recommended")
283
+ ) {
284
+ tab.click();
285
+ return;
286
+ }
287
+ }
288
+ throw new Error("未找到推荐 Tab");
289
+ });
290
+ await page.waitForSelector("[class*=DivUserListContainer]", {
291
+ timeout: 30000,
292
+ });
293
+ }
294
+
250
295
  export { extractFollowAndFollowers };
@@ -2223,6 +2223,58 @@ export function createStore(filePath, options = {}) {
2223
2223
  if (filePath) {
2224
2224
  // 初始化 SQLite 用户表(用于判重)
2225
2225
  initUserDb(filePath);
2226
+ // 从数据库恢复偏移量
2227
+ loadLlmSampleOffsets();
2228
+ }
2229
+
2230
+ /**
2231
+ * 从数据库加载 LLM 采样偏移量
2232
+ */
2233
+ function loadLlmSampleOffsets() {
2234
+ try {
2235
+ const row = db
2236
+ .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
2237
+ .get();
2238
+ if (row && row.offsets) {
2239
+ const parsed = JSON.parse(row.offsets);
2240
+ if (parsed && typeof parsed === "object") {
2241
+ Object.entries(parsed).forEach(([k, v]) => {
2242
+ llmSampleOffsets.set(k, v);
2243
+ });
2244
+ console.error(
2245
+ `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
2246
+ llmSampleOffsets.entries(),
2247
+ )
2248
+ .map(([k, v]) => `${k}:${v}`)
2249
+ .join(", ")}`,
2250
+ );
2251
+ }
2252
+ }
2253
+ } catch (e) {
2254
+ // 表不存在或解析失败,使用空偏移量
2255
+ console.error(
2256
+ `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
2257
+ );
2258
+ }
2259
+ }
2260
+
2261
+ /**
2262
+ * 将 LLM 采样偏移量持久化到数据库
2263
+ */
2264
+ function saveLlmSampleOffsets() {
2265
+ try {
2266
+ const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
2267
+ // 表不存在则创建
2268
+ db.prepare(
2269
+ `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
2270
+ ).run();
2271
+ // 插入或更新
2272
+ db.prepare(
2273
+ `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
2274
+ ).run(offsetsJson);
2275
+ } catch (e) {
2276
+ console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
2277
+ }
2226
2278
  }
2227
2279
 
2228
2280
  // stats 缓存
@@ -2383,15 +2435,97 @@ export function createStore(filePath, options = {}) {
2383
2435
  }
2384
2436
 
2385
2437
  function flushSave() {
2438
+ // 数据库模式:先保存 LLM 偏移量,再备份数据库
2439
+ if (db && dbPath) {
2440
+ try {
2441
+ saveLlmSampleOffsets();
2442
+ } catch (e) {
2443
+ console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
2444
+ }
2445
+ }
2386
2446
  return Promise.resolve();
2387
2447
  }
2388
2448
 
2389
- function saveVideos() {
2390
- return;
2449
+ /**
2450
+ * 数据库备份:使用 SQLite BACKUP 命令,保留最新 maxBackups 个备份
2451
+ * @param {number} maxBackups - 保留的备份数量,默认 3
2452
+ * @returns {string|null} 备份文件路径,失败返回 null
2453
+ */
2454
+ function backupDatabase(maxBackups = 3) {
2455
+ if (!db || !dbPath) {
2456
+ console.error("[data-store] 数据库未初始化,跳过备份");
2457
+ return null;
2458
+ }
2459
+
2460
+ try {
2461
+ // 生成备份文件名:result-20260627T094400.db
2462
+ const now = new Date();
2463
+ const timestamp = now
2464
+ .toISOString()
2465
+ .replace(/[-:T.]/g, "")
2466
+ .slice(0, 15); // YYYYMMDDHHmmss
2467
+ const baseName = path.basename(dbPath, ".db");
2468
+ const backupName = `${baseName}-${timestamp}.db`;
2469
+ const backupDir = path.dirname(dbPath);
2470
+ const backupPath = path.join(backupDir, backupName);
2471
+
2472
+ console.error(`[data-store] 正在备份数据库: ${backupName}`);
2473
+
2474
+ // 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
2475
+ const backupDb = new Database(backupPath);
2476
+ db.backup("main", backupDb, "main");
2477
+ backupDb.close();
2478
+
2479
+ // 验证备份文件大小
2480
+ const stat = fs.statSync(backupPath);
2481
+ const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
2482
+ console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);
2483
+
2484
+ // 清理旧备份:保留最新 maxBackups 个
2485
+ cleanupOldBackups(backupDir, baseName, maxBackups);
2486
+
2487
+ return backupPath;
2488
+ } catch (e) {
2489
+ console.error(`[data-store] 备份失败: ${e.message}`);
2490
+ return null;
2491
+ }
2492
+ }
2493
+
2494
+ /**
2495
+ * 清理旧备份文件,保留最新 maxBackups 个
2496
+ */
2497
+ function cleanupOldBackups(backupDir, baseName, maxBackups) {
2498
+ try {
2499
+ // 查找所有备份文件:baseName-YYYYMMDDHHmmss.db
2500
+ const pattern = new RegExp(`^${baseName}-\\d{15}\\.db$`);
2501
+ const backups = fs
2502
+ .readdirSync(backupDir)
2503
+ .filter((f) => pattern.test(f))
2504
+ .sort() // 按时间戳排序(ASCII 排序 = 时间排序)
2505
+ .reverse(); // 最新的在前
2506
+
2507
+ if (backups.length > maxBackups) {
2508
+ const toDelete = backups.slice(maxBackups);
2509
+ for (const file of toDelete) {
2510
+ const filePath = path.join(backupDir, file);
2511
+ fs.unlinkSync(filePath);
2512
+ console.error(`[data-store] 已清理旧备份: ${file}`);
2513
+ }
2514
+ }
2515
+
2516
+ console.error(
2517
+ `[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
2518
+ );
2519
+ } catch (e) {
2520
+ console.error(`[data-store] 清理旧备份失败: ${e.message}`);
2521
+ }
2391
2522
  }
2392
2523
 
2393
2524
  function stopBackup() {
2394
- return;
2525
+ // 退出时执行备份
2526
+ if (db && dbPath) {
2527
+ backupDatabase();
2528
+ }
2395
2529
  }
2396
2530
 
2397
2531
  function getUser(uid) {
@@ -2795,7 +2929,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2795
2929
  .prepare(
2796
2930
  `
2797
2931
  SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
2798
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
2932
+ ORDER BY
2933
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
2934
+ COALESCE(video_count, 0) DESC, created_at DESC
2799
2935
  LIMIT ? OFFSET ?
2800
2936
  `,
2801
2937
  )
@@ -2803,11 +2939,32 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2803
2939
 
2804
2940
  if (samples.length === 0) break;
2805
2941
 
2806
- const scores = await scoreJobsBatch(
2807
- samples,
2808
- DEFAULT_TARGET_LOCATIONS,
2942
+ // 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
2943
+ const tagSamples = samples.filter((s) =>
2944
+ (s.sources || "").includes("tag"),
2809
2945
  );
2810
- const batchQualified = scores.filter((s) => s.score >= llmMinScore);
2946
+ const nonTagSamples = samples.filter(
2947
+ (s) => !(s.sources || "").includes("tag"),
2948
+ );
2949
+
2950
+ // tag 来源直接加入合格列表
2951
+ if (tagSamples.length > 0) {
2952
+ allQualified.push(...tagSamples.map((s) => s.unique_id));
2953
+ console.error(
2954
+ `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2955
+ );
2956
+ }
2957
+
2958
+ // 非 tag 来源走 LLM 打分
2959
+ let batchQualified = [];
2960
+ let scores = [];
2961
+ if (nonTagSamples.length > 0) {
2962
+ scores = await scoreJobsBatch(
2963
+ nonTagSamples,
2964
+ DEFAULT_TARGET_LOCATIONS,
2965
+ );
2966
+ batchQualified = scores.filter((s) => s.score >= llmMinScore);
2967
+ }
2811
2968
 
2812
2969
  allScores.push(...scores);
2813
2970
  allQualified.push(...batchQualified.map((s) => s.uniqueId));
@@ -2829,12 +2986,27 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2829
2986
  if (allQualified.length >= llmMinReturn) break;
2830
2987
  }
2831
2988
 
2832
- // 按分数降序排序,取前 safeLimit
2833
- const qualifiedScores = allScores
2989
+ // 分离 tag 合格和非 tag 合格
2990
+ // tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
2991
+ const tagQualified = allQualified.filter(
2992
+ (uid) => !allScores.find((s) => s.uniqueId === uid),
2993
+ );
2994
+ const nonTagQualifiedScores = allScores
2834
2995
  .filter((s) => s.score >= llmMinScore)
2835
- .sort((a, b) => b.score - a.score)
2836
- .slice(0, safeLimit);
2837
- const qualified = qualifiedScores.map((s) => s.uniqueId);
2996
+ .sort((a, b) => b.score - a.score);
2997
+ const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
2998
+
2999
+ // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3000
+ const tagMaxCount = Math.floor(safeLimit * 0.7);
3001
+ const tagCount = Math.min(tagQualified.length, tagMaxCount);
3002
+ const nonTagMaxCount = safeLimit - tagCount;
3003
+ const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
3004
+
3005
+ // 最终合格列表:tag 优先 + 非 tag 按分数排序
3006
+ const qualified = [
3007
+ ...tagQualified.slice(0, tagCount),
3008
+ ...finalNonTagQualified,
3009
+ ];
2838
3010
 
2839
3011
  if (!qualified.length) {
2840
3012
  console.error(
@@ -2881,6 +3053,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2881
3053
  moveTxn();
2882
3054
  markStatsDirty();
2883
3055
 
3056
+ // 持久化偏移量到数据库
3057
+ saveLlmSampleOffsets();
3058
+
2884
3059
  // 打印最终偏移量状态
2885
3060
  const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
2886
3061
  .map(([k, v]) => `${k}:${v}`)
@@ -2920,7 +3095,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2920
3095
  status_code, latest_video_time, user_create_time
2921
3096
  FROM raw_jobs
2922
3097
  WHERE ${whereSql}
2923
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
3098
+ ORDER BY
3099
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
3100
+ COALESCE(video_count, 0) DESC, created_at DESC
2924
3101
  LIMIT ?
2925
3102
  `,
2926
3103
  ).run(...args, safeLimit);
@@ -2932,7 +3109,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2932
3109
  WHERE unique_id IN (
2933
3110
  SELECT unique_id FROM raw_jobs
2934
3111
  WHERE ${whereSql}
2935
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
3112
+ ORDER BY
3113
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
3114
+ COALESCE(video_count, 0) DESC, created_at DESC
2936
3115
  LIMIT ?
2937
3116
  )
2938
3117
  `,
@@ -3786,6 +3965,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3786
3965
  (typeof f === "object" && f.guessedLocation) || guessedLocation,
3787
3966
  };
3788
3967
  }),
3968
+ ...(result.discoveredRecommended || []).map((f) => {
3969
+ const handle = Array.isArray(f) ? f[0] : f.handle || "";
3970
+ const name = Array.isArray(f) ? f[1] : f.displayName || null;
3971
+ return {
3972
+ uniqueId: handle.replace(/^@/, ""),
3973
+ nickname: name,
3974
+ sources: ["recommended"],
3975
+ guessedLocation:
3976
+ (typeof f === "object" && f.guessedLocation) || guessedLocation,
3977
+ };
3978
+ }),
3789
3979
  ].filter((u) => u.uniqueId);
3790
3980
 
3791
3981
  // 先对 discovered 内部去重,再用 uidIndex 批量判断
@@ -3880,6 +4070,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3880
4070
  "discoveredGuessAuthors",
3881
4071
  "discoveredFollowing",
3882
4072
  "discoveredFollowers",
4073
+ "discoveredRecommended",
3883
4074
  "uniqueId",
3884
4075
  "sources",
3885
4076
  "topRecentVideo", // 单独处理,不进入通用循环
@@ -4262,7 +4453,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4262
4453
  sqlParams.push(...targetCountries);
4263
4454
  }
4264
4455
 
4265
- sql += ` ORDER BY created_at ASC, unique_id ASC LIMIT ?`;
4456
+ // 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
4457
+ sql += ` ORDER BY
4458
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
4459
+ created_at ASC,
4460
+ unique_id ASC
4461
+ LIMIT ?`;
4266
4462
  sqlParams.push(l);
4267
4463
 
4268
4464
  const rows = db.prepare(sql).all(...sqlParams);
@@ -4310,6 +4506,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4310
4506
  }
4311
4507
  return false;
4312
4508
  })
4509
+ .sort((a, b) => {
4510
+ // 优先级:sources 包含 "tag" 的任务优先
4511
+ const aIsTag = (a.sources || "").includes("tag");
4512
+ const bIsTag = (b.sources || "").includes("tag");
4513
+ if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
4514
+ return (a.createdAt || 0) - (b.createdAt || 0);
4515
+ })
4313
4516
  .slice(0, l);
4314
4517
  // 接受任务时 userUpdateCount + 1
4315
4518
  pending.forEach((u) => {
@@ -4823,6 +5026,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4823
5026
  commitCommentTask,
4824
5027
  debugClaimNextJob,
4825
5028
  stopBackup,
5029
+ backupDatabase, // 手动备份数据库
4826
5030
  rawQuery,
4827
5031
  getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
4828
5032
  // Tag 发现与打分
@@ -1125,6 +1125,51 @@ export function startWatchServer(
1125
1125
  return;
1126
1126
  }
1127
1127
 
1128
+ // GET /api/tags/history?country=ES&type=productive|dead — CLI 模式获取历史 tag
1129
+ if (req.method === "GET" && routePath === "/api/tags/history") {
1130
+ const country = params.country || null;
1131
+ const type = params.type || "productive";
1132
+
1133
+ if (!country) {
1134
+ sendJSON(res, 400, { error: "缺少 country 参数" });
1135
+ return;
1136
+ }
1137
+
1138
+ let tags;
1139
+ if (type === "dead") {
1140
+ tags = store.getDeadTags(country);
1141
+ } else if (type === "all") {
1142
+ tags = store.getTagsByCountry(country, 0);
1143
+ } else {
1144
+ tags = store.getTagsByCountry(country, 50);
1145
+ }
1146
+
1147
+ sendJSON(res, 200, { tags, total: tags.length });
1148
+ return;
1149
+ }
1150
+
1151
+ // POST /api/tags/productive — CLI 模式上报 productive tag
1152
+ if (req.method === "POST" && routePath === "/api/tags/productive") {
1153
+ try {
1154
+ const body = await readBody(req);
1155
+ const { tag, countries, pushedUsers } = body || {};
1156
+
1157
+ if (!tag || !countries || countries.length === 0) {
1158
+ sendJSON(res, 400, { error: "tag 和 countries 不能为空" });
1159
+ return;
1160
+ }
1161
+
1162
+ // 将 productive 信息写入数据库(更新已有 tag 或插入新 tag)
1163
+ for (const c of countries) {
1164
+ store.insertTag(tag, [c], "cli-productive");
1165
+ }
1166
+ sendJSON(res, 200, { ok: true });
1167
+ } catch (e) {
1168
+ sendJSON(res, 500, { error: e.message });
1169
+ }
1170
+ return;
1171
+ }
1172
+
1128
1173
  if (
1129
1174
  req.method === "GET" &&
1130
1175
  (routePath === "/" || routePath === "/index.html")
@@ -1205,7 +1250,10 @@ export function startWatchServer(
1205
1250
  console.error("[server] HTTP 服务已关闭");
1206
1251
  });
1207
1252
  await store.flushSave();
1208
- console.error("[server] 数据已保存,退出");
1253
+ console.error("[server] 数据已保存");
1254
+ // 备份数据库
1255
+ store.stopBackup();
1256
+ console.error("[server] 退出");
1209
1257
  process.exit(0);
1210
1258
  }
1211
1259
 
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  // 国家 → 语言映射
9
- const COUNTRY_LANG = {
9
+ export const COUNTRY_LANG = {
10
10
  CZ: "cs",
11
11
  GR: "el",
12
12
  HU: "hu",
@@ -22,16 +22,16 @@ const COUNTRY_LANG = {
22
22
  AT: "de",
23
23
  };
24
24
 
// LLM endpoint and model name. Each can be overridden with the environment
// variable of the same name; the literals below are the defaults used when the
// variable is unset or empty.
// NOTE(review): the default endpoint is plain HTTP addressed by IP, and callLLM
// reads an API key from APIKEY — confirm the key is not sent in clear text.
export const LLM_URL =
  process.env.LLM_URL || "http://82.156.52.214:18000/v1/chat/completions";
export const LLM_MODEL = process.env.LLM_MODEL || "zc-fast";
27
27
 
/**
 * Look up the language code configured for a country.
 *
 * Only own keys of COUNTRY_LANG are considered, so inherited property names
 * such as "toString" or "constructor" fall back to the default instead of
 * returning a function from Object.prototype.
 *
 * @param {string} country - Key into COUNTRY_LANG (e.g. "CZ").
 * @returns {string} The mapped language code, or "en" when there is no
 *   (truthy) mapping for the country.
 */
export function getLang(country) {
  const isConfigured = Object.prototype.hasOwnProperty.call(
    COUNTRY_LANG,
    country,
  );
  return (isConfigured && COUNTRY_LANG[country]) || "en";
}
31
31
 
32
32
  // ====== LLM 调用 ======
33
33
 
34
- async function callLLM(prompt) {
34
+ export async function callLLM(prompt) {
35
35
  const apiKey = process.env.APIKEY || "";
36
36
  const { fetch } = await import("undici");
37
37
 
@@ -53,11 +53,11 @@ async function callLLM(prompt) {
53
53
  return result.choices?.[0]?.message?.content || "";
54
54
  }
55
55
 
/**
 * Normalize a hashtag for storage and comparison.
 *
 * Outer whitespace is removed before the leading "#" characters are stripped,
 * so input such as " #Ventas" does not keep its hash, and is removed again
 * afterwards to cover "# ventas". Non-string values (e.g. a number taken from
 * parsed JSON) are coerced with String(); null and undefined become "".
 *
 * @param {unknown} t - Raw tag text, with or without leading "#".
 * @returns {string} Lower-cased tag without leading "#" or outer whitespace.
 */
export function normalizeTag(t) {
  return String(t ?? "")
    .trim()
    .replace(/^#+/, "")
    .trim()
    .toLowerCase();
}
59
59
 
60
- function parseTagsFromResponse(content) {
60
+ export function parseTagsFromResponse(content) {
61
61
  try {
62
62
  const parsed = JSON.parse(content);
63
63
  if (Array.isArray(parsed)) {
@@ -81,7 +81,7 @@ function parseTagsFromResponse(content) {
81
81
 
82
82
  // ====== Prompt 组装 ======
83
83
 
84
- function buildDiscoverPrompt(country, count, history, userPrompt) {
84
+ export function buildDiscoverPrompt(country, count, history, userPrompt) {
85
85
  const lang = getLang(country);
86
86
  const langNames = {
87
87
  cs: "Czech",
@@ -98,18 +98,18 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
98
98
  };
99
99
  const langName = langNames[lang] || lang;
100
100
 
101
- // 正样本:该国高分 tag
101
+ // 正样本:该国高分 tag(只给 LLM 看效果,不给模板)
102
102
  const productive = history.productive || [];
103
103
  const productiveHint =
104
104
  productive.length > 0
105
- ? `\nHigh-performing tags for ${country}: ${productive.map((t) => t.tag).join(", ")}. Generate new tags in similar patterns.`
105
+ ? `\nTags that already worked well for ${country}: ${productive.map((t) => t.tag).join(", ")}. These are examples of what works — explore DIFFERENT directions, not variations of these.`
106
106
  : "";
107
107
 
108
108
  // 负样本:该国 dead tag
109
109
  const dead = history.dead || [];
110
110
  const deadHint =
111
111
  dead.length > 0
112
- ? `\nAvoid these tags and similar patterns (they found no matching users): ${dead.map((t) => t.tag).join(", ")}.`
112
+ ? `\nTags that failed for ${country} (found no matching users): ${dead.map((t) => t.tag).join(", ")}. Avoid these and similar patterns.`
113
113
  : "";
114
114
 
115
115
  // 死因分析
@@ -118,20 +118,35 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
118
118
  ];
119
119
  const errorHint =
120
120
  errorPatterns.length > 0
121
- ? `\nReasons previous tags failed: ${errorPatterns.join("; ")}. Avoid generating tags likely to have same issues.`
121
+ ? `\nWhy previous tags failed: ${errorPatterns.join("; ")}. Avoid tags likely to have same issues.`
122
+ : "";
123
+
124
+ // 已存在的所有 tag(防止重复生成)
125
+ const allExisting = history.allExisting || [];
126
+ const existingHint =
127
+ allExisting.length > 0
128
+ ? `\nTags already in database (DO NOT generate these again): ${allExisting.slice(-50).join(", ")}.`
122
129
  : "";
123
130
 
124
131
  const userHint = userPrompt
125
132
  ? `\nAdditional focus: ${userPrompt}. Generate tags specifically for this niche.`
126
133
  : "";
127
134
 
128
- return `Generate ${count} TikTok hashtags in ${langName} language for e-commerce sellers, shop owners, and small business merchants in ${country}.
135
+ return `You are discovering TikTok hashtags used by people who sell things in ${country}.
136
+
137
+ Your goal: Find hashtags that real sellers in ${country} actually use — any kind of tag they might use. Think broadly:
138
+ - Who they are (seller, shop owner, entrepreneur, artisan...)
139
+ - What they sell (shoes, clothes, jewelry, food, pets, furniture...)
140
+ - How they sell (online, handmade, second-hand, local pickup...)
141
+ - Product-specific tags (sneakers, dresses, cakes, necklaces...)
142
+
143
+ All tags must be in ${langName} language (or widely used in ${country}).
144
+ Generate ${count} tags that are ALL DIFFERENT from each other and from any existing tags.
129
145
 
130
- Requirements:
131
- - Tags must be in ${langName} language (or widely used in ${country})
132
- - Focus on tags that sellers/merchants actually use to promote their products
133
- - Include local language commerce tags (sell, shop, store, online, vendor, etc.)
134
- - Prefer specific/niche tags over generic ones (e.g., "vendozapatos" not "vender")${productiveHint}${deadHint}${errorHint}${userHint}
146
+ Rules:
147
+ - Each tag should explore a DIFFERENT angle don't just swap country suffixes
148
+ - Prefer specific and niche tags over generic ones (e.g., "vendozapatos" beats "vender")
149
+ - Do NOT generate tags that already exist${productiveHint}${deadHint}${errorHint}${existingHint}${userHint}
135
150
 
136
151
  Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
137
152
  }
@@ -151,7 +166,10 @@ export async function discoverTagsForCountry(
151
166
  // 读取历史打分记录
152
167
  const productive = store.getTagsByCountry(country, 50);
153
168
  const dead = store.getDeadTags(country);
154
- const history = { productive, dead };
169
+ // 获取该国所有已存在的 tag 名(防止重复生成)
170
+ const allTags = store.getTagsByCountry(country, 0);
171
+ const allExisting = allTags.map((t) => t.tag);
172
+ const history = { productive, dead, allExisting };
155
173
 
156
174
  // 组装 prompt 并调用 LLM
157
175
  const prompt = buildDiscoverPrompt(country, count, history, userPrompt);