npm - tt-help-cli-ycl - Versions diffs - 1.3.88 → 1.3.91 - Mend

tt-help-cli-ycl 1.3.88 → 1.3.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/package.json +1 -1
package/src/cli/auto.js +7 -0
package/src/cli/explore.js +12 -2
package/src/cli/refresh.js +10 -1
package/src/cli/tag.js +88 -52
package/src/lib/tag-discover.js +97 -134
package/src/scraper/explore-core.js +6 -6
package/src/scraper/modules/follow-extractor.js +47 -2
package/src/watch/data-store.js +220 -16
package/src/watch/server.js +49 -1
package/src/watch/tag-service.js +37 -19

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tt-help-cli-ycl",
-  "version": "1.3.88",
+  "version": "1.3.91",
   "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
   "type": "module",
   "bin": {

package/src/cli/auto.js CHANGED Viewed

@@ -232,6 +232,13 @@ export async function handleAuto(options) {
           displayName: Array.isArray(f) ? f[1] : null,
           guessedLocation,
         })),
+        discoveredRecommended: (result.discoveredRecommended || []).map(
+          (f) => ({
+            handle: Array.isArray(f) ? f[0] : f,
+            displayName: Array.isArray(f) ? f[1] : null,
+            guessedLocation,
+          }),
+        ),
       };
       await apiPost(`${serverUrl}/api/job/${username}`, payload);
       console.error("  已提交");

package/src/cli/explore.js CHANGED Viewed

@@ -143,7 +143,9 @@ export async function handleExplore(options) {
     console.error(`CDP 端口: ${cdpOptions.port}, 用户编号: ${userId}`);
     console.error(`浏览器配置: ${path.basename(cdpOptions.userDataDir)}`);
-    const { apiGet, apiPost } = createApiClient({ meta: { port: cdpOptions.port } });
+    const { apiGet, apiPost } = createApiClient({
+      meta: { port: cdpOptions.port },
+    });
     await apiGet(`${serverUrl}/api/stats`);
@@ -508,7 +510,8 @@ export async function handleExplore(options) {
         if (result.hasFollowData && result.keepFollow) {
           const totalFollows =
             (result.discoveredFollowing || []).length +
-            (result.discoveredFollowers || []).length;
+            (result.discoveredFollowers || []).length +
+            (result.discoveredRecommended || []).length;
           if (totalFollows > 0) {
             lastFollowSuccessTime = Date.now();
           }
@@ -528,6 +531,13 @@ export async function handleExplore(options) {
             displayName: Array.isArray(f) ? f[1] : null,
             guessedLocation,
           })),
+          discoveredRecommended: (result.discoveredRecommended || []).map(
+            (f) => ({
+              handle: Array.isArray(f) ? f[0] : f,
+              displayName: Array.isArray(f) ? f[1] : null,
+              guessedLocation,
+            }),
+          ),
           processed: result.processed,
           hasFollowData: result.hasFollowData,
           keepFollow: result.keepFollow,

package/src/cli/refresh.js CHANGED Viewed

@@ -155,7 +155,9 @@ export async function handleRefresh(options) {
       );
     }
-    const { apiGet, apiPost } = createApiClient({ meta: { port: cdpOptions.port } });
+    const { apiGet, apiPost } = createApiClient({
+      meta: { port: cdpOptions.port },
+    });
     // 连接服务器验证
     await apiGet(`${serverUrl}/api/stats`);
@@ -545,6 +547,13 @@ export async function handleRefresh(options) {
           displayName: Array.isArray(f) ? f[1] : null,
           guessedLocation,
         })),
+        discoveredRecommended: (result.discoveredRecommended || []).map(
+          (f) => ({
+            handle: Array.isArray(f) ? f[0] : f,
+            displayName: Array.isArray(f) ? f[1] : null,
+            guessedLocation,
+          }),
+        ),
         processed: result.processed,
         hasFollowData: result.hasFollowData,
         keepFollow: result.keepFollow,

package/src/cli/tag.js CHANGED Viewed

@@ -5,7 +5,7 @@ import {
   DEFAULT_TARGET_LOCATIONS,
   isLocationInList,
 } from "../lib/target-locations.js";
-import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
+import { discoverTags } from "../lib/tag-discover.js";
 import { server as cfgServer } from "../lib/constants.js";
 const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
@@ -157,9 +157,18 @@ async function processTag(
         const countries = [
           ...new Set(videos.map((v) => v.locationCreated).filter(Boolean)),
         ];
-        for (const c of countries) {
-          recordProductiveTag(tag, c, pushResult.added);
-        }
+        // 通过 API 上报到服务端，由服务端写入数据库
+        try {
+          await fetch(`${serverUrl}/api/tags/productive`, {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+              tag,
+              countries,
+              pushedUsers: pushResult.added,
+            }),
+          });
+        } catch {}
         process.stderr.write(
           `  已记录标签 #${tag} (${countries.join(",")}, ${pushResult.added} 用户)\n`,
         );
@@ -221,7 +230,7 @@ export async function handleDiscover(parsed) {
     process.exit(1);
   }
-  const baseUrl = serverUrl || "http://127.0.0.1:3000";
+  const baseUrl = serverUrl || DEFAULT_SERVER;
   for (const country of countries) {
     const params = new URLSearchParams({ country, count: String(count) });
@@ -452,8 +461,8 @@ export async function handleScoreAll(parsed) {
   log("");
   let totalScored = 0;
-  let lastDiscoverTime = 0;
-  const DISCOVER_COOLDOWN = 5 * 60 * 1000; // 5 分钟冷却
+  let emptyRounds = 0; // 连续无任务的轮数
+  const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
   // 复用 TikTokScraper 实例，避免每次 enrich 都启动/关闭 headless 浏览器
   const enrichScraper = new TikTokScraper({ poolSize: 3 });
@@ -463,56 +472,64 @@ export async function handleScoreAll(parsed) {
   try {
     while (true) {
-      // 从服务端取下一个 new 标签
-      const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
-      const tagsData = await tagsRes.json();
-      if (!tagsData.tags || tagsData.tags.length === 0) {
-        // 自动发现：无任务时自动生成标签
-        if (autoDiscover && Date.now() - lastDiscoverTime > DISCOVER_COOLDOWN) {
-          log(
-            `🔍 无待打分标签，自动为 ${targetCountries.length} 个国家生成标签...`,
-          );
-          for (const country of targetCountries) {
-            try {
-              const discRes = await fetch(
-                `${baseUrl}/api/tags/discover?country=${country}&count=5`,
-              );
-              const discData = await discRes.json();
-              if (discData.inserted) {
-                log(`  ${country}: 新增 ${discData.inserted} 个`);
+      try {
+        // 从服务端取下一个 new 标签
+        const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
+        const tagsData = await tagsRes.json();
+        if (!tagsData.tags || tagsData.tags.length === 0) {
+          emptyRounds++;
+          // 自动发现：连续 N 轮无任务时自动生成标签
+          if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
+            log(
+              `🔍 连续 ${emptyRounds} 轮无待打分标签，自动为 ${targetCountries.length} 个国家生成标签...`,
+            );
+            for (const country of targetCountries) {
+              try {
+                const discRes = await fetch(
+                  `${baseUrl}/api/tags/discover?country=${country}&count=5`,
+                );
+                const discData = await discRes.json();
+                if (discData.inserted) {
+                  log(`  ${country}: 新增 ${discData.inserted} 个`);
+                }
+              } catch (e) {
+                log(`  ${country}: 请求失败 (${e.message})`);
               }
-            } catch (e) {
-              log(`  ${country}: 请求失败 (${e.message})`);
             }
+            emptyRounds = 0; // 重置计数器
+            // 等 3 秒让服务端处理完
+            await new Promise((r) => setTimeout(r, 3000));
+            continue;
           }
-          lastDiscoverTime = Date.now();
-          // 等 3 秒让服务端处理完
-          await new Promise((r) => setTimeout(r, 3000));
+          log(`⏳ 暂无待打分标签（连续 ${emptyRounds} 轮），10 秒后重试...`);
+          await new Promise((r) => setTimeout(r, 10000));
           continue;
         }
-        log(`⏳ 暂无待打分标签，10 秒后重试...`);
-        await new Promise((r) => setTimeout(r, 10000));
-        continue;
-      }
-      const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
-      const startTime = Date.now();
-      log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
+        // 有任务了，重置计数器
+        emptyRounds = 0;
+        const tag = tagsData.tags[0].tag
+          .replace(/^#+/, "")
+          .trim()
+          .toLowerCase();
+        const startTime = Date.now();
+        log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
+        const result = {
+          tag,
+          status: "error",
+          score: 0,
+          totalPosts: 0,
+          authorCount: 0,
+          matchedAuthors: 0,
+          matchedCountries: [],
+          pushedUsers: 0,
+          error: null,
+        };
-      const result = {
-        tag,
-        status: "error",
-        score: 0,
-        totalPosts: 0,
-        authorCount: 0,
-        matchedAuthors: 0,
-        matchedCountries: [],
-        pushedUsers: 0,
-        error: null,
-      };
-      try {
         // 锁定 tag
         const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
           method: "POST",
@@ -610,10 +627,28 @@ export async function handleScoreAll(parsed) {
         );
         log("");
       } catch (e) {
+        // 区分网络错误和业务错误
+        const isNetworkError =
+          e.code === "ECONNREFUSED" ||
+          e.code === "ENOTFOUND" ||
+          e.code === "ECONNRESET" ||
+          (e.message &&
+            (e.message.includes("ECONNREFUSED") ||
+              e.message.includes("fetch failed") ||
+              e.message.includes("network")));
+        if (isNetworkError) {
+          log(`  ⚠️ 服务端连接失败 (${e.message})，15 秒后重试...`);
+          await new Promise((r) => setTimeout(r, 15000));
+          continue;
+        }
         log(`  ❌ 失败: ${e.message}`);
-        result.error = e.message;
         try {
-          await reportToServer(baseUrl, result);
+          await reportToServer(baseUrl, {
+            tag: "",
+            status: "error",
+            score: 0,
+            error: e.message,
+          });
         } catch {}
         totalScored++;
       }
@@ -684,6 +719,7 @@ export async function handleTag(parsed) {
     const discoverCount = typeof discover === "number" ? discover : 10;
     const generatedTags = await discoverTags(targetLocations, {
       count: discoverCount,
+      serverUrl,
     });
     finalTags = [...new Set([...finalTags, ...generatedTags])];
     process.stderr.write(`  共 ${finalTags.length} 个标签待处理\n\n`);

package/src/lib/tag-discover.js CHANGED Viewed

@@ -1,150 +1,113 @@
-import { readFileSync, writeFileSync, existsSync } from "fs";
-import { resolve, dirname } from "path";
-import { fileURLToPath } from "url";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const TAGS_FILE = resolve(
-  __dirname,
-  "..",
-  "..",
-  "data",
-  "productive-tags.json",
-);
-function loadTags() {
-  try {
-    if (existsSync(TAGS_FILE)) {
-      return JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
-    }
-  } catch {}
-  return { tags: [], lastUpdated: null };
+/**
+ * Tag 发现（CLI 模式）
+ *
+ * 使用 tag-service 的公共函数（LLM 调用、prompt 组装、解析）。
+ * 历史 tag 数据通过 API 从服务端获取，不再读写 productive-tags.json。
+ */
+import {
+  COUNTRY_LANG,
+  getLang,
+  callLLM,
+  normalizeTag,
+  parseTagsFromResponse,
+  buildDiscoverPrompt,
+} from "../watch/tag-service.js";
+const DEFAULT_SERVER = "http://127.0.0.1:3000";
+/**
+ * 从服务端获取某国的历史 tag（正样本 + 负样本 + 全部已存在）
+ */
+async function fetchTagHistory(serverUrl, country) {
+  const baseUrl = serverUrl || DEFAULT_SERVER;
+  const productivePromise = fetch(
+    `${baseUrl}/api/tags/history?country=${country}&type=productive`,
+  )
+    .then((r) => r.json())
+    .then((data) => data.tags || [])
+    .catch(() => []);
+  const deadPromise = fetch(
+    `${baseUrl}/api/tags/history?country=${country}&type=dead`,
+  )
+    .then((r) => r.json())
+    .then((data) => data.tags || [])
+    .catch(() => []);
+  // 获取所有已存在的 tag（防止重复生成）
+  const allPromise = fetch(
+    `${baseUrl}/api/tags/history?country=${country}&type=all`,
+  )
+    .then((r) => r.json())
+    .then((data) => data.tags || [])
+    .catch(() => []);
+  const [productive, dead, allExisting] = await Promise.all([
+    productivePromise,
+    deadPromise,
+    allPromise,
+  ]);
+  return { productive, dead, allExisting: allExisting.map((t) => t.tag) };
 }
-function saveTags(data) {
-  const dir = dirname(TAGS_FILE);
-  if (!existsSync(dir)) {
-    const { mkdirSync } = require("fs");
-    mkdirSync(dir, { recursive: true });
+/**
+ * 为单个国家生成 tag（CLI 模式，通过 API 获取历史数据）
+ */
+async function discoverTagsForCountryCli(
+  country,
+  count = 4,
+  userPrompt = null,
+  serverUrl = null,
+) {
+  if (!COUNTRY_LANG[country]) {
+    return { country, error: `不支持的国家代码: ${country}` };
   }
-  writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
-}
-export function getProductiveTags() {
-  return loadTags().tags;
-}
-export function recordProductiveTag(tag, country, userCount) {
-  const data = loadTags();
-  const existing = data.tags.find((t) => t.tag === tag);
-  if (existing) {
-    if (!existing.countries.includes(country)) {
-      existing.countries.push(country);
-    }
-    existing.userCount += userCount;
-    existing.lastUsed = new Date().toISOString();
-  } else {
-    data.tags.push({
-      tag,
-      countries: [country],
-      userCount,
-      firstSeen: new Date().toISOString(),
-      lastUsed: new Date().toISOString(),
-    });
-  }
-  data.lastUpdated = new Date().toISOString();
-  saveTags(data);
-}
-async function callLLM(prompt) {
-  const apiKey = process.env.APIKEY || "";
-  const { fetch } = await import("undici");
-  const response = await fetch(
-    "http://82.156.52.214:18000/v1/chat/completions",
-    {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${apiKey}`,
-      },
-      body: JSON.stringify({
-        model: "zc-fast",
-        messages: [{ role: "user", content: prompt }],
-        max_tokens: 1024,
-        temperature: 0.7,
-      }),
-    },
-  );
-  const result = await response.json();
-  const content = result.choices?.[0]?.message?.content || "";
-  return content;
-}
+  // 从服务端获取历史 tag
+  const history = await fetchTagHistory(serverUrl, country);
-function normalizeTag(t) {
-  return t.replace(/^#+/, "").trim().toLowerCase();
-}
-function parseTagsFromResponse(content) {
-  try {
-    const parsed = JSON.parse(content);
-    if (Array.isArray(parsed)) {
-      return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
-    }
-    if (Array.isArray(parsed.tags)) {
-      return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
-    }
-  } catch {}
-  const lines = content.split(/[\n,]+/);
-  const tags = [];
-  for (const line of lines) {
-    const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
-    if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
-      tags.push(cleaned);
-    }
-  }
-  return tags;
-}
-export async function discoverTags(countries, options = {}) {
-  const { language = "auto", count = 10 } = options;
-  const productiveTags = getProductiveTags();
-  const countryStr = Array.isArray(countries)
-    ? countries.join(", ")
-    : countries;
-  const langHint =
-    language === "auto" ? "" : `Tags should be in ${language} language.`;
-  const historyHint =
-    productiveTags.length > 0
-      ? `Previously productive tags for these countries: ${productiveTags
-          .filter((t) => t.countries.some((c) => countries.includes(c)))
-          .map((t) => `#${t.tag}`)
-          .join(", ")}. Generate new ones, don't repeat these.`
-      : "";
-  const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
-Requirements:
-- Focus on tags that sellers/merchants actually use to promote their products
-- Include local language commerce tags (sell, shop, store, online, vendor, etc. in the local language)
-- Mix broad commerce tags with country-specific tags
-${langHint}
-${historyHint}
-Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
+  // 使用统一的 prompt 组装
+  const prompt = buildDiscoverPrompt(country, count, history, userPrompt);
   process.stderr.write(
-    `  [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
+    `  [LLM] 正在生成 ${count} 个标签 (国家: ${country}, 语言: ${getLang(country)})...\n`,
   );
   const content = await callLLM(prompt);
   const tags = parseTagsFromResponse(content);
   const unique = [...new Set(tags)].slice(0, count);
   process.stderr.write(
     `  [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
   );
   return unique;
 }
+/**
+ * 批量为多个国家生成 tag（兼容旧接口）
+ * @param {string|string[]} countries - 国家代码或数组
+ * @param {object} options
+ * @param {number} [options.count=10] - 每个国家生成的 tag 数量
+ * @param {string} [options.serverUrl] - 服务端地址
+ * @param {string} [options.prompt] - 用户自定义提示
+ */
+export async function discoverTags(countries, options = {}) {
+  const { count = 10, serverUrl, prompt: userPrompt } = options;
+  const countryList = Array.isArray(countries) ? countries : [countries];
+  const allTags = [];
+  for (const country of countryList) {
+    const tags = await discoverTagsForCountryCli(
+      country,
+      count,
+      userPrompt,
+      serverUrl,
+    );
+    allTags.push(...tags);
+  }
+  return allTags;
+}
+export { discoverTagsForCountryCli };

package/src/scraper/explore-core.js CHANGED Viewed

@@ -35,6 +35,7 @@ async function processExplore(page, username, options, log) {
     discoveredGuessAuthors: [],
     discoveredFollowing: [],
     discoveredFollowers: [],
+    discoveredRecommended: [],
     collectedVideos: 0,
     processed: false,
     hasFollowData: false,
@@ -205,19 +206,18 @@ async function processExplore(page, username, options, log) {
               log(
                 `  商家用户，关注采集: ${effectiveMaxFollowing}, 粉丝采集: ${effectiveMaxFollowers}`,
               );
-            const { following, followers } = await extractFollowAndFollowers(
-              page,
-              {
+            const { following, followers, recommended } =
+              await extractFollowAndFollowers(page, {
                 maxFollowing: effectiveMaxFollowing,
                 maxFollowers: effectiveMaxFollowers,
                 log,
-              },
-            );
+              });
             result.discoveredFollowing = following || [];
             result.discoveredFollowers = followers || [];
+            result.discoveredRecommended = recommended || [];
             result.hasFollowData = true;
             log(
-              `  关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`,
+              `  关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}, 推荐: ${result.discoveredRecommended.length}`,
             );
           } catch (e) {
             log(`  关注/粉丝提取失败: ${e.message}`);

package/src/scraper/modules/follow-extractor.js CHANGED Viewed

@@ -2,7 +2,7 @@ import { delay, getDelayConfig } from "./page-helpers.js";
 import { scrollAndCollect } from "./scroll-collector.js";
 import { extractUniqueId, toProfileUrl } from "../../lib/url.js";
-const FILTER_WORDS = ["主页", "已关注", "粉丝", "推荐"];
+const FILTER_WORDS = ["主页", "已关注", "粉丝"];
 const FOLLOW_TRIGGER_SELECTORS = [
   "[data-e2e=following]",
@@ -11,6 +11,8 @@ const FOLLOW_TRIGGER_SELECTORS = [
   '[data-e2e*="following"]',
 ];
+const RECOMMEND_TAB_TEXTS = ["推荐", "Suggested", "Recommended"];
 async function waitForFollowTrigger(page, timeout = 15000) {
   await page
     .waitForFunction(
@@ -187,7 +189,7 @@ async function closeFollowModal(page) {
 function createUserCollectFn() {
   return (container) => {
-    const FILTER_WORDS = ["主页", "已关注", "粉丝", "推荐"];
+    const FILTER_WORDS = ["主页", "已关注", "粉丝"];
     const modal = document.querySelector("[class*=eyhy6180]");
     const root = modal || document;
     const users = [];
@@ -239,12 +241,55 @@ async function extractFollowAndFollowers(page, options = {}) {
   const followers = await extractUsersFromModal(page, maxFollowers);
   log(`  粉丝: ${followers.length}`);
+  // ===== 3. 采集推荐 =====
+  let recommended = [];
+  if (following.length > 0 || followers.length > 0) {
+    try {
+      await delay(500, 1500);
+      await clickRecommendTab(page);
+      await delay(500, 1500);
+      recommended = await scrollAndCollect(page, {
+        container: "[class*=DivUserListContainer]",
+        findScrollable: false,
+        collectFn: createUserCollectFn(),
+        uniqueKey: (u) => u.handle,
+        maxItems: 50,
+        staleThreshold: 2,
+      });
+      if (log) log(`    推荐: ${recommended.length}`);
+    } catch (e) {
+      if (log) log(`    推荐采集失败: ${e.message}`);
+    }
+  }
   await closeFollowModal(page);
   return {
     following: following.map((u) => [u.handle, u.displayName]),
     followers: followers.map((u) => [u.handle, u.displayName]),
+    recommended: recommended.map((u) => [u.handle, u.displayName]),
   };
 }
+async function clickRecommendTab(page) {
+  await page.evaluate(() => {
+    const tabs = document.querySelectorAll("[class*=DivTabItem]");
+    for (const tab of tabs) {
+      const text = (tab.textContent || "").trim();
+      if (
+        text.includes("推荐") ||
+        text.includes("Suggested") ||
+        text.includes("Recommended")
+      ) {
+        tab.click();
+        return;
+      }
+    }
+    throw new Error("未找到推荐 Tab");
+  });
+  await page.waitForSelector("[class*=DivUserListContainer]", {
+    timeout: 30000,
+  });
+}
 export { extractFollowAndFollowers };

package/src/watch/data-store.js CHANGED Viewed

@@ -2223,6 +2223,58 @@ export function createStore(filePath, options = {}) {
   if (filePath) {
     // 初始化 SQLite 用户表（用于判重）
     initUserDb(filePath);
+    // 从数据库恢复偏移量
+    loadLlmSampleOffsets();
+  }
+  /**
+   * 从数据库加载 LLM 采样偏移量
+   */
+  function loadLlmSampleOffsets() {
+    try {
+      const row = db
+        .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
+        .get();
+      if (row && row.offsets) {
+        const parsed = JSON.parse(row.offsets);
+        if (parsed && typeof parsed === "object") {
+          Object.entries(parsed).forEach(([k, v]) => {
+            llmSampleOffsets.set(k, v);
+          });
+          console.error(
+            `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
+              llmSampleOffsets.entries(),
+            )
+              .map(([k, v]) => `${k}:${v}`)
+              .join(", ")}`,
+          );
+        }
+      }
+    } catch (e) {
+      // 表不存在或解析失败，使用空偏移量
+      console.error(
+        `[data-store] 加载 LLM 采样偏移量失败，使用空偏移量: ${e.message}`,
+      );
+    }
+  }
+  /**
+   * 将 LLM 采样偏移量持久化到数据库
+   */
+  function saveLlmSampleOffsets() {
+    try {
+      const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
+      // 表不存在则创建
+      db.prepare(
+        `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
+      ).run();
+      // 插入或更新
+      db.prepare(
+        `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
+      ).run(offsetsJson);
+    } catch (e) {
+      console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
+    }
   }
   // stats 缓存
@@ -2383,15 +2435,97 @@ export function createStore(filePath, options = {}) {
   }
   function flushSave() {
+    // 数据库模式：先保存 LLM 偏移量，再备份数据库
+    if (db && dbPath) {
+      try {
+        saveLlmSampleOffsets();
+      } catch (e) {
+        console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
+      }
+    }
     return Promise.resolve();
   }
-  function saveVideos() {
-    return;
+  /**
+   * 数据库备份：使用 SQLite BACKUP 命令，保留最新 maxBackups 个备份
+   * @param {number} maxBackups - 保留的备份数量，默认 3
+   * @returns {string|null} 备份文件路径，失败返回 null
+   */
+  function backupDatabase(maxBackups = 3) {
+    if (!db || !dbPath) {
+      console.error("[data-store] 数据库未初始化，跳过备份");
+      return null;
+    }
+    try {
+      // 生成备份文件名：result-20260627T094400.db
+      const now = new Date();
+      const timestamp = now
+        .toISOString()
+        .replace(/[-:T.]/g, "")
+        .slice(0, 15); // YYYYMMDDHHmmss
+      const baseName = path.basename(dbPath, ".db");
+      const backupName = `${baseName}-${timestamp}.db`;
+      const backupDir = path.dirname(dbPath);
+      const backupPath = path.join(backupDir, backupName);
+      console.error(`[data-store] 正在备份数据库: ${backupName}`);
+      // 使用 better-sqlite3 的 backup API（原子性备份，安全可靠）
+      const backupDb = new Database(backupPath);
+      db.backup("main", backupDb, "main");
+      backupDb.close();
+      // 验证备份文件大小
+      const stat = fs.statSync(backupPath);
+      const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
+      console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);
+      // 清理旧备份：保留最新 maxBackups 个
+      cleanupOldBackups(backupDir, baseName, maxBackups);
+      return backupPath;
+    } catch (e) {
+      console.error(`[data-store] 备份失败: ${e.message}`);
+      return null;
+    }
+  }
+  /**
+   * 清理旧备份文件，保留最新 maxBackups 个
+   */
+  function cleanupOldBackups(backupDir, baseName, maxBackups) {
+    try {
+      // 查找所有备份文件：baseName-YYYYMMDDHHmmss.db
+      const pattern = new RegExp(`^${baseName}-\\d{15}\\.db$`);
+      const backups = fs
+        .readdirSync(backupDir)
+        .filter((f) => pattern.test(f))
+        .sort() // 按时间戳排序（ASCII 排序 = 时间排序）
+        .reverse(); // 最新的在前
+      if (backups.length > maxBackups) {
+        const toDelete = backups.slice(maxBackups);
+        for (const file of toDelete) {
+          const filePath = path.join(backupDir, file);
+          fs.unlinkSync(filePath);
+          console.error(`[data-store] 已清理旧备份: ${file}`);
+        }
+      }
+      console.error(
+        `[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
+      );
+    } catch (e) {
+      console.error(`[data-store] 清理旧备份失败: ${e.message}`);
+    }
   }
   function stopBackup() {
-    return;
+    // 退出时执行备份
+    if (db && dbPath) {
+      backupDatabase();
+    }
   }
   function getUser(uid) {
@@ -2795,7 +2929,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
               .prepare(
                 `
                 SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
-                ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+                ORDER BY
+                  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+                  COALESCE(video_count, 0) DESC, created_at DESC
                 LIMIT ? OFFSET ?
               `,
               )
@@ -2803,11 +2939,32 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
             if (samples.length === 0) break;
-            const scores = await scoreJobsBatch(
-              samples,
-              DEFAULT_TARGET_LOCATIONS,
+            // 分离 tag 来源和非 tag 来源：tag 来源跳过 LLM 打分直接合格
+            const tagSamples = samples.filter((s) =>
+              (s.sources || "").includes("tag"),
             );
-            const batchQualified = scores.filter((s) => s.score >= llmMinScore);
+            const nonTagSamples = samples.filter(
+              (s) => !(s.sources || "").includes("tag"),
+            );
+            // tag 来源直接加入合格列表
+            if (tagSamples.length > 0) {
+              allQualified.push(...tagSamples.map((s) => s.unique_id));
+              console.error(
+                `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
+              );
+            }
+            // 非 tag 来源走 LLM 打分
+            let batchQualified = [];
+            let scores = [];
+            if (nonTagSamples.length > 0) {
+              scores = await scoreJobsBatch(
+                nonTagSamples,
+                DEFAULT_TARGET_LOCATIONS,
+              );
+              batchQualified = scores.filter((s) => s.score >= llmMinScore);
+            }
             allScores.push(...scores);
             allQualified.push(...batchQualified.map((s) => s.uniqueId));
@@ -2829,12 +2986,27 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
           if (allQualified.length >= llmMinReturn) break;
         }
-        // 按分数降序排序，取前 safeLimit 条
-        const qualifiedScores = allScores
+        // 分离 tag 合格和非 tag 合格
+        // tag 任务直接合格（不在 allScores 中），非 tag 任务走 LLM 打分
+        const tagQualified = allQualified.filter(
+          (uid) => !allScores.find((s) => s.uniqueId === uid),
+        );
+        const nonTagQualifiedScores = allScores
           .filter((s) => s.score >= llmMinScore)
-          .sort((a, b) => b.score - a.score)
-          .slice(0, safeLimit);
-        const qualified = qualifiedScores.map((s) => s.uniqueId);
+          .sort((a, b) => b.score - a.score);
+        const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
+        // 限制 tag 占比：最多占 safeLimit 的 70%，留 30% 给非 tag
+        const tagMaxCount = Math.floor(safeLimit * 0.7);
+        const tagCount = Math.min(tagQualified.length, tagMaxCount);
+        const nonTagMaxCount = safeLimit - tagCount;
+        const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
+        // 最终合格列表：tag 优先 + 非 tag 按分数排序
+        const qualified = [
+          ...tagQualified.slice(0, tagCount),
+          ...finalNonTagQualified,
+        ];
         if (!qualified.length) {
           console.error(
@@ -2881,6 +3053,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         moveTxn();
         markStatsDirty();
+        // 持久化偏移量到数据库
+        saveLlmSampleOffsets();
         // 打印最终偏移量状态
         const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
           .map(([k, v]) => `${k}:${v}`)
@@ -2920,7 +3095,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
           status_code, latest_video_time, user_create_time
         FROM raw_jobs
         WHERE ${whereSql}
-        ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+        ORDER BY
+          CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+          COALESCE(video_count, 0) DESC, created_at DESC
         LIMIT ?
       `,
       ).run(...args, safeLimit);
@@ -2932,7 +3109,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         WHERE unique_id IN (
           SELECT unique_id FROM raw_jobs
           WHERE ${whereSql}
-          ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+          ORDER BY
+            CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+            COALESCE(video_count, 0) DESC, created_at DESC
           LIMIT ?
         )
       `,
@@ -3786,6 +3965,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
             (typeof f === "object" && f.guessedLocation) || guessedLocation,
         };
       }),
+      ...(result.discoveredRecommended || []).map((f) => {
+        const handle = Array.isArray(f) ? f[0] : f.handle || "";
+        const name = Array.isArray(f) ? f[1] : f.displayName || null;
+        return {
+          uniqueId: handle.replace(/^@/, ""),
+          nickname: name,
+          sources: ["recommended"],
+          guessedLocation:
+            (typeof f === "object" && f.guessedLocation) || guessedLocation,
+        };
+      }),
     ].filter((u) => u.uniqueId);
     // 先对 discovered 内部去重，再用 uidIndex 批量判断
@@ -3880,6 +4070,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         "discoveredGuessAuthors",
         "discoveredFollowing",
         "discoveredFollowers",
+        "discoveredRecommended",
         "uniqueId",
         "sources",
         "topRecentVideo", // 单独处理，不进入通用循环
@@ -4262,7 +4453,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         sqlParams.push(...targetCountries);
       }
-      sql += ` ORDER BY created_at ASC, unique_id ASC LIMIT ?`;
+      // 优先级：sources 包含 "tag" 的任务优先，其余按 created_at 排序
+      sql += ` ORDER BY
+        CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+        created_at ASC,
+        unique_id ASC
+      LIMIT ?`;
       sqlParams.push(l);
       const rows = db.prepare(sql).all(...sqlParams);
@@ -4310,6 +4506,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         }
         return false;
       })
+      .sort((a, b) => {
+        // 优先级：sources 包含 "tag" 的任务优先
+        const aIsTag = (a.sources || "").includes("tag");
+        const bIsTag = (b.sources || "").includes("tag");
+        if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
+        return (a.createdAt || 0) - (b.createdAt || 0);
+      })
       .slice(0, l);
     // 接受任务时 userUpdateCount + 1
     pending.forEach((u) => {
@@ -4823,6 +5026,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
     commitCommentTask,
     debugClaimNextJob,
     stopBackup,
+    backupDatabase, // 手动备份数据库
     rawQuery,
     getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
     // Tag 发现与打分

package/src/watch/server.js CHANGED Viewed

@@ -1125,6 +1125,51 @@ export function startWatchServer(
         return;
       }
+      // GET /api/tags/history?country=ES&type=productive|dead — CLI 模式获取历史 tag
+      if (req.method === "GET" && routePath === "/api/tags/history") {
+        const country = params.country || null;
+        const type = params.type || "productive";
+        if (!country) {
+          sendJSON(res, 400, { error: "缺少 country 参数" });
+          return;
+        }
+        let tags;
+        if (type === "dead") {
+          tags = store.getDeadTags(country);
+        } else if (type === "all") {
+          tags = store.getTagsByCountry(country, 0);
+        } else {
+          tags = store.getTagsByCountry(country, 50);
+        }
+        sendJSON(res, 200, { tags, total: tags.length });
+        return;
+      }
+      // POST /api/tags/productive — CLI 模式上报 productive tag
+      if (req.method === "POST" && routePath === "/api/tags/productive") {
+        try {
+          const body = await readBody(req);
+          const { tag, countries, pushedUsers } = body || {};
+          if (!tag || !countries || countries.length === 0) {
+            sendJSON(res, 400, { error: "tag 和 countries 不能为空" });
+            return;
+          }
+          // 将 productive 信息写入数据库（更新已有 tag 或插入新 tag）
+          for (const c of countries) {
+            store.insertTag(tag, [c], "cli-productive");
+          }
+          sendJSON(res, 200, { ok: true });
+        } catch (e) {
+          sendJSON(res, 500, { error: e.message });
+        }
+        return;
+      }
       if (
         req.method === "GET" &&
         (routePath === "/" || routePath === "/index.html")
@@ -1205,7 +1250,10 @@ export function startWatchServer(
         console.error("[server] HTTP 服务已关闭");
       });
       await store.flushSave();
-      console.error("[server] 数据已保存，退出");
+      console.error("[server] 数据已保存");
+      // 备份数据库
+      store.stopBackup();
+      console.error("[server] 退出");
       process.exit(0);
     }

package/src/watch/tag-service.js CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 // 国家 → 语言映射
-const COUNTRY_LANG = {
+export const COUNTRY_LANG = {
   CZ: "cs",
   GR: "el",
   HU: "hu",
@@ -22,16 +22,16 @@ const COUNTRY_LANG = {
   AT: "de",
 };
-const LLM_URL = "http://82.156.52.214:18000/v1/chat/completions";
-const LLM_MODEL = "zc-fast";
+export const LLM_URL = "http://82.156.52.214:18000/v1/chat/completions";
+export const LLM_MODEL = "zc-fast";
-function getLang(country) {
+export function getLang(country) {
   return COUNTRY_LANG[country] || "en";
 }
 // ====== LLM 调用 ======
-async function callLLM(prompt) {
+export async function callLLM(prompt) {
   const apiKey = process.env.APIKEY || "";
   const { fetch } = await import("undici");
@@ -53,11 +53,11 @@ async function callLLM(prompt) {
   return result.choices?.[0]?.message?.content || "";
 }
-function normalizeTag(t) {
+export function normalizeTag(t) {
   return t.replace(/^#+/, "").trim().toLowerCase();
 }
-function parseTagsFromResponse(content) {
+export function parseTagsFromResponse(content) {
   try {
     const parsed = JSON.parse(content);
     if (Array.isArray(parsed)) {
@@ -81,7 +81,7 @@ function parseTagsFromResponse(content) {
 // ====== Prompt 组装 ======
-function buildDiscoverPrompt(country, count, history, userPrompt) {
+export function buildDiscoverPrompt(country, count, history, userPrompt) {
   const lang = getLang(country);
   const langNames = {
     cs: "Czech",
@@ -98,18 +98,18 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
   };
   const langName = langNames[lang] || lang;
-  // 正样本：该国高分 tag
+  // 正样本：该国高分 tag（只给 LLM 看效果，不给模板）
   const productive = history.productive || [];
   const productiveHint =
     productive.length > 0
-      ? `\nHigh-performing tags for ${country}: ${productive.map((t) => t.tag).join(", ")}. Generate new tags in similar patterns.`
+      ? `\nTags that already worked well for ${country}: ${productive.map((t) => t.tag).join(", ")}. These are examples of what works — explore DIFFERENT directions, not variations of these.`
       : "";
   // 负样本：该国 dead tag
   const dead = history.dead || [];
   const deadHint =
     dead.length > 0
-      ? `\nAvoid these tags and similar patterns (they found no matching users): ${dead.map((t) => t.tag).join(", ")}.`
+      ? `\nTags that failed for ${country} (found no matching users): ${dead.map((t) => t.tag).join(", ")}. Avoid these and similar patterns.`
       : "";
   // 死因分析
@@ -118,20 +118,35 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
   ];
   const errorHint =
     errorPatterns.length > 0
-      ? `\nReasons previous tags failed: ${errorPatterns.join("; ")}. Avoid generating tags likely to have same issues.`
+      ? `\nWhy previous tags failed: ${errorPatterns.join("; ")}. Avoid tags likely to have same issues.`
+      : "";
+  // 已存在的所有 tag（防止重复生成）
+  const allExisting = history.allExisting || [];
+  const existingHint =
+    allExisting.length > 0
+      ? `\nTags already in database (DO NOT generate these again): ${allExisting.slice(-50).join(", ")}.`
       : "";
   const userHint = userPrompt
     ? `\nAdditional focus: ${userPrompt}. Generate tags specifically for this niche.`
     : "";
-  return `Generate ${count} TikTok hashtags in ${langName} language for e-commerce sellers, shop owners, and small business merchants in ${country}.
+  return `You are discovering TikTok hashtags used by people who sell things in ${country}.
+Your goal: Find hashtags that real sellers in ${country} actually use — any kind of tag they might use. Think broadly:
+- Who they are (seller, shop owner, entrepreneur, artisan...)
+- What they sell (shoes, clothes, jewelry, food, pets, furniture...)
+- How they sell (online, handmade, second-hand, local pickup...)
+- Product-specific tags (sneakers, dresses, cakes, necklaces...)
+All tags must be in ${langName} language (or widely used in ${country}).
+Generate ${count} tags that are ALL DIFFERENT from each other and from any existing tags.
-Requirements:
-- Tags must be in ${langName} language (or widely used in ${country})
-- Focus on tags that sellers/merchants actually use to promote their products
-- Include local language commerce tags (sell, shop, store, online, vendor, etc.)
-- Prefer specific/niche tags over generic ones (e.g., "vendozapatos" not "vender")${productiveHint}${deadHint}${errorHint}${userHint}
+Rules:
+- Each tag should explore a DIFFERENT angle — don't just swap country suffixes
+- Prefer specific and niche tags over generic ones (e.g., "vendozapatos" beats "vender")
+- Do NOT generate tags that already exist${productiveHint}${deadHint}${errorHint}${existingHint}${userHint}
 Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
 }
@@ -151,7 +166,10 @@ export async function discoverTagsForCountry(
   // 读取历史打分记录
   const productive = store.getTagsByCountry(country, 50);
   const dead = store.getDeadTags(country);
-  const history = { productive, dead };
+  // 获取该国所有已存在的 tag 名（防止重复生成）
+  const allTags = store.getTagsByCountry(country, 0);
+  const allExisting = allTags.map((t) => t.tag);
+  const history = { productive, dead, allExisting };
   // 组装 prompt 并调用 LLM
   const prompt = buildDiscoverPrompt(country, count, history, userPrompt);