npm - tt-help-cli-ycl - Versions diffs - 1.3.85 → 1.3.87 - Mend

tt-help-cli-ycl 1.3.85 → 1.3.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "tt-help-cli-ycl",
-  "version": "1.3.85",
+  "version": "1.3.87",
   "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
   "type": "module",
   "bin": {

package/src/cli/tag.js CHANGED

@@ -6,6 +6,10 @@ import {
   isLocationInList,
 } from "../lib/target-locations.js";
 import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
+import { server as cfgServer } from "../lib/constants.js";
+const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
+const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
 async function pushToServer(serverUrl, filteredAuthors, videos) {
   const users = filteredAuthors.map((author) => {
@@ -182,14 +186,26 @@ async function processTag(
 export async function handleDiscover(parsed) {
   const { tagDiscover } = parsed;
-  const { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
+  let { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
+  // 支持 'all' 展开为全部目标国家
+  if (
+    countries &&
+    countries.length === 1 &&
+    countries[0].toUpperCase() === "ALL"
+  ) {
+    countries = ALL_COUNTRIES;
+  }
   if (!countries || countries.length === 0) {
     console.error(
-      "用法: tt-help tag discover <国家> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
+      "用法: tt-help tag discover <国家|all> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
     );
     console.error("");
     console.error("示例:");
+    console.error(
+      "  tt-help tag discover all --count 10             # 为全部 13 个国家各生成 10 个标签",
+    );
     console.error(
       "  tt-help tag discover ES                          # 为西班牙生成 4 个标签",
     );
@@ -249,7 +265,7 @@ export async function handleScore(parsed) {
     process.exit(1);
   }
-  const baseUrl = serverUrl || "http://127.0.0.1:3000";
+  const baseUrl = serverUrl || DEFAULT_SERVER;
   const targetCountries = countries || [
     "ES",
     "FR",
@@ -403,9 +419,9 @@ async function reportToServer(baseUrl, result) {
 export async function handleScoreAll(parsed) {
   const { tagScoreAll } = parsed;
-  const { countries, serverUrl } = tagScoreAll || {};
+  let { countries, serverUrl, autoDiscover } = tagScoreAll || {};
-  const baseUrl = serverUrl || "http://127.0.0.1:3000";
+  const baseUrl = serverUrl || DEFAULT_SERVER;
   const targetCountries = countries || [
     "ES",
     "FR",
@@ -429,13 +445,15 @@ export async function handleScoreAll(parsed) {
   log("  自动循环打分模式（客户端本地执行）");
   log(`  目标国家: ${targetCountries.join(", ")}`);
   log(`  服务端:   ${baseUrl}`);
+  if (autoDiscover) log(`  自动发现: 开启（无任务时自动生成标签）`);
   log("  流程: 从服务端拉 tag → 本地 Playwright 抓取 → enrich → 算分 → 上报");
   log("  每个标签约 1-2 分钟");
   log("========================================");
   log("");
   let totalScored = 0;
-  let totalNew = null;
+  let lastDiscoverTime = 0;
+  const DISCOVER_COOLDOWN = 5 * 60 * 1000; // 5 分钟冷却
   // 复用 TikTokScraper 实例，避免每次 enrich 都启动/关闭 headless 浏览器
   const enrichScraper = new TikTokScraper({ poolSize: 3 });
@@ -445,37 +463,42 @@ export async function handleScoreAll(parsed) {
   try {
     while (true) {
-      // 查剩余数量
-      if (totalNew === null) {
-        try {
-          const statsRes = await fetch(
-            `${baseUrl}/api/tags?status=new&limit=1000`,
-          );
-          const statsData = await statsRes.json();
-          totalNew = statsData.total || 0;
-          log(`📋 待打分标签: ${totalNew} 个`);
-          log("");
-        } catch (e) {
-          log(`⚠️ 无法连接服务端: ${e.message}`);
-          break;
-        }
-      }
       // 从服务端取下一个 new 标签
       const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
       const tagsData = await tagsRes.json();
       if (!tagsData.tags || tagsData.tags.length === 0) {
-        log("");
-        log("========================================");
-        log(`  🎉 全部完成! 共打分 ${totalScored} 个标签`);
-        log("========================================");
-        break;
+        // 自动发现：无任务时自动生成标签
+        if (autoDiscover && Date.now() - lastDiscoverTime > DISCOVER_COOLDOWN) {
+          log(
+            `🔍 无待打分标签，自动为 ${targetCountries.length} 个国家生成标签...`,
+          );
+          for (const country of targetCountries) {
+            try {
+              const discRes = await fetch(
+                `${baseUrl}/api/tags/discover?country=${country}&count=5`,
+              );
+              const discData = await discRes.json();
+              if (discData.inserted) {
+                log(`  ${country}: 新增 ${discData.inserted} 个`);
+              }
+            } catch (e) {
+              log(`  ${country}: 请求失败 (${e.message})`);
+            }
+          }
+          lastDiscoverTime = Date.now();
+          // 等 3 秒让服务端处理完
+          await new Promise((r) => setTimeout(r, 3000));
+          continue;
+        }
+        log(`⏳ 暂无待打分标签，10 秒后重试...`);
+        await new Promise((r) => setTimeout(r, 10000));
+        continue;
       }
-      const tag = tagsData.tags[0].tag;
+      const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
       const startTime = Date.now();
-      log(`[${totalScored + 1}/${totalNew || "?"}] 正在打分 #${tag} ...`);
+      log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
       const result = {
         tag,
@@ -498,7 +521,16 @@ export async function handleScoreAll(parsed) {
         });
         const claimData = await claimRes.json();
         if (!claimData.ok) {
-          log(`  ⚠️ 无法锁定 (${claimData.error})，跳过`);
+          // already claimed: 其他机器抢先了，跳过不标 dead
+          if (claimData.error && claimData.error.includes("already claimed")) {
+            log(`  ⏭️ 已被其他客户端锁定，跳过`);
+            continue;
+          }
+          log(`  ⚠️ 无法锁定 (${claimData.error})，标记为 dead 并跳过`);
+          result.error = claimData.error;
+          result.status = "dead";
+          await reportToServer(baseUrl, result);
+          totalScored++;
           continue;
         }
@@ -527,7 +559,6 @@ export async function handleScoreAll(parsed) {
         }
         // enrich: 逐个视频查 view-source 获取国家
-        log(`  补充国家信息...`);
         const enriched = await enrichVideosWithLocation(videos, {
           mode: "videos",
           existingScraper: enrichScraper,
@@ -540,8 +571,6 @@ export async function handleScoreAll(parsed) {
           },
         });
         videos = enriched.videos;
-        const withLoc = videos.filter((v) => v.locationCreated).length;
-        log(`  完成: ${withLoc}/${videos.length} 个视频有国家信息`);
         // 过滤 + 算分 (共用函数)
         const { matchedAuthorSet } = applyFilterAndScore(
@@ -577,10 +606,8 @@ export async function handleScoreAll(parsed) {
           .map((c) => `${c.c}:${c.n}`)
           .join(" ");
         log(
-          `  ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)`,
+          `  ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? "  " + mc : ""}`,
         );
-        if (mc) log(`     国家: ${mc}`);
-        log(`     剩余: ~${Math.max(0, (totalNew || 0) - totalScored)} 个`);
         log("");
       } catch (e) {
         log(`  ❌ 失败: ${e.message}`);

package/src/lib/args.js CHANGED

@@ -727,6 +727,7 @@ function parseTagArgs(args) {
   let discoverCountries = [];
   let discoverCount = 4;
   let discoverPrompt = null;
+  let autoDiscover = false;
   let isDiscover = false;
   let isScore = false;
   let isScoreAll = false;
@@ -767,6 +768,8 @@ function parseTagArgs(args) {
       }
     } else if (arg === "--count") {
       discoverCount = parseInt(args[++i]) || 4;
+    } else if (arg === "--auto-discover") {
+      autoDiscover = true;
     } else if (arg === "--countries") {
       scoreCountries = args[++i]
         .split(",")
@@ -854,6 +857,7 @@ function parseTagArgs(args) {
       tagScoreAll: {
         countries: scoreCountries,
         serverUrl,
+        autoDiscover,
       },
       urls: [],
       outputFormat: "json",

package/src/lib/tag-discover.js CHANGED

@@ -1,14 +1,20 @@
-import { readFileSync, writeFileSync, existsSync } from 'fs';
-import { resolve, dirname } from 'path';
-import { fileURLToPath } from 'url';
+import { readFileSync, writeFileSync, existsSync } from "fs";
+import { resolve, dirname } from "path";
+import { fileURLToPath } from "url";
 const __dirname = dirname(fileURLToPath(import.meta.url));
-const TAGS_FILE = resolve(__dirname, '..', '..', 'data', 'productive-tags.json');
+const TAGS_FILE = resolve(
+  __dirname,
+  "..",
+  "..",
+  "data",
+  "productive-tags.json",
+);
 function loadTags() {
   try {
     if (existsSync(TAGS_FILE)) {
-      return JSON.parse(readFileSync(TAGS_FILE, 'utf-8'));
+      return JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
     }
   } catch {}
   return { tags: [], lastUpdated: null };
@@ -17,10 +23,10 @@ function loadTags() {
 function saveTags(data) {
   const dir = dirname(TAGS_FILE);
   if (!existsSync(dir)) {
-    const { mkdirSync } = require('fs');
+    const { mkdirSync } = require("fs");
     mkdirSync(dir, { recursive: true });
   }
-  writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), 'utf-8');
+  writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
 }
 export function getProductiveTags() {
@@ -29,7 +35,7 @@ export function getProductiveTags() {
 export function recordProductiveTag(tag, country, userCount) {
   const data = loadTags();
-  const existing = data.tags.find(t => t.tag === tag);
+  const existing = data.tags.find((t) => t.tag === tag);
   if (existing) {
     if (!existing.countries.includes(country)) {
       existing.countries.push(country);
@@ -50,39 +56,50 @@ export function recordProductiveTag(tag, country, userCount) {
 }
 async function callLLM(prompt) {
-  const apiKey = process.env.APIKEY || '';
-  const { fetch } = await import('undici');
-  const response = await fetch('http://82.156.52.214:18000/v1/chat/completions', {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      Authorization: `Bearer ${apiKey}`,
+  const apiKey = process.env.APIKEY || "";
+  const { fetch } = await import("undici");
+  const response = await fetch(
+    "http://82.156.52.214:18000/v1/chat/completions",
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: JSON.stringify({
+        model: "zc-fast",
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: 1024,
+        temperature: 0.7,
+      }),
     },
-    body: JSON.stringify({
-      model: 'zc-fast',
-      messages: [{ role: 'user', content: prompt }],
-      max_tokens: 1024,
-      temperature: 0.7,
-    }),
-  });
+  );
   const result = await response.json();
-  const content = result.choices?.[0]?.message?.content || '';
+  const content = result.choices?.[0]?.message?.content || "";
   return content;
 }
+function normalizeTag(t) {
+  return t.replace(/^#+/, "").trim().toLowerCase();
+}
 function parseTagsFromResponse(content) {
   try {
     const parsed = JSON.parse(content);
-    if (Array.isArray(parsed)) return parsed;
-    if (Array.isArray(parsed.tags)) return parsed.tags;
+    if (Array.isArray(parsed)) {
+      return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
+    }
+    if (Array.isArray(parsed.tags)) {
+      return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
+    }
   } catch {}
   const lines = content.split(/[\n,]+/);
   const tags = [];
   for (const line of lines) {
-    const cleaned = line.replace(/^[-\d.\s#]+/, '').trim().toLowerCase();
+    const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
     if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
       tags.push(cleaned);
     }
@@ -91,17 +108,22 @@ function parseTagsFromResponse(content) {
 }
 export async function discoverTags(countries, options = {}) {
-  const { language = 'auto', count = 10 } = options;
+  const { language = "auto", count = 10 } = options;
   const productiveTags = getProductiveTags();
-  const countryStr = Array.isArray(countries) ? countries.join(', ') : countries;
-  const langHint = language === 'auto'
-    ? ''
-    : `Tags should be in ${language} language.`;
-  const historyHint = productiveTags.length > 0
-    ? `Previously productive tags for these countries: ${productiveTags.filter(t => t.countries.some(c => countries.includes(c))).map(t => `#${t.tag}`).join(', ')}. Generate new ones, don't repeat these.`
-    : '';
+  const countryStr = Array.isArray(countries)
+    ? countries.join(", ")
+    : countries;
+  const langHint =
+    language === "auto" ? "" : `Tags should be in ${language} language.`;
+  const historyHint =
+    productiveTags.length > 0
+      ? `Previously productive tags for these countries: ${productiveTags
+          .filter((t) => t.countries.some((c) => countries.includes(c)))
+          .map((t) => `#${t.tag}`)
+          .join(", ")}. Generate new ones, don't repeat these.`
+      : "";
   const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
@@ -114,11 +136,15 @@ ${historyHint}
 Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
-  process.stderr.write(`  [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`);
+  process.stderr.write(
+    `  [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
+  );
   const content = await callLLM(prompt);
   const tags = parseTagsFromResponse(content);
   const unique = [...new Set(tags)].slice(0, count);
-  process.stderr.write(`  [LLM] 生成 ${unique.length} 个标签: ${unique.join(', ')}\n`);
+  process.stderr.write(
+    `  [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
+  );
   return unique;
 }

package/src/npm-main.js CHANGED

@@ -7,7 +7,12 @@ import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
 import { handleOpen } from "./cli/open.js";
 import { handleComments } from "./cli/comments.js";
 import { handleRefresh } from "./cli/refresh.js";
-import { handleTag } from "./cli/tag.js";
+import {
+  handleTag,
+  handleDiscover,
+  handleScore,
+  handleScoreAll,
+} from "./cli/tag.js";
 function exitUnsupportedCommand(command) {
   console.error(
@@ -39,6 +44,12 @@ async function main() {
       return handleRefresh(parsed);
     case "tag":
       return handleTag(parsed);
+    case "tag-discover":
+      return handleDiscover(parsed);
+    case "tag-score":
+      return handleScore(parsed);
+    case "tag-score-all":
+      return handleScoreAll(parsed);
   }
   const {

package/src/watch/data-store.js CHANGED

@@ -1344,6 +1344,11 @@ function getRawJobsPageFromDb({
 function insertTag(tag, countries, source = "llm") {
   if (!db) return { inserted: false, error: "db not ready" };
+  // 防止存入带 # 前缀的 tag
+  const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
+  if (!normalized || normalized.length < 2) {
+    return { inserted: false, error: "invalid tag" };
+  }
   try {
     const result = db
       .prepare(
@@ -1352,8 +1357,8 @@ function insertTag(tag, countries, source = "llm") {
       VALUES (?, ?, ?)
     `,
       )
-      .run(tag, JSON.stringify(countries), source);
-    return { inserted: result.changes > 0, tag };
+      .run(normalized, JSON.stringify(countries), source);
+    return { inserted: result.changes > 0, tag: normalized };
   } catch (e) {
     return { inserted: false, error: e.message };
   }
@@ -1415,12 +1420,19 @@ function getDeadTags(country) {
 function claimTag(tag) {
   if (!db) return { ok: false, error: "db not ready" };
-  const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
-  if (!row) return { ok: false, error: "tag not found" };
-  if (row.status !== "new")
-    return { ok: false, error: `tag status is ${row.status}, not new` };
-  db.prepare("UPDATE tags SET status = 'scoring' WHERE tag = ?").run(tag);
-  return { ok: true, tag, previousStatus: row.status };
+  // 原子操作：只有 status='new' 时才更新为 'scoring'，避免竞态
+  const result = db
+    .prepare(
+      "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
+    )
+    .run(tag);
+  if (result.changes === 0) {
+    // 检查是否不存在 vs 已被别人锁定
+    const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
+    if (!row) return { ok: false, error: "tag not found" };
+    return { ok: false, error: `tag status is ${row.status}, already claimed` };
+  }
+  return { ok: true, tag };
 }
 function reportTagScore(tag, fields) {
@@ -1503,6 +1515,68 @@ function rawQuery(sql, params = []) {
   }
 }
+// 清理 tags 表中以 # 开头的脏数据
+function normalizeTags() {
+  if (!db) return { ok: false, error: "db not ready" };
+  const dirtyRows = db
+    .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
+    .all();
+  const fixed = [];
+  const merged = [];
+  const skipped = [];
+  for (const row of dirtyRows) {
+    const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
+    if (!cleanTag || cleanTag.length < 2) {
+      db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
+      skipped.push({
+        dirty: row.tag,
+        reason: "empty after normalize, deleted",
+      });
+      continue;
+    }
+    // 检查 cleanTag 是否已存在
+    const existing = db
+      .prepare("SELECT * FROM tags WHERE tag = ?")
+      .get(cleanTag);
+    if (existing) {
+      // 合并：保留已有 clean 版本，合并 countries
+      const oldCountries = JSON.parse(row.countries || "[]");
+      const existCountries = JSON.parse(existing.countries || "[]");
+      const mergedCountries = [
+        ...new Set([...existCountries, ...oldCountries]),
+      ];
+      db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
+        JSON.stringify(mergedCountries),
+        cleanTag,
+      );
+      // 删除脏数据
+      db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
+      merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
+    } else {
+      // 直接重命名
+      db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
+      fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
+    }
+  }
+  return {
+    ok: true,
+    fixed: fixed.length,
+    merged: merged.length,
+    skipped: skipped.length,
+    details: { fixed, merged, skipped },
+  };
+}
+function clearTags() {
+  if (!db) return { ok: false, error: "db not ready" };
+  const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
+  db.exec("DELETE FROM tags");
+  return { ok: true, deleted: count };
+}
 function getUsersPageFromDb({
   status,
   search,
@@ -4759,6 +4833,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
     claimTag,
     reportTagScore,
     getAllTags,
+    normalizeTags,
+    clearTags,
     data,
   };

package/src/watch/server.js CHANGED

@@ -994,6 +994,20 @@ export function startWatchServer(
         return;
       }
+      // POST /api/tags/normalize — 清理以 # 开头的脏 tag
+      if (req.method === "POST" && routePath === "/api/tags/normalize") {
+        const result = store.normalizeTags();
+        sendJSON(res, 200, result);
+        return;
+      }
+      // POST /api/tags/clear — 清空 tags 表
+      if (req.method === "POST" && routePath === "/api/tags/clear") {
+        const result = store.clearTags();
+        sendJSON(res, 200, result);
+        return;
+      }
       // POST /api/tags/claim  { tag } — 锁定 tag 状态为 scoring（防并发冲突）
       if (req.method === "POST" && routePath === "/api/tags/claim") {
         try {

package/src/watch/tag-service.js CHANGED

@@ -53,20 +53,25 @@ async function callLLM(prompt) {
   return result.choices?.[0]?.message?.content || "";
 }
+function normalizeTag(t) {
+  return t.replace(/^#+/, "").trim().toLowerCase();
+}
 function parseTagsFromResponse(content) {
   try {
     const parsed = JSON.parse(content);
-    if (Array.isArray(parsed)) return parsed;
-    if (Array.isArray(parsed.tags)) return parsed.tags;
+    if (Array.isArray(parsed)) {
+      return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
+    }
+    if (Array.isArray(parsed.tags)) {
+      return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
+    }
   } catch {}
   const lines = content.split(/[\n,]+/);
   const tags = [];
   for (const line of lines) {
-    const cleaned = line
-      .replace(/^[-\d.\s#]+/, "")
-      .trim()
-      .toLowerCase();
+    const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
     if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
       tags.push(cleaned);
     }
@@ -165,7 +170,7 @@ export async function discoverTagsForCountry(
   const inserted = [];
   for (const tag of unique) {
     const result = store.insertTag(tag, [country], "llm");
-    if (result.inserted) inserted.push(tag);
+    if (result.inserted) inserted.push(result.tag);
   }
   return {