npm - tt-help-cli-ycl - Versions diffs - 1.3.91 → 1.3.93 - Mend

tt-help-cli-ycl 1.3.91 → 1.3.93

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tt-help-cli-ycl",
-  "version": "1.3.91",
+  "version": "1.3.93",
   "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
   "type": "module",
   "bin": {

package/src/cli/tag.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { writeFileSync } from "fs";
+import { randomUUID } from "crypto";
 import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
 import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
 import {
@@ -11,7 +12,22 @@ import { server as cfgServer } from "../lib/constants.js";
 const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
 const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
-async function pushToServer(serverUrl, filteredAuthors, videos) {
+// 构建带客户端追踪 header 的 fetch 封装
+function buildClientHeaders(clientId, meta, extra = {}) {
+  return {
+    "X-Client-Id": clientId,
+    "X-Client-Info": JSON.stringify(meta),
+    ...extra,
+  };
+}
+async function pushToServer(
+  serverUrl,
+  filteredAuthors,
+  videos,
+  clientId,
+  meta,
+) {
   const users = filteredAuthors.map((author) => {
     const video = videos.find((v) => v.authorUniqueId === author);
     return {
@@ -23,7 +39,9 @@ async function pushToServer(serverUrl, filteredAuthors, videos) {
   const res = await fetch(`${serverUrl}/api/raw-users`, {
     method: "POST",
-    headers: { "Content-Type": "application/json" },
+    headers: buildClientHeaders(clientId, meta, {
+      "Content-Type": "application/json",
+    }),
     body: JSON.stringify({ users }),
   });
   const data = await res.json();
@@ -412,11 +430,13 @@ export async function handleScore(parsed) {
   console.log(JSON.stringify(result, null, 2));
 }
-async function reportToServer(baseUrl, result) {
+async function reportToServer(baseUrl, result, clientId, meta) {
   try {
     const res = await fetch(`${baseUrl}/api/tags/score-result`, {
       method: "POST",
-      headers: { "Content-Type": "application/json" },
+      headers: buildClientHeaders(clientId, meta, {
+        "Content-Type": "application/json",
+      }),
       body: JSON.stringify(result),
     });
     const data = await res.json();
@@ -464,17 +484,24 @@ export async function handleScoreAll(parsed) {
   let emptyRounds = 0; // 连续无任务的轮数
   const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
+  // 生成客户端 ID，用于服务端追踪
+  const clientId = randomUUID();
+  const clientMeta = { type: "scoring" };
   // 复用 TikTokScraper 实例，避免每次 enrich 都启动/关闭 headless 浏览器
   const enrichScraper = new TikTokScraper({ poolSize: 3 });
   await enrichScraper.init();
   log("✅ TikTokScraper 已就绪 (enrich 复用)");
+  log(`  客户端 ID: ${clientId.substring(0, 8)}...`);
   log("");
   try {
     while (true) {
       try {
         // 从服务端取下一个 new 标签
-        const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
+        const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`, {
+          headers: buildClientHeaders(clientId, clientMeta),
+        });
         const tagsData = await tagsRes.json();
         if (!tagsData.tags || tagsData.tags.length === 0) {
           emptyRounds++;
@@ -488,6 +515,7 @@ export async function handleScoreAll(parsed) {
               try {
                 const discRes = await fetch(
                   `${baseUrl}/api/tags/discover?country=${country}&count=5`,
+                  { headers: buildClientHeaders(clientId, clientMeta) },
                 );
                 const discData = await discRes.json();
                 if (discData.inserted) {
@@ -530,10 +558,12 @@ export async function handleScoreAll(parsed) {
           error: null,
         };
-        // 锁定 tag
+        // 锁定 tag（meta 中不放入 tag，避免非 ASCII 字符导致 header ByteString 报错）
         const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
           method: "POST",
-          headers: { "Content-Type": "application/json" },
+          headers: buildClientHeaders(clientId, clientMeta, {
+            "Content-Type": "application/json",
+          }),
           body: JSON.stringify({ tag }),
         });
         const claimData = await claimRes.json();
@@ -546,7 +576,7 @@ export async function handleScoreAll(parsed) {
           log(`  ⚠️ 无法锁定 (${claimData.error})，标记为 dead 并跳过`);
           result.error = claimData.error;
           result.status = "dead";
-          await reportToServer(baseUrl, result);
+          await reportToServer(baseUrl, result, clientId, clientMeta);
           totalScored++;
           continue;
         }
@@ -570,7 +600,7 @@ export async function handleScoreAll(parsed) {
           log("  ⚠️ 无视频，标记 dead");
           result.status = "dead";
           result.error = "no videos found";
-          await reportToServer(baseUrl, result);
+          await reportToServer(baseUrl, result, clientId, clientMeta);
           totalScored++;
           continue;
         }
@@ -602,12 +632,14 @@ export async function handleScoreAll(parsed) {
             baseUrl,
             [...matchedAuthorSet],
             videos,
+            clientId,
+            clientMeta,
           );
           result.pushedUsers = pushResult.added || 0;
         }
         // 上报结果
-        await reportToServer(baseUrl, result);
+        await reportToServer(baseUrl, result, clientId, clientMeta);
         totalScored++;
         const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
@@ -643,12 +675,17 @@ export async function handleScoreAll(parsed) {
         }
         log(`  ❌ 失败: ${e.message}`);
         try {
-          await reportToServer(baseUrl, {
-            tag: "",
-            status: "error",
-            score: 0,
-            error: e.message,
-          });
+          await reportToServer(
+            baseUrl,
+            {
+              tag: "",
+              status: "error",
+              score: 0,
+              error: e.message,
+            },
+            clientId,
+            clientMeta,
+          );
         } catch {}
         totalScored++;
       }

package/src/watch/data-store.js CHANGED Viewed

@@ -2871,6 +2871,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
     // 如果启用 LLM 打分，先采样一批进行评分（累积模式：按猜测国家分组，使用偏移量记忆避免重复采样）
     if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
       const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
+      const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
+      const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // 非 tag 最少合格数
       const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次，防止无限循环
       // 打印当前偏移量状态
@@ -2878,7 +2880,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         .map(([k, v]) => `${k}:${v}`)
         .join(", ");
       console.error(
-        `[data-store] LLM 打分开始: 符合条件 ${count} 条，每批 ${llmSampleSize} 条，最低分 ${llmMinScore}，最少返回 ${llmMinReturn} 条`,
+        `[data-store] LLM 打分开始: 符合条件 ${count} 条，每批 ${llmSampleSize} 条，最低分 ${llmMinScore}，tag 最少 ${llmMinTagReturn}，非 tag 最少 ${llmMinNonTagReturn}`,
       );
       if (offsetSummary) {
         console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
@@ -2886,7 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
       // 返回 Promise，调用方需要 await
       return (async () => {
-        const allQualified = [];
+        const allTagQualified = []; // tag 合格列表（直接合格）
+        const allNonTagQualified = []; // 非 tag 合格列表（LLM 打分合格）
         const allScores = [];
         // 按猜测国家分组处理，每个国家使用独立的偏移量
@@ -2949,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
             // tag 来源直接加入合格列表
             if (tagSamples.length > 0) {
-              allQualified.push(...tagSamples.map((s) => s.unique_id));
+              allTagQualified.push(...tagSamples.map((s) => s.unique_id));
               console.error(
                 `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
               );
@@ -2964,47 +2967,51 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
                 DEFAULT_TARGET_LOCATIONS,
               );
               batchQualified = scores.filter((s) => s.score >= llmMinScore);
+              allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
             }
             allScores.push(...scores);
-            allQualified.push(...batchQualified.map((s) => s.uniqueId));
             totalBatches++;
+            const totalQualified = allTagQualified.length + allNonTagQualified.length;
             console.error(
-              `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条，本批合格 ${batchQualified.length} 条，累计合格 ${allQualified.length} 条`,
+              `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条，tag 合格 ${allTagQualified.length}，非 tag 合格 ${allNonTagQualified.length}，累计 ${totalQualified} 条`,
             );
             // 更新偏移量记忆
             offset += samples.length;
             llmSampleOffsets.set(location, offset);
-            // 合格数已达到最小返回阈值，停止采样
-            if (allQualified.length >= llmMinReturn) break;
+            // 检查是否两个类型都达到阈值，都达到才停止
+            const tagReached = allTagQualified.length >= llmMinTagReturn;
+            const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
+            if (tagReached && nonTagReached) {
+              console.error(
+                `[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn})，停止采样`,
+              );
+              break;
+            }
           }
-          // 合格数已达到最小返回阈值，停止所有国家的采样
-          if (allQualified.length >= llmMinReturn) break;
+          // 检查是否两个类型都达到阈值，都达到才停止所有国家采样
+          const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
+          const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
+          if (tagReachedGlobal && nonTagReachedGlobal) break;
         }
-        // 分离 tag 合格和非 tag 合格
-        // tag 任务直接合格（不在 allScores 中），非 tag 任务走 LLM 打分
-        const tagQualified = allQualified.filter(
-          (uid) => !allScores.find((s) => s.uniqueId === uid),
-        );
-        const nonTagQualifiedScores = allScores
-          .filter((s) => s.score >= llmMinScore)
-          .sort((a, b) => b.score - a.score);
-        const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
+        // 最终合格列表：tag 优先 + 非 tag 按分数排序
         // 限制 tag 占比：最多占 safeLimit 的 70%，留 30% 给非 tag
         const tagMaxCount = Math.floor(safeLimit * 0.7);
-        const tagCount = Math.min(tagQualified.length, tagMaxCount);
+        const tagCount = Math.min(allTagQualified.length, tagMaxCount);
         const nonTagMaxCount = safeLimit - tagCount;
-        const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
-        // 最终合格列表：tag 优先 + 非 tag 按分数排序
+        const nonTagQualifiedScores = allScores
+          .filter((s) => s.score >= llmMinScore)
+          .sort((a, b) => b.score - a.score);
+        const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
         const qualified = [
-          ...tagQualified.slice(0, tagCount),
+          ...allTagQualified.slice(0, tagCount),
           ...finalNonTagQualified,
         ];

package/src/watch/public/app.js CHANGED Viewed

@@ -255,8 +255,14 @@ function renderActiveClients(clients) {
   const tbody = document.getElementById("activeClientsBody");
   if (!section || !bar) return;
-  const types = ["explore", "refresh", "attach", "comments"];
-  const labels = { explore: "Explore", refresh: "Refresh", attach: "Attach", comments: "Comments" };
+  const types = ["explore", "refresh", "attach", "comments", "scoring"];
+  const labels = {
+    explore: "Explore",
+    refresh: "Refresh",
+    attach: "Attach",
+    comments: "Comments",
+    scoring: "Scoring",
+  };
   const grouped = {};
   for (const c of clients) {
     if (!grouped[c.type]) grouped[c.type] = [];
@@ -314,9 +320,7 @@ function showClientDetail(type, clients) {
   tbody.innerHTML = clients
     .map((c) => {
       const cid = c.clientId ? c.clientId.substring(0, 8) : "-";
-      const ipPort = c.ip
-        ? c.ip + (c.port ? ":" + c.port : "")
-        : "-";
+      const ipPort = c.ip ? c.ip + (c.port ? ":" + c.port : "") : "-";
       const userId = c.userId || "-";
       const last = formatRelativeTime(c.lastSeen);
       return `<tr>

package/src/watch/server.js CHANGED Viewed

@@ -93,6 +93,7 @@ function inferClientType(routePath) {
   if (routePath.startsWith("/api/redo-job")) return "refresh";
   if (routePath.startsWith("/api/user-update-tasks")) return "attach";
   if (routePath.startsWith("/api/comment-task")) return "comments";
+  if (routePath.startsWith("/api/tags")) return "scoring";
   if (
     routePath.startsWith("/api/job") ||
     routePath.startsWith("/api/explore-new")