npm - tt-help-cli-ycl - Version diff - 1.3.92 → 1.3.93 - Mend

tt-help-cli-ycl 1.3.92 → 1.3.93

This diff shows the changes in content between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tt-help-cli-ycl",
-  "version": "1.3.92",
+  "version": "1.3.93",
   "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
   "type": "module",
   "bin": {

package/src/cli/tag.js CHANGED Viewed

@@ -619,9 +619,6 @@ export async function handleScoreAll(parsed) {
         });
         videos = enriched.videos;
-        // 更新 meta 中当前正在处理的标签
-        clientMeta.tag = tag;
         // 过滤 + 算分 (共用函数)
         const { matchedAuthorSet } = applyFilterAndScore(
           videos,

package/src/watch/data-store.js CHANGED Viewed

@@ -2871,6 +2871,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
     // 如果启用 LLM 打分，先采样一批进行评分（累积模式：按猜测国家分组，使用偏移量记忆避免重复采样）
     if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
       const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
+      const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
+      const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // 非 tag 最少合格数
       const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次，防止无限循环
       // 打印当前偏移量状态
@@ -2878,7 +2880,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         .map(([k, v]) => `${k}:${v}`)
         .join(", ");
       console.error(
-        `[data-store] LLM 打分开始: 符合条件 ${count} 条，每批 ${llmSampleSize} 条，最低分 ${llmMinScore}，最少返回 ${llmMinReturn} 条`,
+        `[data-store] LLM 打分开始: 符合条件 ${count} 条，每批 ${llmSampleSize} 条，最低分 ${llmMinScore}，tag 最少 ${llmMinTagReturn}，非 tag 最少 ${llmMinNonTagReturn}`,
       );
       if (offsetSummary) {
         console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
@@ -2886,7 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
       // 返回 Promise，调用方需要 await
       return (async () => {
-        const allQualified = [];
+        const allTagQualified = []; // tag 合格列表（直接合格）
+        const allNonTagQualified = []; // 非 tag 合格列表（LLM 打分合格）
         const allScores = [];
         // 按猜测国家分组处理，每个国家使用独立的偏移量
@@ -2949,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
             // tag 来源直接加入合格列表
             if (tagSamples.length > 0) {
-              allQualified.push(...tagSamples.map((s) => s.unique_id));
+              allTagQualified.push(...tagSamples.map((s) => s.unique_id));
               console.error(
                 `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
               );
@@ -2964,47 +2967,51 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
                 DEFAULT_TARGET_LOCATIONS,
               );
               batchQualified = scores.filter((s) => s.score >= llmMinScore);
+              allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
             }
             allScores.push(...scores);
-            allQualified.push(...batchQualified.map((s) => s.uniqueId));
             totalBatches++;
+            const totalQualified = allTagQualified.length + allNonTagQualified.length;
             console.error(
-              `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条，本批合格 ${batchQualified.length} 条，累计合格 ${allQualified.length} 条`,
+              `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条，tag 合格 ${allTagQualified.length}，非 tag 合格 ${allNonTagQualified.length}，累计 ${totalQualified} 条`,
             );
             // 更新偏移量记忆
             offset += samples.length;
             llmSampleOffsets.set(location, offset);
-            // 合格数已达到最小返回阈值，停止采样
-            if (allQualified.length >= llmMinReturn) break;
+            // 检查是否两个类型都达到阈值，都达到才停止
+            const tagReached = allTagQualified.length >= llmMinTagReturn;
+            const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
+            if (tagReached && nonTagReached) {
+              console.error(
+                `[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn})，停止采样`,
+              );
+              break;
+            }
           }
-          // 合格数已达到最小返回阈值，停止所有国家的采样
-          if (allQualified.length >= llmMinReturn) break;
+          // 检查是否两个类型都达到阈值，都达到才停止所有国家采样
+          const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
+          const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
+          if (tagReachedGlobal && nonTagReachedGlobal) break;
         }
-        // 分离 tag 合格和非 tag 合格
-        // tag 任务直接合格（不在 allScores 中），非 tag 任务走 LLM 打分
-        const tagQualified = allQualified.filter(
-          (uid) => !allScores.find((s) => s.uniqueId === uid),
-        );
-        const nonTagQualifiedScores = allScores
-          .filter((s) => s.score >= llmMinScore)
-          .sort((a, b) => b.score - a.score);
-        const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
+        // 最终合格列表：tag 优先 + 非 tag 按分数排序
         // 限制 tag 占比：最多占 safeLimit 的 70%，留 30% 给非 tag
         const tagMaxCount = Math.floor(safeLimit * 0.7);
-        const tagCount = Math.min(tagQualified.length, tagMaxCount);
+        const tagCount = Math.min(allTagQualified.length, tagMaxCount);
         const nonTagMaxCount = safeLimit - tagCount;
-        const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
-        // 最终合格列表：tag 优先 + 非 tag 按分数排序
+        const nonTagQualifiedScores = allScores
+          .filter((s) => s.score >= llmMinScore)
+          .sort((a, b) => b.score - a.score);
+        const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
         const qualified = [
-          ...tagQualified.slice(0, tagCount),
+          ...allTagQualified.slice(0, tagCount),
           ...finalNonTagQualified,
         ];