tt-help-cli-ycl 1.3.92 → 1.3.93

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.92",
3
+ "version": "1.3.93",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -619,9 +619,6 @@ export async function handleScoreAll(parsed) {
619
619
  });
620
620
  videos = enriched.videos;
621
621
 
622
- // 更新 meta 中当前正在处理的标签
623
- clientMeta.tag = tag;
624
-
625
622
  // 过滤 + 算分 (共用函数)
626
623
  const { matchedAuthorSet } = applyFilterAndScore(
627
624
  videos,
@@ -2871,6 +2871,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2871
2871
  // 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
2872
2872
  if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
2873
2873
  const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
2874
+ const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
2875
+ const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // 非 tag 最少合格数
2874
2876
  const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
2875
2877
 
2876
2878
  // 打印当前偏移量状态
@@ -2878,7 +2880,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2878
2880
  .map(([k, v]) => `${k}:${v}`)
2879
2881
  .join(", ");
2880
2882
  console.error(
2881
- `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},最少返回 ${llmMinReturn} 条`,
2883
+ `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},tag 最少 ${llmMinTagReturn},非 tag 最少 ${llmMinNonTagReturn}`,
2882
2884
  );
2883
2885
  if (offsetSummary) {
2884
2886
  console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
@@ -2886,7 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2886
2888
 
2887
2889
  // 返回 Promise,调用方需要 await
2888
2890
  return (async () => {
2889
- const allQualified = [];
2891
+ const allTagQualified = []; // tag 合格列表(直接合格)
2892
+ const allNonTagQualified = []; // 非 tag 合格列表(LLM 打分合格)
2890
2893
  const allScores = [];
2891
2894
 
2892
2895
  // 按猜测国家分组处理,每个国家使用独立的偏移量
@@ -2949,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2949
2952
 
2950
2953
  // tag 来源直接加入合格列表
2951
2954
  if (tagSamples.length > 0) {
2952
- allQualified.push(...tagSamples.map((s) => s.unique_id));
2955
+ allTagQualified.push(...tagSamples.map((s) => s.unique_id));
2953
2956
  console.error(
2954
2957
  `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2955
2958
  );
@@ -2964,47 +2967,51 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2964
2967
  DEFAULT_TARGET_LOCATIONS,
2965
2968
  );
2966
2969
  batchQualified = scores.filter((s) => s.score >= llmMinScore);
2970
+ allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
2967
2971
  }
2968
2972
 
2969
2973
  allScores.push(...scores);
2970
- allQualified.push(...batchQualified.map((s) => s.uniqueId));
2971
2974
 
2972
2975
  totalBatches++;
2976
+ const totalQualified = allTagQualified.length + allNonTagQualified.length;
2973
2977
  console.error(
2974
- `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${batchQualified.length} 条,累计合格 ${allQualified.length} 条`,
2978
+ `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,tag 合格 ${allTagQualified.length},非 tag 合格 ${allNonTagQualified.length},累计 ${totalQualified} 条`,
2975
2979
  );
2976
2980
 
2977
2981
  // 更新偏移量记忆
2978
2982
  offset += samples.length;
2979
2983
  llmSampleOffsets.set(location, offset);
2980
2984
 
2981
- // 合格数已达到最小返回阈值,停止采样
2982
- if (allQualified.length >= llmMinReturn) break;
2985
+ // 检查是否两个类型都达到阈值,都达到才停止
2986
+ const tagReached = allTagQualified.length >= llmMinTagReturn;
2987
+ const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
2988
+ if (tagReached && nonTagReached) {
2989
+ console.error(
2990
+ `[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
2991
+ );
2992
+ break;
2993
+ }
2983
2994
  }
2984
2995
 
2985
- // 合格数已达到最小返回阈值,停止所有国家的采样
2986
- if (allQualified.length >= llmMinReturn) break;
2996
+ // 检查是否两个类型都达到阈值,都达到才停止所有国家采样
2997
+ const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
2998
+ const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
2999
+ if (tagReachedGlobal && nonTagReachedGlobal) break;
2987
3000
  }
2988
3001
 
2989
- // 分离 tag 合格和非 tag 合格
2990
- // tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
2991
- const tagQualified = allQualified.filter(
2992
- (uid) => !allScores.find((s) => s.uniqueId === uid),
2993
- );
2994
- const nonTagQualifiedScores = allScores
2995
- .filter((s) => s.score >= llmMinScore)
2996
- .sort((a, b) => b.score - a.score);
2997
- const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
2998
-
3002
+ // 最终合格列表:tag 优先 + 非 tag 按分数排序
2999
3003
  // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3000
3004
  const tagMaxCount = Math.floor(safeLimit * 0.7);
3001
- const tagCount = Math.min(tagQualified.length, tagMaxCount);
3005
+ const tagCount = Math.min(allTagQualified.length, tagMaxCount);
3002
3006
  const nonTagMaxCount = safeLimit - tagCount;
3003
- const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
3004
3007
 
3005
- // 最终合格列表:tag 优先 + 非 tag 按分数排序
3008
+ const nonTagQualifiedScores = allScores
3009
+ .filter((s) => s.score >= llmMinScore)
3010
+ .sort((a, b) => b.score - a.score);
3011
+ const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
3012
+
3006
3013
  const qualified = [
3007
- ...tagQualified.slice(0, tagCount),
3014
+ ...allTagQualified.slice(0, tagCount),
3008
3015
  ...finalNonTagQualified,
3009
3016
  ];
3010
3017