tt-help-cli-ycl 1.3.92 → 1.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +0 -3
- package/src/watch/data-store.js +30 -23
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
package/src/watch/data-store.js
CHANGED
|
@@ -2871,6 +2871,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2871
2871
|
// 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
|
|
2872
2872
|
if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
|
|
2873
2873
|
const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
|
|
2874
|
+
const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
|
|
2875
|
+
const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // 非 tag 最少合格数
|
|
2874
2876
|
const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
|
|
2875
2877
|
|
|
2876
2878
|
// 打印当前偏移量状态
|
|
@@ -2878,7 +2880,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2878
2880
|
.map(([k, v]) => `${k}:${v}`)
|
|
2879
2881
|
.join(", ");
|
|
2880
2882
|
console.error(
|
|
2881
|
-
`[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore}
|
|
2883
|
+
`[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},tag 最少 ${llmMinTagReturn},非 tag 最少 ${llmMinNonTagReturn}`,
|
|
2882
2884
|
);
|
|
2883
2885
|
if (offsetSummary) {
|
|
2884
2886
|
console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
|
|
@@ -2886,7 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2886
2888
|
|
|
2887
2889
|
// 返回 Promise,调用方需要 await
|
|
2888
2890
|
return (async () => {
|
|
2889
|
-
const
|
|
2891
|
+
const allTagQualified = []; // tag 合格列表(直接合格)
|
|
2892
|
+
const allNonTagQualified = []; // 非 tag 合格列表(LLM 打分合格)
|
|
2890
2893
|
const allScores = [];
|
|
2891
2894
|
|
|
2892
2895
|
// 按猜测国家分组处理,每个国家使用独立的偏移量
|
|
@@ -2949,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2949
2952
|
|
|
2950
2953
|
// tag 来源直接加入合格列表
|
|
2951
2954
|
if (tagSamples.length > 0) {
|
|
2952
|
-
|
|
2955
|
+
allTagQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2953
2956
|
console.error(
|
|
2954
2957
|
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2955
2958
|
);
|
|
@@ -2964,47 +2967,51 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2964
2967
|
DEFAULT_TARGET_LOCATIONS,
|
|
2965
2968
|
);
|
|
2966
2969
|
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2970
|
+
allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2967
2971
|
}
|
|
2968
2972
|
|
|
2969
2973
|
allScores.push(...scores);
|
|
2970
|
-
allQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2971
2974
|
|
|
2972
2975
|
totalBatches++;
|
|
2976
|
+
const totalQualified = allTagQualified.length + allNonTagQualified.length;
|
|
2973
2977
|
console.error(
|
|
2974
|
-
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length}
|
|
2978
|
+
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,tag 合格 ${allTagQualified.length},非 tag 合格 ${allNonTagQualified.length},累计 ${totalQualified} 条`,
|
|
2975
2979
|
);
|
|
2976
2980
|
|
|
2977
2981
|
// 更新偏移量记忆
|
|
2978
2982
|
offset += samples.length;
|
|
2979
2983
|
llmSampleOffsets.set(location, offset);
|
|
2980
2984
|
|
|
2981
|
-
//
|
|
2982
|
-
|
|
2985
|
+
// 检查是否两个类型都达到阈值,都达到才停止
|
|
2986
|
+
const tagReached = allTagQualified.length >= llmMinTagReturn;
|
|
2987
|
+
const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2988
|
+
if (tagReached && nonTagReached) {
|
|
2989
|
+
console.error(
|
|
2990
|
+
`[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
|
|
2991
|
+
);
|
|
2992
|
+
break;
|
|
2993
|
+
}
|
|
2983
2994
|
}
|
|
2984
2995
|
|
|
2985
|
-
//
|
|
2986
|
-
|
|
2996
|
+
// 检查是否两个类型都达到阈值,都达到才停止所有国家采样
|
|
2997
|
+
const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
|
|
2998
|
+
const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2999
|
+
if (tagReachedGlobal && nonTagReachedGlobal) break;
|
|
2987
3000
|
}
|
|
2988
3001
|
|
|
2989
|
-
//
|
|
2990
|
-
// tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
|
|
2991
|
-
const tagQualified = allQualified.filter(
|
|
2992
|
-
(uid) => !allScores.find((s) => s.uniqueId === uid),
|
|
2993
|
-
);
|
|
2994
|
-
const nonTagQualifiedScores = allScores
|
|
2995
|
-
.filter((s) => s.score >= llmMinScore)
|
|
2996
|
-
.sort((a, b) => b.score - a.score);
|
|
2997
|
-
const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
|
|
2998
|
-
|
|
3002
|
+
// 最终合格列表:tag 优先 + 非 tag 按分数排序
|
|
2999
3003
|
// 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
|
|
3000
3004
|
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3001
|
-
const tagCount = Math.min(
|
|
3005
|
+
const tagCount = Math.min(allTagQualified.length, tagMaxCount);
|
|
3002
3006
|
const nonTagMaxCount = safeLimit - tagCount;
|
|
3003
|
-
const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
|
|
3004
3007
|
|
|
3005
|
-
|
|
3008
|
+
const nonTagQualifiedScores = allScores
|
|
3009
|
+
.filter((s) => s.score >= llmMinScore)
|
|
3010
|
+
.sort((a, b) => b.score - a.score);
|
|
3011
|
+
const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
|
|
3012
|
+
|
|
3006
3013
|
const qualified = [
|
|
3007
|
-
...
|
|
3014
|
+
...allTagQualified.slice(0, tagCount),
|
|
3008
3015
|
...finalNonTagQualified,
|
|
3009
3016
|
];
|
|
3010
3017
|
|