tt-help-cli-ycl 1.3.91 → 1.3.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +53 -16
- package/src/watch/data-store.js +30 -23
- package/src/watch/public/app.js +9 -5
- package/src/watch/server.js +1 -0
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { writeFileSync } from "fs";
|
|
2
|
+
import { randomUUID } from "crypto";
|
|
2
3
|
import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
3
4
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
4
5
|
import {
|
|
@@ -11,7 +12,22 @@ import { server as cfgServer } from "../lib/constants.js";
|
|
|
11
12
|
const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
|
|
12
13
|
const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
// 构建带客户端追踪 header 的 fetch 封装
|
|
16
|
+
function buildClientHeaders(clientId, meta, extra = {}) {
|
|
17
|
+
return {
|
|
18
|
+
"X-Client-Id": clientId,
|
|
19
|
+
"X-Client-Info": JSON.stringify(meta),
|
|
20
|
+
...extra,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
async function pushToServer(
|
|
25
|
+
serverUrl,
|
|
26
|
+
filteredAuthors,
|
|
27
|
+
videos,
|
|
28
|
+
clientId,
|
|
29
|
+
meta,
|
|
30
|
+
) {
|
|
15
31
|
const users = filteredAuthors.map((author) => {
|
|
16
32
|
const video = videos.find((v) => v.authorUniqueId === author);
|
|
17
33
|
return {
|
|
@@ -23,7 +39,9 @@ async function pushToServer(serverUrl, filteredAuthors, videos) {
|
|
|
23
39
|
|
|
24
40
|
const res = await fetch(`${serverUrl}/api/raw-users`, {
|
|
25
41
|
method: "POST",
|
|
26
|
-
headers: { "Content-Type": "application/json" },
|
|
42
|
+
headers: buildClientHeaders(clientId, meta, {
|
|
43
|
+
"Content-Type": "application/json",
|
|
44
|
+
}),
|
|
27
45
|
body: JSON.stringify({ users }),
|
|
28
46
|
});
|
|
29
47
|
const data = await res.json();
|
|
@@ -412,11 +430,13 @@ export async function handleScore(parsed) {
|
|
|
412
430
|
console.log(JSON.stringify(result, null, 2));
|
|
413
431
|
}
|
|
414
432
|
|
|
415
|
-
async function reportToServer(baseUrl, result) {
|
|
433
|
+
async function reportToServer(baseUrl, result, clientId, meta) {
|
|
416
434
|
try {
|
|
417
435
|
const res = await fetch(`${baseUrl}/api/tags/score-result`, {
|
|
418
436
|
method: "POST",
|
|
419
|
-
headers: { "Content-Type": "application/json" },
|
|
437
|
+
headers: buildClientHeaders(clientId, meta, {
|
|
438
|
+
"Content-Type": "application/json",
|
|
439
|
+
}),
|
|
420
440
|
body: JSON.stringify(result),
|
|
421
441
|
});
|
|
422
442
|
const data = await res.json();
|
|
@@ -464,17 +484,24 @@ export async function handleScoreAll(parsed) {
|
|
|
464
484
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
465
485
|
const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
|
|
466
486
|
|
|
487
|
+
// 生成客户端 ID,用于服务端追踪
|
|
488
|
+
const clientId = randomUUID();
|
|
489
|
+
const clientMeta = { type: "scoring" };
|
|
490
|
+
|
|
467
491
|
// 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
|
|
468
492
|
const enrichScraper = new TikTokScraper({ poolSize: 3 });
|
|
469
493
|
await enrichScraper.init();
|
|
470
494
|
log("✅ TikTokScraper 已就绪 (enrich 复用)");
|
|
495
|
+
log(` 客户端 ID: ${clientId.substring(0, 8)}...`);
|
|
471
496
|
log("");
|
|
472
497
|
|
|
473
498
|
try {
|
|
474
499
|
while (true) {
|
|
475
500
|
try {
|
|
476
501
|
// 从服务端取下一个 new 标签
|
|
477
|
-
const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
|
|
502
|
+
const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`, {
|
|
503
|
+
headers: buildClientHeaders(clientId, clientMeta),
|
|
504
|
+
});
|
|
478
505
|
const tagsData = await tagsRes.json();
|
|
479
506
|
if (!tagsData.tags || tagsData.tags.length === 0) {
|
|
480
507
|
emptyRounds++;
|
|
@@ -488,6 +515,7 @@ export async function handleScoreAll(parsed) {
|
|
|
488
515
|
try {
|
|
489
516
|
const discRes = await fetch(
|
|
490
517
|
`${baseUrl}/api/tags/discover?country=${country}&count=5`,
|
|
518
|
+
{ headers: buildClientHeaders(clientId, clientMeta) },
|
|
491
519
|
);
|
|
492
520
|
const discData = await discRes.json();
|
|
493
521
|
if (discData.inserted) {
|
|
@@ -530,10 +558,12 @@ export async function handleScoreAll(parsed) {
|
|
|
530
558
|
error: null,
|
|
531
559
|
};
|
|
532
560
|
|
|
533
|
-
// 锁定 tag
|
|
561
|
+
// 锁定 tag(meta 中不放入 tag,避免非 ASCII 字符导致 header ByteString 报错)
|
|
534
562
|
const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
|
|
535
563
|
method: "POST",
|
|
536
|
-
headers: { "Content-Type": "application/json" },
|
|
564
|
+
headers: buildClientHeaders(clientId, clientMeta, {
|
|
565
|
+
"Content-Type": "application/json",
|
|
566
|
+
}),
|
|
537
567
|
body: JSON.stringify({ tag }),
|
|
538
568
|
});
|
|
539
569
|
const claimData = await claimRes.json();
|
|
@@ -546,7 +576,7 @@ export async function handleScoreAll(parsed) {
|
|
|
546
576
|
log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
|
|
547
577
|
result.error = claimData.error;
|
|
548
578
|
result.status = "dead";
|
|
549
|
-
await reportToServer(baseUrl, result);
|
|
579
|
+
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
550
580
|
totalScored++;
|
|
551
581
|
continue;
|
|
552
582
|
}
|
|
@@ -570,7 +600,7 @@ export async function handleScoreAll(parsed) {
|
|
|
570
600
|
log(" ⚠️ 无视频,标记 dead");
|
|
571
601
|
result.status = "dead";
|
|
572
602
|
result.error = "no videos found";
|
|
573
|
-
await reportToServer(baseUrl, result);
|
|
603
|
+
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
574
604
|
totalScored++;
|
|
575
605
|
continue;
|
|
576
606
|
}
|
|
@@ -602,12 +632,14 @@ export async function handleScoreAll(parsed) {
|
|
|
602
632
|
baseUrl,
|
|
603
633
|
[...matchedAuthorSet],
|
|
604
634
|
videos,
|
|
635
|
+
clientId,
|
|
636
|
+
clientMeta,
|
|
605
637
|
);
|
|
606
638
|
result.pushedUsers = pushResult.added || 0;
|
|
607
639
|
}
|
|
608
640
|
|
|
609
641
|
// 上报结果
|
|
610
|
-
await reportToServer(baseUrl, result);
|
|
642
|
+
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
611
643
|
|
|
612
644
|
totalScored++;
|
|
613
645
|
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
@@ -643,12 +675,17 @@ export async function handleScoreAll(parsed) {
|
|
|
643
675
|
}
|
|
644
676
|
log(` ❌ 失败: ${e.message}`);
|
|
645
677
|
try {
|
|
646
|
-
await reportToServer(
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
678
|
+
await reportToServer(
|
|
679
|
+
baseUrl,
|
|
680
|
+
{
|
|
681
|
+
tag: "",
|
|
682
|
+
status: "error",
|
|
683
|
+
score: 0,
|
|
684
|
+
error: e.message,
|
|
685
|
+
},
|
|
686
|
+
clientId,
|
|
687
|
+
clientMeta,
|
|
688
|
+
);
|
|
652
689
|
} catch {}
|
|
653
690
|
totalScored++;
|
|
654
691
|
}
|
package/src/watch/data-store.js
CHANGED
|
@@ -2871,6 +2871,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2871
2871
|
// 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
|
|
2872
2872
|
if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
|
|
2873
2873
|
const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
|
|
2874
|
+
const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
|
|
2875
|
+
const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // 非 tag 最少合格数
|
|
2874
2876
|
const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
|
|
2875
2877
|
|
|
2876
2878
|
// 打印当前偏移量状态
|
|
@@ -2878,7 +2880,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2878
2880
|
.map(([k, v]) => `${k}:${v}`)
|
|
2879
2881
|
.join(", ");
|
|
2880
2882
|
console.error(
|
|
2881
|
-
`[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore}
|
|
2883
|
+
`[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},tag 最少 ${llmMinTagReturn},非 tag 最少 ${llmMinNonTagReturn}`,
|
|
2882
2884
|
);
|
|
2883
2885
|
if (offsetSummary) {
|
|
2884
2886
|
console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
|
|
@@ -2886,7 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2886
2888
|
|
|
2887
2889
|
// 返回 Promise,调用方需要 await
|
|
2888
2890
|
return (async () => {
|
|
2889
|
-
const allQualified = [];
|
|
2891
|
+
const allTagQualified = []; // tag 合格列表(直接合格)
|
|
2892
|
+
const allNonTagQualified = []; // 非 tag 合格列表(LLM 打分合格)
|
|
2890
2893
|
const allScores = [];
|
|
2891
2894
|
|
|
2892
2895
|
// 按猜测国家分组处理,每个国家使用独立的偏移量
|
|
@@ -2949,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2949
2952
|
|
|
2950
2953
|
// tag 来源直接加入合格列表
|
|
2951
2954
|
if (tagSamples.length > 0) {
|
|
2952
|
-
|
|
2955
|
+
allTagQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2953
2956
|
console.error(
|
|
2954
2957
|
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2955
2958
|
);
|
|
@@ -2964,47 +2967,51 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2964
2967
|
DEFAULT_TARGET_LOCATIONS,
|
|
2965
2968
|
);
|
|
2966
2969
|
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2970
|
+
allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2967
2971
|
}
|
|
2968
2972
|
|
|
2969
2973
|
allScores.push(...scores);
|
|
2970
|
-
allQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2971
2974
|
|
|
2972
2975
|
totalBatches++;
|
|
2976
|
+
const totalQualified = allTagQualified.length + allNonTagQualified.length;
|
|
2973
2977
|
console.error(
|
|
2974
|
-
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length}
|
|
2978
|
+
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,tag 合格 ${allTagQualified.length},非 tag 合格 ${allNonTagQualified.length},累计 ${totalQualified} 条`,
|
|
2975
2979
|
);
|
|
2976
2980
|
|
|
2977
2981
|
// 更新偏移量记忆
|
|
2978
2982
|
offset += samples.length;
|
|
2979
2983
|
llmSampleOffsets.set(location, offset);
|
|
2980
2984
|
|
|
2981
|
-
//
|
|
2982
|
-
|
|
2985
|
+
// 检查是否两个类型都达到阈值,都达到才停止
|
|
2986
|
+
const tagReached = allTagQualified.length >= llmMinTagReturn;
|
|
2987
|
+
const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2988
|
+
if (tagReached && nonTagReached) {
|
|
2989
|
+
console.error(
|
|
2990
|
+
`[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
|
|
2991
|
+
);
|
|
2992
|
+
break;
|
|
2993
|
+
}
|
|
2983
2994
|
}
|
|
2984
2995
|
|
|
2985
|
-
//
|
|
2986
|
-
|
|
2996
|
+
// 检查是否两个类型都达到阈值,都达到才停止所有国家采样
|
|
2997
|
+
const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
|
|
2998
|
+
const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2999
|
+
if (tagReachedGlobal && nonTagReachedGlobal) break;
|
|
2987
3000
|
}
|
|
2988
3001
|
|
|
2989
|
-
//
|
|
2990
|
-
// tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
|
|
2991
|
-
const tagQualified = allQualified.filter(
|
|
2992
|
-
(uid) => !allScores.find((s) => s.uniqueId === uid),
|
|
2993
|
-
);
|
|
2994
|
-
const nonTagQualifiedScores = allScores
|
|
2995
|
-
.filter((s) => s.score >= llmMinScore)
|
|
2996
|
-
.sort((a, b) => b.score - a.score);
|
|
2997
|
-
const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
|
|
2998
|
-
|
|
3002
|
+
// 最终合格列表:tag 优先 + 非 tag 按分数排序
|
|
2999
3003
|
// 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
|
|
3000
3004
|
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3001
|
-
const tagCount = Math.min(tagQualified.length, tagMaxCount);
|
|
3005
|
+
const tagCount = Math.min(allTagQualified.length, tagMaxCount);
|
|
3002
3006
|
const nonTagMaxCount = safeLimit - tagCount;
|
|
3003
|
-
const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
|
|
3004
3007
|
|
|
3005
|
-
|
|
3008
|
+
const nonTagQualifiedScores = allScores
|
|
3009
|
+
.filter((s) => s.score >= llmMinScore)
|
|
3010
|
+
.sort((a, b) => b.score - a.score);
|
|
3011
|
+
const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
|
|
3012
|
+
|
|
3006
3013
|
const qualified = [
|
|
3007
|
-
...tagQualified.slice(0, tagCount),
|
|
3014
|
+
...allTagQualified.slice(0, tagCount),
|
|
3008
3015
|
...finalNonTagQualified,
|
|
3009
3016
|
];
|
|
3010
3017
|
|
package/src/watch/public/app.js
CHANGED
|
@@ -255,8 +255,14 @@ function renderActiveClients(clients) {
|
|
|
255
255
|
const tbody = document.getElementById("activeClientsBody");
|
|
256
256
|
if (!section || !bar) return;
|
|
257
257
|
|
|
258
|
-
const types = ["explore", "refresh", "attach", "comments"];
|
|
259
|
-
const labels = {
|
|
258
|
+
const types = ["explore", "refresh", "attach", "comments", "scoring"];
|
|
259
|
+
const labels = {
|
|
260
|
+
explore: "Explore",
|
|
261
|
+
refresh: "Refresh",
|
|
262
|
+
attach: "Attach",
|
|
263
|
+
comments: "Comments",
|
|
264
|
+
scoring: "Scoring",
|
|
265
|
+
};
|
|
260
266
|
const grouped = {};
|
|
261
267
|
for (const c of clients) {
|
|
262
268
|
if (!grouped[c.type]) grouped[c.type] = [];
|
|
@@ -314,9 +320,7 @@ function showClientDetail(type, clients) {
|
|
|
314
320
|
tbody.innerHTML = clients
|
|
315
321
|
.map((c) => {
|
|
316
322
|
const cid = c.clientId ? c.clientId.substring(0, 8) : "-";
|
|
317
|
-
const ipPort = c.ip
|
|
318
|
-
? c.ip + (c.port ? ":" + c.port : "")
|
|
319
|
-
: "-";
|
|
323
|
+
const ipPort = c.ip ? c.ip + (c.port ? ":" + c.port : "") : "-";
|
|
320
324
|
const userId = c.userId || "-";
|
|
321
325
|
const last = formatRelativeTime(c.lastSeen);
|
|
322
326
|
return `<tr>
|
package/src/watch/server.js
CHANGED
|
@@ -93,6 +93,7 @@ function inferClientType(routePath) {
|
|
|
93
93
|
if (routePath.startsWith("/api/redo-job")) return "refresh";
|
|
94
94
|
if (routePath.startsWith("/api/user-update-tasks")) return "attach";
|
|
95
95
|
if (routePath.startsWith("/api/comment-task")) return "comments";
|
|
96
|
+
if (routePath.startsWith("/api/tags")) return "scoring";
|
|
96
97
|
if (
|
|
97
98
|
routePath.startsWith("/api/job") ||
|
|
98
99
|
routePath.startsWith("/api/explore-new")
|