tt-help-cli-ycl 1.3.90 → 1.3.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.90",
3
+ "version": "1.3.92",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { writeFileSync } from "fs";
2
+ import { randomUUID } from "crypto";
2
3
  import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
3
4
  import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
4
5
  import {
@@ -11,7 +12,22 @@ import { server as cfgServer } from "../lib/constants.js";
11
12
  const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
12
13
  const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
13
14
 
14
- async function pushToServer(serverUrl, filteredAuthors, videos) {
15
/**
 * Build the request headers that let the server attribute a call to this client.
 *
 * `X-Client-Info` carries `meta` as JSON with every non-ASCII UTF-16 code unit
 * written as a `\uXXXX` escape. The escaped text parses back to exactly the same
 * value, but it is pure ASCII, so `fetch` never rejects it with a ByteString
 * error when `meta` contains non-ASCII text (for example a tag name).
 *
 * A header whose source value is `null`/`undefined` is omitted rather than
 * being sent as the literal string "undefined".
 *
 * @param {string} [clientId] - Client identifier, sent as `X-Client-Id`.
 * @param {object} [meta] - JSON-serializable client metadata, sent as `X-Client-Info`.
 * @param {Record<string, string>} [extra={}] - Extra headers; they win on key collisions.
 * @returns {Record<string, string>} Header map suitable for `fetch`.
 */
function buildClientHeaders(clientId, meta, extra = {}) {
  const headers = {};

  if (clientId != null) {
    headers["X-Client-Id"] = String(clientId);
  }

  if (meta != null) {
    const json = JSON.stringify(meta);
    // JSON.stringify returns undefined for values it cannot serialize (e.g. a function).
    if (json !== undefined) {
      headers["X-Client-Info"] = json.replace(
        /[\u0080-\uffff]/g,
        (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`,
      );
    }
  }

  return { ...headers, ...extra };
}
23
+
24
+ async function pushToServer(
25
+ serverUrl,
26
+ filteredAuthors,
27
+ videos,
28
+ clientId,
29
+ meta,
30
+ ) {
15
31
  const users = filteredAuthors.map((author) => {
16
32
  const video = videos.find((v) => v.authorUniqueId === author);
17
33
  return {
@@ -23,7 +39,9 @@ async function pushToServer(serverUrl, filteredAuthors, videos) {
23
39
 
24
40
  const res = await fetch(`${serverUrl}/api/raw-users`, {
25
41
  method: "POST",
26
- headers: { "Content-Type": "application/json" },
42
+ headers: buildClientHeaders(clientId, meta, {
43
+ "Content-Type": "application/json",
44
+ }),
27
45
  body: JSON.stringify({ users }),
28
46
  });
29
47
  const data = await res.json();
@@ -412,11 +430,13 @@ export async function handleScore(parsed) {
412
430
  console.log(JSON.stringify(result, null, 2));
413
431
  }
414
432
 
415
- async function reportToServer(baseUrl, result) {
433
+ async function reportToServer(baseUrl, result, clientId, meta) {
416
434
  try {
417
435
  const res = await fetch(`${baseUrl}/api/tags/score-result`, {
418
436
  method: "POST",
419
- headers: { "Content-Type": "application/json" },
437
+ headers: buildClientHeaders(clientId, meta, {
438
+ "Content-Type": "application/json",
439
+ }),
420
440
  body: JSON.stringify(result),
421
441
  });
422
442
  const data = await res.json();
@@ -464,73 +484,86 @@ export async function handleScoreAll(parsed) {
464
484
  let emptyRounds = 0; // 连续无任务的轮数
465
485
  const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
466
486
 
487
+ // 生成客户端 ID,用于服务端追踪
488
+ const clientId = randomUUID();
489
+ const clientMeta = { type: "scoring" };
490
+
467
491
  // 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
468
492
  const enrichScraper = new TikTokScraper({ poolSize: 3 });
469
493
  await enrichScraper.init();
470
494
  log("✅ TikTokScraper 已就绪 (enrich 复用)");
495
+ log(` 客户端 ID: ${clientId.substring(0, 8)}...`);
471
496
  log("");
472
497
 
473
498
  try {
474
499
  while (true) {
475
- // 从服务端取下一个 new 标签
476
- const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
477
- const tagsData = await tagsRes.json();
478
- if (!tagsData.tags || tagsData.tags.length === 0) {
479
- emptyRounds++;
480
-
481
- // 自动发现:连续 N 轮无任务时自动生成标签
482
- if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
483
- log(
484
- `🔍 连续 ${emptyRounds} 轮无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
485
- );
486
- for (const country of targetCountries) {
487
- try {
488
- const discRes = await fetch(
489
- `${baseUrl}/api/tags/discover?country=${country}&count=5`,
490
- );
491
- const discData = await discRes.json();
492
- if (discData.inserted) {
493
- log(` ${country}: 新增 ${discData.inserted} 个`);
500
+ try {
501
+ // 从服务端取下一个 new 标签
502
+ const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`, {
503
+ headers: buildClientHeaders(clientId, clientMeta),
504
+ });
505
+ const tagsData = await tagsRes.json();
506
+ if (!tagsData.tags || tagsData.tags.length === 0) {
507
+ emptyRounds++;
508
+
509
+ // 自动发现:连续 N 轮无任务时自动生成标签
510
+ if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
511
+ log(
512
+ `🔍 连续 ${emptyRounds} 轮无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
513
+ );
514
+ for (const country of targetCountries) {
515
+ try {
516
+ const discRes = await fetch(
517
+ `${baseUrl}/api/tags/discover?country=${country}&count=5`,
518
+ { headers: buildClientHeaders(clientId, clientMeta) },
519
+ );
520
+ const discData = await discRes.json();
521
+ if (discData.inserted) {
522
+ log(` ${country}: 新增 ${discData.inserted} 个`);
523
+ }
524
+ } catch (e) {
525
+ log(` ${country}: 请求失败 (${e.message})`);
494
526
  }
495
- } catch (e) {
496
- log(` ${country}: 请求失败 (${e.message})`);
497
527
  }
528
+ emptyRounds = 0; // 重置计数器
529
+ // 等 3 秒让服务端处理完
530
+ await new Promise((r) => setTimeout(r, 3000));
531
+ continue;
498
532
  }
499
- emptyRounds = 0; // 重置计数器
500
- // 3 秒让服务端处理完
501
- await new Promise((r) => setTimeout(r, 3000));
533
+ log(`⏳ 暂无待打分标签(连续 ${emptyRounds} 轮),10 秒后重试...`);
534
+ await new Promise((r) => setTimeout(r, 10000));
502
535
  continue;
503
536
  }
504
- log(`⏳ 暂无待打分标签(连续 ${emptyRounds} 轮),10 秒后重试...`);
505
- await new Promise((r) => setTimeout(r, 10000));
506
- continue;
507
- }
508
537
 
509
- // 有任务了,重置计数器
510
- emptyRounds = 0;
511
-
512
- const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
513
- const startTime = Date.now();
514
-
515
- log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
516
-
517
- const result = {
518
- tag,
519
- status: "error",
520
- score: 0,
521
- totalPosts: 0,
522
- authorCount: 0,
523
- matchedAuthors: 0,
524
- matchedCountries: [],
525
- pushedUsers: 0,
526
- error: null,
527
- };
528
-
529
- try {
530
- // 锁定 tag
538
+ // 有任务了,重置计数器
539
+ emptyRounds = 0;
540
+
541
+ const tag = tagsData.tags[0].tag
542
+ .replace(/^#+/, "")
543
+ .trim()
544
+ .toLowerCase();
545
+ const startTime = Date.now();
546
+
547
+ log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
548
+
549
+ const result = {
550
+ tag,
551
+ status: "error",
552
+ score: 0,
553
+ totalPosts: 0,
554
+ authorCount: 0,
555
+ matchedAuthors: 0,
556
+ matchedCountries: [],
557
+ pushedUsers: 0,
558
+ error: null,
559
+ };
560
+
561
+ // 锁定 tag(meta 中不放入 tag,避免非 ASCII 字符导致 header ByteString 报错)
531
562
  const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
532
563
  method: "POST",
533
- headers: { "Content-Type": "application/json" },
564
+ headers: buildClientHeaders(clientId, clientMeta, {
565
+ "Content-Type": "application/json",
566
+ }),
534
567
  body: JSON.stringify({ tag }),
535
568
  });
536
569
  const claimData = await claimRes.json();
@@ -543,7 +576,7 @@ export async function handleScoreAll(parsed) {
543
576
  log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
544
577
  result.error = claimData.error;
545
578
  result.status = "dead";
546
- await reportToServer(baseUrl, result);
579
+ await reportToServer(baseUrl, result, clientId, clientMeta);
547
580
  totalScored++;
548
581
  continue;
549
582
  }
@@ -567,7 +600,7 @@ export async function handleScoreAll(parsed) {
567
600
  log(" ⚠️ 无视频,标记 dead");
568
601
  result.status = "dead";
569
602
  result.error = "no videos found";
570
- await reportToServer(baseUrl, result);
603
+ await reportToServer(baseUrl, result, clientId, clientMeta);
571
604
  totalScored++;
572
605
  continue;
573
606
  }
@@ -586,6 +619,9 @@ export async function handleScoreAll(parsed) {
586
619
  });
587
620
  videos = enriched.videos;
588
621
 
622
+ // 更新 meta 中当前正在处理的标签
623
+ clientMeta.tag = tag;
624
+
589
625
  // 过滤 + 算分 (共用函数)
590
626
  const { matchedAuthorSet } = applyFilterAndScore(
591
627
  videos,
@@ -599,12 +635,14 @@ export async function handleScoreAll(parsed) {
599
635
  baseUrl,
600
636
  [...matchedAuthorSet],
601
637
  videos,
638
+ clientId,
639
+ clientMeta,
602
640
  );
603
641
  result.pushedUsers = pushResult.added || 0;
604
642
  }
605
643
 
606
644
  // 上报结果
607
- await reportToServer(baseUrl, result);
645
+ await reportToServer(baseUrl, result, clientId, clientMeta);
608
646
 
609
647
  totalScored++;
610
648
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
@@ -624,10 +662,33 @@ export async function handleScoreAll(parsed) {
624
662
  );
625
663
  log("");
626
664
  } catch (e) {
665
+ // 区分网络错误和业务错误
666
+ const isNetworkError =
667
+ e.code === "ECONNREFUSED" ||
668
+ e.code === "ENOTFOUND" ||
669
+ e.code === "ECONNRESET" ||
670
+ (e.message &&
671
+ (e.message.includes("ECONNREFUSED") ||
672
+ e.message.includes("fetch failed") ||
673
+ e.message.includes("network")));
674
+ if (isNetworkError) {
675
+ log(` ⚠️ 服务端连接失败 (${e.message}),15 秒后重试...`);
676
+ await new Promise((r) => setTimeout(r, 15000));
677
+ continue;
678
+ }
627
679
  log(` ❌ 失败: ${e.message}`);
628
- result.error = e.message;
629
680
  try {
630
- await reportToServer(baseUrl, result);
681
+ await reportToServer(
682
+ baseUrl,
683
+ {
684
+ tag: "",
685
+ status: "error",
686
+ score: 0,
687
+ error: e.message,
688
+ },
689
+ clientId,
690
+ clientMeta,
691
+ );
631
692
  } catch {}
632
693
  totalScored++;
633
694
  }
@@ -2223,6 +2223,58 @@ export function createStore(filePath, options = {}) {
2223
2223
  if (filePath) {
2224
2224
  // 初始化 SQLite 用户表(用于判重)
2225
2225
  initUserDb(filePath);
2226
+ // 从数据库恢复偏移量
2227
+ loadLlmSampleOffsets();
2228
+ }
2229
+
2230
/**
 * Restore the LLM sampling offsets persisted in the `_llm_sample_offsets` table
 * into the in-memory `llmSampleOffsets` map.
 *
 * Only entries whose value is a non-negative integer are restored; anything
 * else is logged and skipped. NOTE(review): this assumes the offsets are later
 * bound to a SQL `OFFSET ?` placeholder — confirm against the sampling query.
 *
 * Never throws: a missing table, unreadable row or malformed JSON leaves the
 * map as it was and is reported on stderr.
 */
function loadLlmSampleOffsets() {
  try {
    const row = db
      .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
      .get();
    if (!row || !row.offsets) return;

    const parsed = JSON.parse(row.offsets);
    // A JSON array is also `typeof "object"`; it is not a key → offset mapping.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return;

    for (const [key, value] of Object.entries(parsed)) {
      if (Number.isInteger(value) && value >= 0) {
        llmSampleOffsets.set(key, value);
      } else {
        console.error(
          `[data-store] 忽略无效的 LLM 采样偏移量: ${key}=${JSON.stringify(value)}`,
        );
      }
    }

    console.error(
      `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
        llmSampleOffsets.entries(),
      )
        .map(([k, v]) => `${k}:${v}`)
        .join(", ")}`,
    );
  } catch (e) {
    // Table missing (first run) or stored JSON unparsable: start with empty offsets.
    console.error(
      `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
    );
  }
}
2260
+
2261
+ /**
2262
+ * 将 LLM 采样偏移量持久化到数据库
2263
+ */
2264
+ function saveLlmSampleOffsets() {
2265
+ try {
2266
+ const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
2267
+ // 表不存在则创建
2268
+ db.prepare(
2269
+ `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
2270
+ ).run();
2271
+ // 插入或更新
2272
+ db.prepare(
2273
+ `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
2274
+ ).run(offsetsJson);
2275
+ } catch (e) {
2276
+ console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
2277
+ }
2226
2278
  }
2227
2279
 
2228
2280
  // stats 缓存
@@ -2383,15 +2435,97 @@ export function createStore(filePath, options = {}) {
2383
2435
  }
2384
2436
 
2385
2437
  function flushSave() {
2438
+ // 数据库模式:先保存 LLM 偏移量,再备份数据库
2439
+ if (db && dbPath) {
2440
+ try {
2441
+ saveLlmSampleOffsets();
2442
+ } catch (e) {
2443
+ console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
2444
+ }
2445
+ }
2386
2446
  return Promise.resolve();
2387
2447
  }
2388
2448
 
2389
- function saveVideos() {
2390
- return;
2449
/**
 * Write a timestamped snapshot of the open database next to the original file,
 * then prune older snapshots.
 *
 * The snapshot is produced with SQLite's `VACUUM INTO`, which runs
 * synchronously on the existing connection. That matters because this function
 * is called synchronously on shutdown: a Promise-based backup would not have
 * finished before the process exits.
 *
 * Snapshot name: `<basename>-<15 digits>.db`, where the digits are the UTC time
 * as YYYYMMDDHHmmss followed by tenths of a second (e.g. `result-202606270944001.db`).
 *
 * @param {number} [maxBackups=3] - Number of snapshots to keep after pruning.
 * @returns {string|null} Path of the new snapshot, or `null` when the database
 *   is not open or the backup failed (the failure is logged, never thrown).
 */
function backupDatabase(maxBackups = 3) {
  if (!db || !dbPath) {
    console.error("[data-store] 数据库未初始化,跳过备份");
    return null;
  }

  try {
    // "2026-06-27T09:44:00.123Z" -> "20260627094400123Z" -> first 15 characters.
    const timestamp = new Date()
      .toISOString()
      .replace(/[-:T.]/g, "")
      .slice(0, 15);
    const baseName = path.basename(dbPath, ".db");
    const backupName = `${baseName}-${timestamp}.db`;
    const backupDir = path.dirname(dbPath);
    const backupPath = path.join(backupDir, backupName);

    console.error(`[data-store] 正在备份数据库: ${backupName}`);

    // VACUUM INTO refuses to overwrite an existing file and cannot run inside
    // an open transaction; either case throws and is reported below.
    db.prepare("VACUUM INTO ?").run(backupPath);

    // A zero-byte file means nothing was actually copied.
    const stat = fs.statSync(backupPath);
    if (stat.size === 0) {
      throw new Error(`backup file is empty: ${backupPath}`);
    }
    const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
    console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);

    // Keep only the newest maxBackups snapshots.
    cleanupOldBackups(backupDir, baseName, maxBackups);

    return backupPath;
  } catch (e) {
    console.error(`[data-store] 备份失败: ${e.message}`);
    return null;
  }
}
2493
+
2494
/**
 * Delete old database snapshots in `backupDir`, keeping the newest `maxBackups`.
 *
 * Only files named exactly `<baseName>-<15 digits>.db` are considered.
 * `baseName` is matched literally, so a name containing regex metacharacters
 * (such as ".") cannot make unrelated files look like snapshots. The digit
 * suffix is fixed-width, so plain string order is chronological order.
 *
 * Never throws: a failure to list the directory or to delete a file is logged,
 * and one failed delete does not stop the remaining ones.
 *
 * @param {string} backupDir - Directory that holds the snapshots.
 * @param {string} baseName - Snapshot file-name prefix (database name without ".db").
 * @param {number} maxBackups - Snapshots to keep; anything that is not a
 *   non-negative integer is treated as "keep everything".
 */
function cleanupOldBackups(backupDir, baseName, maxBackups) {
  try {
    const keep =
      Number.isInteger(maxBackups) && maxBackups >= 0 ? maxBackups : Infinity;

    const escapedBase = String(baseName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`^${escapedBase}-\\d{15}\\.db$`);

    const backups = fs
      .readdirSync(backupDir)
      .filter((f) => pattern.test(f))
      .sort()
      .reverse(); // newest first

    let removed = 0;
    for (const file of backups.slice(keep)) {
      try {
        fs.unlinkSync(path.join(backupDir, file));
        removed += 1;
        console.error(`[data-store] 已清理旧备份: ${file}`);
      } catch (e) {
        console.error(`[data-store] 清理旧备份失败: ${file}: ${e.message}`);
      }
    }

    console.error(
      `[data-store] 备份清理完成: 保留 ${backups.length - removed} / ${backups.length} 个备份`,
    );
  } catch (e) {
    console.error(`[data-store] 清理旧备份失败: ${e.message}`);
  }
}
2392
2523
 
2393
2524
  function stopBackup() {
2394
- return;
2525
+ // 退出时执行备份
2526
+ if (db && dbPath) {
2527
+ backupDatabase();
2528
+ }
2395
2529
  }
2396
2530
 
2397
2531
  function getUser(uid) {
@@ -2795,7 +2929,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2795
2929
  .prepare(
2796
2930
  `
2797
2931
  SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
2798
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
2932
+ ORDER BY
2933
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
2934
+ COALESCE(video_count, 0) DESC, created_at DESC
2799
2935
  LIMIT ? OFFSET ?
2800
2936
  `,
2801
2937
  )
@@ -2803,11 +2939,32 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2803
2939
 
2804
2940
  if (samples.length === 0) break;
2805
2941
 
2806
- const scores = await scoreJobsBatch(
2807
- samples,
2808
- DEFAULT_TARGET_LOCATIONS,
2942
+ // 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
2943
+ const tagSamples = samples.filter((s) =>
2944
+ (s.sources || "").includes("tag"),
2809
2945
  );
2810
- const batchQualified = scores.filter((s) => s.score >= llmMinScore);
2946
+ const nonTagSamples = samples.filter(
2947
+ (s) => !(s.sources || "").includes("tag"),
2948
+ );
2949
+
2950
+ // tag 来源直接加入合格列表
2951
+ if (tagSamples.length > 0) {
2952
+ allQualified.push(...tagSamples.map((s) => s.unique_id));
2953
+ console.error(
2954
+ `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2955
+ );
2956
+ }
2957
+
2958
+ // 非 tag 来源走 LLM 打分
2959
+ let batchQualified = [];
2960
+ let scores = [];
2961
+ if (nonTagSamples.length > 0) {
2962
+ scores = await scoreJobsBatch(
2963
+ nonTagSamples,
2964
+ DEFAULT_TARGET_LOCATIONS,
2965
+ );
2966
+ batchQualified = scores.filter((s) => s.score >= llmMinScore);
2967
+ }
2811
2968
 
2812
2969
  allScores.push(...scores);
2813
2970
  allQualified.push(...batchQualified.map((s) => s.uniqueId));
@@ -2829,12 +2986,27 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2829
2986
  if (allQualified.length >= llmMinReturn) break;
2830
2987
  }
2831
2988
 
2832
- // 按分数降序排序,取前 safeLimit
2833
- const qualifiedScores = allScores
2989
+ // 分离 tag 合格和非 tag 合格
2990
+ // tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
2991
+ const tagQualified = allQualified.filter(
2992
+ (uid) => !allScores.find((s) => s.uniqueId === uid),
2993
+ );
2994
+ const nonTagQualifiedScores = allScores
2834
2995
  .filter((s) => s.score >= llmMinScore)
2835
- .sort((a, b) => b.score - a.score)
2836
- .slice(0, safeLimit);
2837
- const qualified = qualifiedScores.map((s) => s.uniqueId);
2996
+ .sort((a, b) => b.score - a.score);
2997
+ const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
2998
+
2999
+ // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3000
+ const tagMaxCount = Math.floor(safeLimit * 0.7);
3001
+ const tagCount = Math.min(tagQualified.length, tagMaxCount);
3002
+ const nonTagMaxCount = safeLimit - tagCount;
3003
+ const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
3004
+
3005
+ // 最终合格列表:tag 优先 + 非 tag 按分数排序
3006
+ const qualified = [
3007
+ ...tagQualified.slice(0, tagCount),
3008
+ ...finalNonTagQualified,
3009
+ ];
2838
3010
 
2839
3011
  if (!qualified.length) {
2840
3012
  console.error(
@@ -2881,6 +3053,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2881
3053
  moveTxn();
2882
3054
  markStatsDirty();
2883
3055
 
3056
+ // 持久化偏移量到数据库
3057
+ saveLlmSampleOffsets();
3058
+
2884
3059
  // 打印最终偏移量状态
2885
3060
  const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
2886
3061
  .map(([k, v]) => `${k}:${v}`)
@@ -2920,7 +3095,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2920
3095
  status_code, latest_video_time, user_create_time
2921
3096
  FROM raw_jobs
2922
3097
  WHERE ${whereSql}
2923
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
3098
+ ORDER BY
3099
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
3100
+ COALESCE(video_count, 0) DESC, created_at DESC
2924
3101
  LIMIT ?
2925
3102
  `,
2926
3103
  ).run(...args, safeLimit);
@@ -2932,7 +3109,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2932
3109
  WHERE unique_id IN (
2933
3110
  SELECT unique_id FROM raw_jobs
2934
3111
  WHERE ${whereSql}
2935
- ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
3112
+ ORDER BY
3113
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
3114
+ COALESCE(video_count, 0) DESC, created_at DESC
2936
3115
  LIMIT ?
2937
3116
  )
2938
3117
  `,
@@ -4274,7 +4453,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4274
4453
  sqlParams.push(...targetCountries);
4275
4454
  }
4276
4455
 
4277
- sql += ` ORDER BY created_at ASC, unique_id ASC LIMIT ?`;
4456
+ // 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
4457
+ sql += ` ORDER BY
4458
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
4459
+ created_at ASC,
4460
+ unique_id ASC
4461
+ LIMIT ?`;
4278
4462
  sqlParams.push(l);
4279
4463
 
4280
4464
  const rows = db.prepare(sql).all(...sqlParams);
@@ -4322,6 +4506,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4322
4506
  }
4323
4507
  return false;
4324
4508
  })
4509
+ .sort((a, b) => {
4510
+ // 优先级:sources 包含 "tag" 的任务优先
4511
+ const aIsTag = (a.sources || "").includes("tag");
4512
+ const bIsTag = (b.sources || "").includes("tag");
4513
+ if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
4514
+ return (a.createdAt || 0) - (b.createdAt || 0);
4515
+ })
4325
4516
  .slice(0, l);
4326
4517
  // 接受任务时 userUpdateCount + 1
4327
4518
  pending.forEach((u) => {
@@ -4835,6 +5026,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4835
5026
  commitCommentTask,
4836
5027
  debugClaimNextJob,
4837
5028
  stopBackup,
5029
+ backupDatabase, // 手动备份数据库
4838
5030
  rawQuery,
4839
5031
  getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
4840
5032
  // Tag 发现与打分
@@ -255,8 +255,14 @@ function renderActiveClients(clients) {
255
255
  const tbody = document.getElementById("activeClientsBody");
256
256
  if (!section || !bar) return;
257
257
 
258
- const types = ["explore", "refresh", "attach", "comments"];
259
- const labels = { explore: "Explore", refresh: "Refresh", attach: "Attach", comments: "Comments" };
258
+ const types = ["explore", "refresh", "attach", "comments", "scoring"];
259
+ const labels = {
260
+ explore: "Explore",
261
+ refresh: "Refresh",
262
+ attach: "Attach",
263
+ comments: "Comments",
264
+ scoring: "Scoring",
265
+ };
260
266
  const grouped = {};
261
267
  for (const c of clients) {
262
268
  if (!grouped[c.type]) grouped[c.type] = [];
@@ -314,9 +320,7 @@ function showClientDetail(type, clients) {
314
320
  tbody.innerHTML = clients
315
321
  .map((c) => {
316
322
  const cid = c.clientId ? c.clientId.substring(0, 8) : "-";
317
- const ipPort = c.ip
318
- ? c.ip + (c.port ? ":" + c.port : "")
319
- : "-";
323
+ const ipPort = c.ip ? c.ip + (c.port ? ":" + c.port : "") : "-";
320
324
  const userId = c.userId || "-";
321
325
  const last = formatRelativeTime(c.lastSeen);
322
326
  return `<tr>
@@ -93,6 +93,7 @@ function inferClientType(routePath) {
93
93
  if (routePath.startsWith("/api/redo-job")) return "refresh";
94
94
  if (routePath.startsWith("/api/user-update-tasks")) return "attach";
95
95
  if (routePath.startsWith("/api/comment-task")) return "comments";
96
+ if (routePath.startsWith("/api/tags")) return "scoring";
96
97
  if (
97
98
  routePath.startsWith("/api/job") ||
98
99
  routePath.startsWith("/api/explore-new")
@@ -1250,7 +1251,10 @@ export function startWatchServer(
1250
1251
  console.error("[server] HTTP 服务已关闭");
1251
1252
  });
1252
1253
  await store.flushSave();
1253
- console.error("[server] 数据已保存,退出");
1254
+ console.error("[server] 数据已保存");
1255
+ // 备份数据库
1256
+ store.stopBackup();
1257
+ console.error("[server] 退出");
1254
1258
  process.exit(0);
1255
1259
  }
1256
1260