npm - tt-help-cli-ycl - Versions diffs - 1.3.90 → 1.3.91 - Mend

tt-help-cli-ycl 1.3.90 → 1.3.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/cli/tag.js +71 -50
package/src/watch/data-store.js +208 -16
package/src/watch/server.js +4 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tt-help-cli-ycl",
-  "version": "1.3.90",
+  "version": "1.3.91",
   "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
   "type": "module",
   "bin": {

package/src/cli/tag.js CHANGED Viewed

@@ -472,61 +472,64 @@ export async function handleScoreAll(parsed) {
   try {
     while (true) {
-      // 从服务端取下一个 new 标签
-      const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
-      const tagsData = await tagsRes.json();
-      if (!tagsData.tags || tagsData.tags.length === 0) {
-        emptyRounds++;
-        // 自动发现：连续 N 轮无任务时自动生成标签
-        if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
-          log(
-            `🔍 连续 ${emptyRounds} 轮无待打分标签，自动为 ${targetCountries.length} 个国家生成标签...`,
-          );
-          for (const country of targetCountries) {
-            try {
-              const discRes = await fetch(
-                `${baseUrl}/api/tags/discover?country=${country}&count=5`,
-              );
-              const discData = await discRes.json();
-              if (discData.inserted) {
-                log(`  ${country}: 新增 ${discData.inserted} 个`);
+      try {
+        // 从服务端取下一个 new 标签
+        const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
+        const tagsData = await tagsRes.json();
+        if (!tagsData.tags || tagsData.tags.length === 0) {
+          emptyRounds++;
+          // 自动发现：连续 N 轮无任务时自动生成标签
+          if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
+            log(
+              `🔍 连续 ${emptyRounds} 轮无待打分标签，自动为 ${targetCountries.length} 个国家生成标签...`,
+            );
+            for (const country of targetCountries) {
+              try {
+                const discRes = await fetch(
+                  `${baseUrl}/api/tags/discover?country=${country}&count=5`,
+                );
+                const discData = await discRes.json();
+                if (discData.inserted) {
+                  log(`  ${country}: 新增 ${discData.inserted} 个`);
+                }
+              } catch (e) {
+                log(`  ${country}: 请求失败 (${e.message})`);
               }
-            } catch (e) {
-              log(`  ${country}: 请求失败 (${e.message})`);
             }
+            emptyRounds = 0; // 重置计数器
+            // 等 3 秒让服务端处理完
+            await new Promise((r) => setTimeout(r, 3000));
+            continue;
           }
-          emptyRounds = 0; // 重置计数器
-          // 等 3 秒让服务端处理完
-          await new Promise((r) => setTimeout(r, 3000));
+          log(`⏳ 暂无待打分标签（连续 ${emptyRounds} 轮），10 秒后重试...`);
+          await new Promise((r) => setTimeout(r, 10000));
           continue;
         }
-        log(`⏳ 暂无待打分标签（连续 ${emptyRounds} 轮），10 秒后重试...`);
-        await new Promise((r) => setTimeout(r, 10000));
-        continue;
-      }
-      // 有任务了，重置计数器
-      emptyRounds = 0;
-      const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
-      const startTime = Date.now();
-      log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
+        // 有任务了，重置计数器
+        emptyRounds = 0;
+        const tag = tagsData.tags[0].tag
+          .replace(/^#+/, "")
+          .trim()
+          .toLowerCase();
+        const startTime = Date.now();
+        log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
+        const result = {
+          tag,
+          status: "error",
+          score: 0,
+          totalPosts: 0,
+          authorCount: 0,
+          matchedAuthors: 0,
+          matchedCountries: [],
+          pushedUsers: 0,
+          error: null,
+        };
-      const result = {
-        tag,
-        status: "error",
-        score: 0,
-        totalPosts: 0,
-        authorCount: 0,
-        matchedAuthors: 0,
-        matchedCountries: [],
-        pushedUsers: 0,
-        error: null,
-      };
-      try {
         // 锁定 tag
         const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
           method: "POST",
@@ -624,10 +627,28 @@ export async function handleScoreAll(parsed) {
         );
         log("");
       } catch (e) {
+        // 区分网络错误和业务错误
+        const isNetworkError =
+          e.code === "ECONNREFUSED" ||
+          e.code === "ENOTFOUND" ||
+          e.code === "ECONNRESET" ||
+          (e.message &&
+            (e.message.includes("ECONNREFUSED") ||
+              e.message.includes("fetch failed") ||
+              e.message.includes("network")));
+        if (isNetworkError) {
+          log(`  ⚠️ 服务端连接失败 (${e.message})，15 秒后重试...`);
+          await new Promise((r) => setTimeout(r, 15000));
+          continue;
+        }
         log(`  ❌ 失败: ${e.message}`);
-        result.error = e.message;
         try {
-          await reportToServer(baseUrl, result);
+          await reportToServer(baseUrl, {
+            tag: "",
+            status: "error",
+            score: 0,
+            error: e.message,
+          });
         } catch {}
         totalScored++;
       }

package/src/watch/data-store.js CHANGED Viewed

@@ -2223,6 +2223,58 @@ export function createStore(filePath, options = {}) {
   if (filePath) {
     // 初始化 SQLite 用户表（用于判重）
     initUserDb(filePath);
+    // 从数据库恢复偏移量
+    loadLlmSampleOffsets();
+  }
+  /**
+   * Restore LLM sampling offsets persisted in the `_llm_sample_offsets` table.
+   *
+   * Reads the `offsets` column of one row, parses it as JSON and copies each
+   * key/value pair into `llmSampleOffsets`. Errors (for example a missing
+   * table or unparsable JSON) are logged and never rethrown.
+   */
+  function loadLlmSampleOffsets() {
+    try {
+      const selectSql = `SELECT offsets FROM _llm_sample_offsets LIMIT 1`;
+      const saved = db.prepare(selectSql).get();
+      if (!saved || !saved.offsets) return;
+      const restored = JSON.parse(saved.offsets);
+      if (!restored || typeof restored !== "object") return;
+      for (const [key, offset] of Object.entries(restored)) {
+        llmSampleOffsets.set(key, offset);
+      }
+      // The summary lists every entry now in the map, not only the restored ones.
+      const summary = [...llmSampleOffsets.entries()]
+        .map(([key, offset]) => `${key}:${offset}`)
+        .join(", ");
+      console.error(`[data-store] 已恢复 LLM 采样偏移量: ${summary}`);
+    } catch (e) {
+      // Reached when the query or JSON.parse throws; the failure is only logged
+      const reason = e.message;
+      console.error(
+        `[data-store] 加载 LLM 采样偏移量失败，使用空偏移量: ${reason}`,
+      );
+    }
+  }
+  /**
+   * Persist `llmSampleOffsets` as one JSON row (id = 1) in `_llm_sample_offsets`.
+   * Failures are logged and not rethrown.
+   */
+  function saveLlmSampleOffsets() {
+    const createSql = `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`;
+    const upsertSql = `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`;
+    try {
+      const snapshot = Object.fromEntries(llmSampleOffsets);
+      const offsetsJson = JSON.stringify(snapshot);
+      // Create the table if it is missing, then overwrite the single row.
+      db.prepare(createSql).run();
+      db.prepare(upsertSql).run(offsetsJson);
+    } catch (e) {
+      const reason = e.message;
+      console.error(`[data-store] 保存 LLM 采样偏移量失败: ${reason}`);
+    }
   }
   // stats 缓存
@@ -2383,15 +2435,97 @@ export function createStore(filePath, options = {}) {
   }
   function flushSave() {
+    // Database mode: persist the LLM sampling offsets (this function itself makes no backup)
+    if (db && dbPath) {
+      try {
+        saveLlmSampleOffsets();
+      } catch (e) {
+        console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
+      }
+    }
     return Promise.resolve();
   }
-  function saveVideos() {
-    return;
+  /**
+   * Back up the database with SQLite `VACUUM INTO`, keeping the newest maxBackups backups.
+   * @param {number} maxBackups - number of backups to keep, default 3
+   * @returns {string|null} path of the backup file, or null on failure
+   */
+  function backupDatabase(maxBackups = 3) {
+    if (!db || !dbPath) {
+      console.error("[data-store] 数据库未初始化，跳过备份");
+      return null;
+    }
+    try {
+      // Backup file name: <baseName>-<15 digits>.db, e.g. result-202606270944001.db
+      const now = new Date();
+      const timestamp = now
+        .toISOString()
+        .replace(/[-:T.]/g, "")
+        .slice(0, 15); // UTC YYYYMMDDHHmmss plus the first millisecond digit
+      const baseName = path.basename(dbPath, ".db");
+      const backupName = `${baseName}-${timestamp}.db`;
+      const backupDir = path.dirname(dbPath);
+      const backupPath = path.join(backupDir, backupName);
+      console.error(`[data-store] 正在备份数据库: ${backupName}`);
+      // better-sqlite3's db.backup(destination) is async and expects a file path, so it cannot
+      // finish before the process.exit() that follows stopBackup(); VACUUM INTO is synchronous.
+      // NOTE(review): assumes db is a better-sqlite3 Database and backupPath does not exist yet.
+      db.prepare("VACUUM INTO ?").run(backupPath);
+      // Report the size of the backup file that was just written
+      const stat = fs.statSync(backupPath);
+      const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
+      console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);
+      // Prune old backups so only the newest maxBackups remain
+      cleanupOldBackups(backupDir, baseName, maxBackups);
+      return backupPath;
+    } catch (e) {
+      console.error(`[data-store] 备份失败: ${e.message}`);
+      return null;
+    }
+  }
+  /**
+   * Delete old backups of baseName in backupDir, keeping the newest maxBackups (errors are logged).
+   */
+  function cleanupOldBackups(backupDir, baseName, maxBackups) {
+    try {
+      // Files are <baseName>-<15 digits>.db; baseName is escaped so its regex metacharacters stay literal
+      const escapedBase = baseName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+      const pattern = new RegExp(`^${escapedBase}-\\d{15}\\.db$`);
+      const backups = fs
+        .readdirSync(backupDir)
+        .filter((f) => pattern.test(f))
+        .sort() // fixed-width digit stamps, so string order is time order
+        .reverse(); // newest first
+      if (backups.length > maxBackups) {
+        const toDelete = backups.slice(maxBackups);
+        for (const file of toDelete) {
+          fs.unlinkSync(path.join(backupDir, file));
+          console.error(`[data-store] 已清理旧备份: ${file}`);
+        }
+      }
+      console.error(
+        `[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
+      );
+    } catch (e) {
+      console.error(`[data-store] 清理旧备份失败: ${e.message}`);
+    }
   }
   function stopBackup() {
-    return;
+    // Take a database backup on exit
+    if (db && dbPath) {
+      backupDatabase();
+    }
   }
   function getUser(uid) {
@@ -2795,7 +2929,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
               .prepare(
                 `
                 SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
-                ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+                ORDER BY
+                  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+                  COALESCE(video_count, 0) DESC, created_at DESC
                 LIMIT ? OFFSET ?
               `,
               )
@@ -2803,11 +2939,32 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
             if (samples.length === 0) break;
-            const scores = await scoreJobsBatch(
-              samples,
-              DEFAULT_TARGET_LOCATIONS,
+            // 分离 tag 来源和非 tag 来源：tag 来源跳过 LLM 打分直接合格
+            const tagSamples = samples.filter((s) =>
+              (s.sources || "").includes("tag"),
             );
-            const batchQualified = scores.filter((s) => s.score >= llmMinScore);
+            const nonTagSamples = samples.filter(
+              (s) => !(s.sources || "").includes("tag"),
+            );
+            // tag 来源直接加入合格列表
+            if (tagSamples.length > 0) {
+              allQualified.push(...tagSamples.map((s) => s.unique_id));
+              console.error(
+                `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
+              );
+            }
+            // 非 tag 来源走 LLM 打分
+            let batchQualified = [];
+            let scores = [];
+            if (nonTagSamples.length > 0) {
+              scores = await scoreJobsBatch(
+                nonTagSamples,
+                DEFAULT_TARGET_LOCATIONS,
+              );
+              batchQualified = scores.filter((s) => s.score >= llmMinScore);
+            }
             allScores.push(...scores);
             allQualified.push(...batchQualified.map((s) => s.uniqueId));
@@ -2829,12 +2986,27 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
           if (allQualified.length >= llmMinReturn) break;
         }
-        // 按分数降序排序，取前 safeLimit 条
-        const qualifiedScores = allScores
+        // 分离 tag 合格和非 tag 合格
+        // tag 任务直接合格（不在 allScores 中），非 tag 任务走 LLM 打分
+        const tagQualified = allQualified.filter(
+          (uid) => !allScores.find((s) => s.uniqueId === uid),
+        );
+        const nonTagQualifiedScores = allScores
           .filter((s) => s.score >= llmMinScore)
-          .sort((a, b) => b.score - a.score)
-          .slice(0, safeLimit);
-        const qualified = qualifiedScores.map((s) => s.uniqueId);
+          .sort((a, b) => b.score - a.score);
+        const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
+        // 限制 tag 占比：最多占 safeLimit 的 70%，留 30% 给非 tag
+        const tagMaxCount = Math.floor(safeLimit * 0.7);
+        const tagCount = Math.min(tagQualified.length, tagMaxCount);
+        const nonTagMaxCount = safeLimit - tagCount;
+        const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
+        // 最终合格列表：tag 优先 + 非 tag 按分数排序
+        const qualified = [
+          ...tagQualified.slice(0, tagCount),
+          ...finalNonTagQualified,
+        ];
         if (!qualified.length) {
           console.error(
@@ -2881,6 +3053,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         moveTxn();
         markStatsDirty();
+        // 持久化偏移量到数据库
+        saveLlmSampleOffsets();
         // 打印最终偏移量状态
         const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
           .map(([k, v]) => `${k}:${v}`)
@@ -2920,7 +3095,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
           status_code, latest_video_time, user_create_time
         FROM raw_jobs
         WHERE ${whereSql}
-        ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+        ORDER BY
+          CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+          COALESCE(video_count, 0) DESC, created_at DESC
         LIMIT ?
       `,
       ).run(...args, safeLimit);
@@ -2932,7 +3109,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         WHERE unique_id IN (
           SELECT unique_id FROM raw_jobs
           WHERE ${whereSql}
-          ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
+          ORDER BY
+            CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+            COALESCE(video_count, 0) DESC, created_at DESC
           LIMIT ?
         )
       `,
@@ -4274,7 +4453,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         sqlParams.push(...targetCountries);
       }
-      sql += ` ORDER BY created_at ASC, unique_id ASC LIMIT ?`;
+      // 优先级：sources 包含 "tag" 的任务优先，其余按 created_at 排序
+      sql += ` ORDER BY
+        CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
+        created_at ASC,
+        unique_id ASC
+      LIMIT ?`;
       sqlParams.push(l);
       const rows = db.prepare(sql).all(...sqlParams);
@@ -4322,6 +4506,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
         }
         return false;
       })
+      .sort((a, b) => {
+        // 优先级：sources 包含 "tag" 的任务优先
+        const aIsTag = (a.sources || "").includes("tag");
+        const bIsTag = (b.sources || "").includes("tag");
+        if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
+        return (a.createdAt || 0) - (b.createdAt || 0);
+      })
       .slice(0, l);
     // 接受任务时 userUpdateCount + 1
     pending.forEach((u) => {
@@ -4835,6 +5026,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
     commitCommentTask,
     debugClaimNextJob,
     stopBackup,
+    backupDatabase, // 手动备份数据库
     rawQuery,
     getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
     // Tag 发现与打分

package/src/watch/server.js CHANGED Viewed

@@ -1250,7 +1250,10 @@ export function startWatchServer(
         console.error("[server] HTTP 服务已关闭");
       });
       await store.flushSave();
-      console.error("[server] 数据已保存，退出");
+      console.error("[server] 数据已保存");
+      // 备份数据库
+      store.stopBackup();
+      console.error("[server] 退出");
       process.exit(0);
     }