tt-help-cli-ycl 1.3.95 → 1.3.97

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.95",
3
+ "version": "1.3.97",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -9,6 +9,7 @@ import {
9
9
  DEFAULT_TARGET_LOCATIONS,
10
10
  isLocationInList,
11
11
  } from "../lib/target-locations.js";
12
+ import { delay as randomDelay } from "../lib/delay.js";
12
13
  import { discoverTags } from "../lib/tag-discover.js";
13
14
  import {
14
15
  server as cfgServer,
@@ -121,13 +122,13 @@ async function processTag(
121
122
  port: port || 9222,
122
123
  onProgress: ({ videos, authors }) => {
123
124
  process.stderr.write(
124
- `\r${prefix} #${tag}: ${videos} 视频, ${authors} 作者`,
125
+ `\r${prefix} #${tag}: ${videos} 视频, ${authors} 作者\x1b[K`,
125
126
  );
126
127
  },
127
128
  });
128
129
 
129
130
  process.stderr.write(
130
- `\r${prefix} #${tag}: ${result.videoCount} 视频, ${result.uniqueAuthorCount} 作者`,
131
+ `\r${prefix} #${tag}: ${result.videoCount} 视频, ${result.uniqueAuthorCount} 作者\x1b[K`,
131
132
  );
132
133
 
133
134
  let videos = result.videos;
@@ -154,7 +155,7 @@ async function processTag(
154
155
  locationCreated &&
155
156
  isLocationInList(locationCreated, targetLocations);
156
157
  process.stderr.write(
157
- `\r [${done}/${total}] ${label} → ${loc}${hit ? " ✓" : ""}`,
158
+ `\r [${done}/${total}] ${label} → ${loc}${hit ? " ✓" : ""}\x1b[K`,
158
159
  );
159
160
  },
160
161
  });
@@ -295,8 +296,8 @@ async function scoreSingleTag(
295
296
  { baseUrl, cdpPort, targetCountries, effectiveProxy },
296
297
  ) {
297
298
  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
298
- const progress = (msg) => process.stderr.write(`\r ${msg}`);
299
- const clearLine = () => process.stderr.write("\r" + " ".repeat(80) + "\r");
299
+ const progress = (msg) => process.stderr.write(`\r ${msg}\x1b[K`);
300
+ const clearLine = () => process.stderr.write("\r\x1b[K");
300
301
 
301
302
  const startTime = Date.now();
302
303
 
@@ -613,7 +614,7 @@ export async function handleScoreAll(parsed) {
613
614
  const cdpOpts = { port: cdpPort };
614
615
  if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
615
616
  const browser = await ensureBrowserReady(cdpOpts);
616
- const page = await getOrCreatePage(browser);
617
+ let page = await getOrCreatePage(browser);
617
618
 
618
619
  let totalScored = 0;
619
620
  let emptyRounds = 0; // 连续无任务的轮数
@@ -633,6 +634,15 @@ export async function handleScoreAll(parsed) {
633
634
  log(` 客户端 ID: ${clientId.substring(0, 8)}...`);
634
635
  log("");
635
636
 
637
+ // Ctrl+C 时关闭浏览器和 scraper
638
+ const cleanup = () => {
639
+ log("\n正在清理资源...");
640
+ enrichScraper.close().catch(() => {});
641
+ killEdgeProcesses(null, cdpPort);
642
+ process.exit(0);
643
+ };
644
+ process.on("SIGINT", cleanup);
645
+
636
646
  try {
637
647
  while (true) {
638
648
  try {
@@ -720,15 +730,19 @@ export async function handleScoreAll(parsed) {
720
730
  }
721
731
 
722
732
  // 抓取视频(CDP 连接已登录 Edge)
733
+ const fetchStart = Date.now();
723
734
  log(` 抓取 TikTok 标签页...`);
724
735
  const tagResult = await fetchTagData(tag, {
725
736
  port: cdpPort,
726
737
  onProgress: ({ videos, authors }) => {
727
- process.stderr.write(`\r 抓取中: ${videos} 视频, ${authors} 作者`);
738
+ process.stderr.write(
739
+ `\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
740
+ );
728
741
  },
729
742
  });
743
+ const fetchSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
730
744
  log(
731
- `\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者`,
745
+ `\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者 (${fetchSec}s)\x1b[K`,
732
746
  );
733
747
 
734
748
  result.totalPosts = tagResult.totalPosts || 0;
@@ -736,11 +750,23 @@ export async function handleScoreAll(parsed) {
736
750
  let videos = tagResult.videos;
737
751
 
738
752
  if (!videos || videos.length === 0) {
739
- log(" ⚠️ 无视频,标记 dead");
753
+ const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
754
+ const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(
755
+ 0,
756
+ );
757
+ log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
740
758
  result.status = "dead";
741
759
  result.error = "no videos found";
742
760
  await reportToServer(baseUrl, result, clientId, clientMeta);
743
761
  totalScored++;
762
+ // 导航到 about:blank 释放页面状态再跳过
763
+ await page
764
+ .goto("about:blank", {
765
+ waitUntil: "domcontentloaded",
766
+ timeout: 5000,
767
+ })
768
+ .catch(() => {});
769
+ process.stderr.write(` → page=${page.url()}\n`);
744
770
  continue;
745
771
  }
746
772
 
@@ -751,7 +777,7 @@ export async function handleScoreAll(parsed) {
751
777
  onProgress: ({ done, total, current, locationCreated }) => {
752
778
  if (done % 10 === 0 || done === total) {
753
779
  process.stderr.write(
754
- `\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}`,
780
+ `\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}\x1b[K`,
755
781
  );
756
782
  }
757
783
  },
@@ -760,6 +786,20 @@ export async function handleScoreAll(parsed) {
760
786
  const enriched = await enrichVideosWithLocation(videos, enrichOpts);
761
787
  videos = enriched.videos;
762
788
 
789
+ // CDN 限流检测:有拦截则冷却 + 重启 scraper
790
+ const cdnBlocked = enriched.cdnBlockedCount || 0;
791
+ if (cdnBlocked > 0) {
792
+ const cdnRatio = cdnBlocked / (videos.length || 1);
793
+ const coolSec = cdnRatio > 0.3 ? 120 : 60;
794
+ log(
795
+ ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
796
+ );
797
+ await new Promise((r) => setTimeout(r, coolSec * 1000));
798
+ log(` 正在重启 TikTokScraper...`);
799
+ await enrichScraper.restart();
800
+ log(` ✅ TikTokScraper 已重启`);
801
+ }
802
+
763
803
  // 过滤 + 算分 (共用函数)
764
804
  const { matchedAuthorSet } = applyFilterAndScore(
765
805
  videos,
@@ -795,10 +835,25 @@ export async function handleScoreAll(parsed) {
795
835
  const mc = result.matchedCountries
796
836
  .map((c) => `${c.c}:${c.n}`)
797
837
  .join(" ");
838
+ // Node.js 进程内存占用
839
+ const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
840
+ const memStr = ` mem=${memMB}MB`;
798
841
  log(
799
- ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}`,
842
+ ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
800
843
  );
801
844
  log("");
845
+
846
+ // 随机等待 3-7 秒,避免连续访问 TikTok 触发风控
847
+ await randomDelay(3000, 7000);
848
+
849
+ // 导航到 about:blank 卸载页面,状态清零,下次 goto 重新初始化
850
+ await page
851
+ .goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
852
+ .catch((e) => {
853
+ log(` ⚠️ about:blank 跳转失败: ${e.message}`);
854
+ });
855
+ process.stderr.write(` → page=${page.url()}\n`);
856
+ await randomDelay(3000, 7000);
802
857
  } catch (e) {
803
858
  // 区分网络错误和业务错误
804
859
  const isNetworkError =
@@ -23,6 +23,7 @@ async function processAPIResponse(
23
23
  href,
24
24
  createTime: item.createTime || null,
25
25
  playCount: item.stats?.playCount || 0,
26
+ isECVideo: item.isECVideo ? 1 : 0,
26
27
  });
27
28
  }
28
29
 
@@ -72,6 +73,7 @@ async function processAPIResponse(
72
73
  href,
73
74
  createTime: item.createTime || null,
74
75
  playCount: item.stats?.playCount || 0,
76
+ isECVideo: item.isECVideo ? 1 : 0,
75
77
  });
76
78
  }
77
79
  }
@@ -2,6 +2,7 @@ import { chromium } from "playwright";
2
2
  import { ensureBrowserReady } from "./browser/cdp.js";
3
3
  import { getOrCreatePage } from "./browser/page.js";
4
4
  import { TikTokScraper } from "./tiktok-scraper.mjs";
5
+ import { CDNBlockedError } from "./parse-ssr.mjs";
5
6
 
6
7
  const TAG_URL = "https://www.tiktok.com/tag";
7
8
  const SCROLL_INTERVAL = 3000;
@@ -56,43 +57,45 @@ export async function fetchTagData(tag, options = {}) {
56
57
  const browser = await ensureBrowserReady(cdpOptions);
57
58
  const page = await getOrCreatePage(browser);
58
59
 
59
- try {
60
- let challengeInfo = null;
61
- const rawVideos = [];
62
- const seenVideoIds = new Set();
63
- const authors = new Set();
64
-
65
- page.on("response", async (resp) => {
66
- try {
67
- const url = resp.url();
68
- const ct = resp.headers()["content-type"] || "";
69
-
70
- if (url.includes("/api/challenge/detail/") && ct.includes("json")) {
71
- const body = await resp.json();
72
- if (body?.challengeInfo?.challenge) {
73
- challengeInfo = body.challengeInfo.challenge;
74
- }
60
+ let challengeInfo = null;
61
+ const rawVideos = [];
62
+ const seenVideoIds = new Set();
63
+ const authors = new Set();
64
+
65
+ const responseHandler = async (resp) => {
66
+ try {
67
+ const url = resp.url();
68
+ const ct = resp.headers()["content-type"] || "";
69
+
70
+ if (url.includes("/api/challenge/detail/") && ct.includes("json")) {
71
+ const body = await resp.json();
72
+ if (body?.challengeInfo?.challenge) {
73
+ challengeInfo = body.challengeInfo.challenge;
75
74
  }
75
+ }
76
76
 
77
- if (url.includes("/api/challenge/item_list/") && ct.includes("json")) {
78
- const body = await resp.json();
79
- if (!body?.itemList) return;
80
- for (const item of body.itemList) {
81
- const vid = item.id || "";
82
- if (vid && !seenVideoIds.has(vid)) {
83
- seenVideoIds.add(vid);
84
- const uid = item.author?.uniqueId || "";
85
- if (uid) authors.add(uid);
86
- rawVideos.push(extractItemData(item));
87
- }
88
- }
89
- if (onProgress) {
90
- onProgress({ videos: rawVideos.length, authors: authors.size });
77
+ if (url.includes("/api/challenge/item_list/") && ct.includes("json")) {
78
+ const body = await resp.json();
79
+ if (!body?.itemList) return;
80
+ for (const item of body.itemList) {
81
+ const vid = item.id || "";
82
+ if (vid && !seenVideoIds.has(vid)) {
83
+ seenVideoIds.add(vid);
84
+ const uid = item.author?.uniqueId || "";
85
+ if (uid) authors.add(uid);
86
+ rawVideos.push(extractItemData(item));
91
87
  }
92
88
  }
93
- } catch {}
94
- });
89
+ if (onProgress) {
90
+ onProgress({ videos: rawVideos.length, authors: authors.size });
91
+ }
92
+ }
93
+ } catch {}
94
+ };
95
+
96
+ page.on("response", responseHandler);
95
97
 
98
+ try {
96
99
  const tagUrl = `${TAG_URL}/${encodeURIComponent(tag)}`;
97
100
  const resp = await page.goto(tagUrl, {
98
101
  waitUntil: "domcontentloaded",
@@ -185,7 +188,7 @@ export async function fetchTagData(tag, options = {}) {
185
188
  uniqueAuthors: [...authors],
186
189
  };
187
190
  } finally {
188
- // 不关闭 page 和 browser,由用户自行关闭
191
+ page.off("response", responseHandler);
189
192
  }
190
193
  }
191
194
 
@@ -197,7 +200,7 @@ export async function fetchTagData(tag, options = {}) {
197
200
  * @param {number} [options.poolSize=3] - 并发页面数
198
201
  * @param {number} [options.maxRetries=3] - 单个请求最大重试次数
199
202
  * @param {Function} [options.onProgress] - 进度回调 ({ done, total, current, locationCreated })
200
- * @returns {Promise<{ videos: Array, locationMap: Record<string, string|null> }>}
203
+ * @returns {Promise<{ videos: Array, locationMap: Record<string, string|null>, cdnBlockedCount: number }>}
201
204
  */
202
205
  export async function enrichVideosWithLocation(videos, options = {}) {
203
206
  const {
@@ -219,6 +222,8 @@ export async function enrichVideosWithLocation(videos, options = {}) {
219
222
  const locationMap = {};
220
223
  let done = 0;
221
224
 
225
+ let cdnBlockedCount = 0;
226
+
222
227
  if (mode === "users") {
223
228
  const uniqueAuthors = [
224
229
  ...new Set(videos.map((v) => v.authorUniqueId).filter(Boolean)),
@@ -239,7 +244,10 @@ export async function enrichVideosWithLocation(videos, options = {}) {
239
244
  current: uniqueId,
240
245
  locationCreated: location,
241
246
  });
242
- } catch {
247
+ } catch (err) {
248
+ if (err instanceof CDNBlockedError) {
249
+ cdnBlockedCount++;
250
+ }
243
251
  locationMap[uniqueId] = null;
244
252
  done++;
245
253
  if (onProgress)
@@ -277,7 +285,10 @@ export async function enrichVideosWithLocation(videos, options = {}) {
277
285
  current: videoUrl,
278
286
  locationCreated: location,
279
287
  });
280
- } catch {
288
+ } catch (err) {
289
+ if (err instanceof CDNBlockedError) {
290
+ cdnBlockedCount++;
291
+ }
281
292
  v.locationCreated = null;
282
293
  locationMap[v.id] = null;
283
294
  done++;
@@ -295,7 +306,7 @@ export async function enrichVideosWithLocation(videos, options = {}) {
295
306
  await Promise.allSettled(tasks);
296
307
  }
297
308
 
298
- return { videos: enriched, locationMap };
309
+ return { videos: enriched, locationMap, cdnBlockedCount };
299
310
  } finally {
300
311
  if (ownsScraper) await scraper.close();
301
312
  }
@@ -267,6 +267,14 @@ export class TikTokScraper {
267
267
  const slot = this._pickSlot();
268
268
  return slot.lock.run(async () => {
269
269
  let rawHtml = await this._fetchViewSource(videoUrl, slot);
270
+ // CDN 限流立即抛出,不重试
271
+ if (detectAccessDenied(rawHtml)) {
272
+ const denied = detectAccessDenied(rawHtml);
273
+ throw new CDNBlockedError(
274
+ `CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
275
+ denied.reference,
276
+ );
277
+ }
270
278
  let result = parseVideoInfo(rawHtml);
271
279
  for (let attempt = 1; !result && attempt <= maxRetries; attempt++) {
272
280
  // 检查是否值得重试
@@ -278,6 +286,14 @@ export class TikTokScraper {
278
286
  } catch {}
279
287
  await delay(500 * attempt);
280
288
  rawHtml = await this._fetchViewSource(videoUrl, slot);
289
+ // 重试中也检查 CDN 限流
290
+ if (detectAccessDenied(rawHtml)) {
291
+ const denied = detectAccessDenied(rawHtml);
292
+ throw new CDNBlockedError(
293
+ `CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
294
+ denied.reference,
295
+ );
296
+ }
281
297
  result = parseVideoInfo(rawHtml);
282
298
  }
283
299
  return result || null;
@@ -87,11 +87,14 @@ async function processExplore(page, username, options, log) {
87
87
  if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
88
88
  }
89
89
 
90
- // 找出 7 天内发布且播放量最大的视频
90
+ // 找出 7 天内发布且 isECVideo=1 且播放量最大的视频
91
91
  const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
92
92
  const nowSeconds = Math.floor(Date.now() / 1000);
93
93
  const recentVideos = videoArray.filter(
94
- (v) => v.createTime && nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
94
+ (v) =>
95
+ v.isECVideo === 1 &&
96
+ v.createTime &&
97
+ nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
95
98
  );
96
99
  if (recentVideos.length > 0) {
97
100
  const topVideo = recentVideos.reduce((max, v) =>
@@ -104,7 +107,7 @@ async function processExplore(page, username, options, log) {
104
107
  createTime: topVideo.createTime,
105
108
  };
106
109
  log(
107
- ` 7天内最高播放视频: ${topVideo.playCount} 次播放 (${recentVideos.length} 个候选)`,
110
+ ` 7天内 EC视频最高播放: ${topVideo.playCount} 次播放 (${recentVideos.length} 个EC候选)`,
108
111
  );
109
112
  }
110
113
 
@@ -90,7 +90,9 @@ import {
90
90
  getDeadTags,
91
91
  claimTag,
92
92
  reportTagScore,
93
+ resetStaleScoringTags,
93
94
  getAllTags,
95
+ getTagStats,
94
96
  rawQuery,
95
97
  normalizeTags,
96
98
  clearTags,
@@ -1415,6 +1417,7 @@ export function createStore(filePath, options = {}) {
1415
1417
  `(
1416
1418
  instr(COALESCE(sources, ''), '"following"') > 0
1417
1419
  OR instr(COALESCE(sources, ''), '"follower"') > 0
1420
+ OR instr(COALESCE(sources, ''), '"comment"') > 0
1418
1421
  )`,
1419
1422
  ],
1420
1423
  });
@@ -1639,7 +1642,8 @@ export function createStore(filePath, options = {}) {
1639
1642
  (u) =>
1640
1643
  u.sources &&
1641
1644
  (u.sources.includes("following") ||
1642
- u.sources.includes("follower")),
1645
+ u.sources.includes("follower") ||
1646
+ u.sources.includes("comment")),
1643
1647
  );
1644
1648
  follow.sort((a, b) => locationTier(a) - locationTier(b));
1645
1649
  next = follow[0] || null;
@@ -2674,6 +2678,30 @@ export function createStore(filePath, options = {}) {
2674
2678
  return { ok: true, location, modifiedAt: user.modifiedAt };
2675
2679
  }
2676
2680
 
2681
+ function setNonSeller(uniqueId) {
2682
+ if (getDb()) {
2683
+ const existing = getDb()
2684
+ .prepare("SELECT * FROM jobs WHERE unique_id = ?")
2685
+ .get(uniqueId);
2686
+ if (!existing) return { error: "user not found" };
2687
+ const now = Date.now();
2688
+ getDb()
2689
+ .prepare(
2690
+ "UPDATE jobs SET tt_seller = 0, updated_at = ? WHERE unique_id = ?",
2691
+ )
2692
+ .run(now, uniqueId);
2693
+ console.error(`[DB] setNonSeller: ${uniqueId} → tt_seller=0`);
2694
+ return { ok: true };
2695
+ }
2696
+
2697
+ const user = getUser(uniqueId);
2698
+ if (!user) return { error: "user not found" };
2699
+ user.ttSeller = false;
2700
+ user.updatedAt = Date.now();
2701
+ save();
2702
+ return { ok: true };
2703
+ }
2704
+
2677
2705
  // 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
2678
2706
  function moveJobToRaw(uniqueId) {
2679
2707
  if (!getDb()) return false;
@@ -3127,6 +3155,7 @@ export function createStore(filePath, options = {}) {
3127
3155
  getPendingUserUpdateTasks,
3128
3156
  updateUserInfo,
3129
3157
  updateUserLocation,
3158
+ setNonSeller,
3130
3159
  batchUpdateUserInfo,
3131
3160
  reportClientError,
3132
3161
  deleteClientError,
@@ -3154,7 +3183,9 @@ export function createStore(filePath, options = {}) {
3154
3183
  getDeadTags,
3155
3184
  claimTag,
3156
3185
  reportTagScore,
3186
+ resetStaleScoringTags,
3157
3187
  getAllTags,
3188
+ getTagStats,
3158
3189
  normalizeTags,
3159
3190
  clearTags,
3160
3191
  data,
@@ -93,8 +93,11 @@ export function getDashboardStatsFromDb(targetLocations = []) {
93
93
  .prepare("SELECT COUNT(*) as total FROM jobs_base")
94
94
  .get().total;
95
95
 
96
+ const tagCount = db.prepare("SELECT COUNT(*) as total FROM tags").get().total;
97
+
96
98
  return {
97
99
  totalUsers: aggregateRow.total,
100
+ tagCount,
98
101
  rawJobs: getRawJobsCount(),
99
102
  dbTotalUsers: getUserDbCount(),
100
103
  jobsTotal: aggregateRow.total,
@@ -33,17 +33,61 @@ export function insertTag(tag, countries, source = "llm") {
33
33
  }
34
34
  }
35
35
 
36
- export function getTagsByStatus(status, limit = 100) {
36
+ export function getTagsByStatus(
37
+ status,
38
+ limit = 100,
39
+ offset = 0,
40
+ country = null,
41
+ ) {
37
42
  const db = getDb();
38
43
  if (!db) return [];
39
- const rows = db
40
- .prepare(
41
- "SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?",
42
- )
43
- .all(status, limit);
44
+ let sql = "SELECT * FROM tags WHERE status = ?";
45
+ const params = [status];
46
+ if (country) {
47
+ sql += " AND countries LIKE ?";
48
+ params.push(`%"${country}"%`);
49
+ }
50
+ sql += " ORDER BY score ASC, created_at ASC LIMIT ? OFFSET ?";
51
+ params.push(limit, offset);
52
+ const rows = db.prepare(sql).all(...params);
44
53
  return rows.map(parseTagRow);
45
54
  }
46
55
 
56
+ export function getTagStats(country = null) {
57
+ const db = getDb();
58
+ if (!db) return null;
59
+ let sql = `SELECT
60
+ COUNT(*) as total,
61
+ SUM(CASE WHEN status = 'productive' THEN 1 ELSE 0 END) as productive,
62
+ SUM(CASE WHEN status = 'dead' THEN 1 ELSE 0 END) as dead,
63
+ SUM(CASE WHEN status = 'new' THEN 1 ELSE 0 END) as newCount,
64
+ SUM(CASE WHEN status = 'scoring' THEN 1 ELSE 0 END) as scoring
65
+ FROM tags`;
66
+ const params = [];
67
+ if (country) {
68
+ sql += " WHERE countries LIKE ?";
69
+ params.push(`%"${country}"%`);
70
+ }
71
+ const row = db.prepare(sql).get(...params);
72
+ // 获取所有出现过的国家
73
+ const allRows = db.prepare("SELECT countries FROM tags").all();
74
+ const countrySet = new Set();
75
+ for (const r of allRows) {
76
+ try {
77
+ const arr = JSON.parse(r.countries || "[]");
78
+ for (const c of arr) countrySet.add(c);
79
+ } catch {}
80
+ }
81
+ return {
82
+ total: row.total,
83
+ productive: row.productive || 0,
84
+ dead: row.dead || 0,
85
+ new: row.newCount || 0,
86
+ scoring: row.scoring || 0,
87
+ countries: [...countrySet].sort(),
88
+ };
89
+ }
90
+
47
91
  export function getTagsByCountry(country, minScore = 0) {
48
92
  const db = getDb();
49
93
  if (!db) return [];
@@ -64,12 +108,33 @@ export function getDeadTags(country) {
64
108
  return rows.map(parseTagRow).filter((r) => r.countries.includes(country));
65
109
  }
66
110
 
111
+ export function resetStaleScoringTags(minutes = 30) {
112
+ const db = getDb();
113
+ if (!db) return { ok: false, error: "db not ready" };
114
+ // 清理超时的 scoring 标签:有时间戳的按时间,没时间戳的(旧数据)直接清
115
+ const result = db
116
+ .prepare(
117
+ "UPDATE tags SET status = 'new', scored_at = NULL WHERE status = 'scoring' AND (scored_at IS NULL OR scored_at < datetime('now', ?))",
118
+ )
119
+ .run(`-${minutes} minutes`);
120
+ if (result.changes > 0) {
121
+ console.error(
122
+ `[tags] 清理了 ${result.changes} 个超时 scoring 标签(>${minutes}分钟)`,
123
+ );
124
+ }
125
+ return { ok: true, reset: result.changes };
126
+ }
127
+
67
128
  export function claimTag(tag) {
68
129
  const db = getDb();
69
130
  if (!db) return { ok: false, error: "db not ready" };
131
+
132
+ // 先清理超时的 scoring 标签,防止死任务堆积
133
+ resetStaleScoringTags();
134
+
70
135
  const result = db
71
136
  .prepare(
72
- "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
137
+ "UPDATE tags SET status = 'scoring', scored_at = datetime('now') WHERE tag = ? AND status = 'new'",
73
138
  )
74
139
  .run(tag);
75
140
  if (result.changes === 0) {
@@ -132,12 +197,18 @@ export function reportTagScore(tag, fields) {
132
197
  }
133
198
  }
134
199
 
135
- export function getAllTags(limit = 200) {
200
+ export function getAllTags(limit = 200, offset = 0, country = null) {
136
201
  const db = getDb();
137
202
  if (!db) return [];
138
- const rows = db
139
- .prepare("SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?")
140
- .all(limit);
203
+ let sql = "SELECT * FROM tags";
204
+ const params = [];
205
+ if (country) {
206
+ sql += " WHERE countries LIKE ?";
207
+ params.push(`%"${country}"%`);
208
+ }
209
+ sql += " ORDER BY score DESC, created_at DESC LIMIT ? OFFSET ?";
210
+ params.push(limit, offset);
211
+ const rows = db.prepare(sql).all(...params);
141
212
  return rows.map(parseTagRow);
142
213
  }
143
214