tt-help-cli-ycl 1.3.98 → 1.3.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.98",
3
+ "version": "1.3.100",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -5,6 +5,7 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
5
5
  import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
6
6
  import { getOrCreatePage } from "../lib/browser/page.js";
7
7
  import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
8
+ import { CDNBlockedError } from "../lib/parse-ssr.mjs";
8
9
  import {
9
10
  DEFAULT_TARGET_LOCATIONS,
10
11
  isLocationInList,
@@ -18,6 +19,41 @@ import {
18
19
 
19
20
  const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
20
21
  const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
22
+ const DEFAULT_SCORE_COUNTRIES = [
23
+ "ES",
24
+ "FR",
25
+ "DE",
26
+ "PT",
27
+ "IT",
28
+ "NL",
29
+ "BE",
30
+ "AT",
31
+ "IE",
32
+ "PL",
33
+ "CZ",
34
+ "GR",
35
+ "HU",
36
+ ];
37
+
38
+ function resolveTargetCountries(countries) {
39
+ return countries || DEFAULT_SCORE_COUNTRIES;
40
+ }
41
+
42
+ function formatMemoryUsage(mem = process.memoryUsage()) {
43
+ return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
44
+ }
45
+
46
+ function getCdnCooldownSeconds(blockedCount, totalCount, isTooManyRequests = false) {
47
+ if (isTooManyRequests) return 120;
48
+ const ratio = blockedCount / Math.max(totalCount, 1);
49
+ return ratio > 0.3 ? 120 : 60;
50
+ }
51
+
52
+ async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
53
+ await new Promise((r) => setTimeout(r, cooldownSec * 1000));
54
+ await recyclePage();
55
+ await maybeRecycleForMemory();
56
+ }
21
57
 
22
58
  // 构建带客户端追踪 header 的 fetch 封装
23
59
  function buildClientHeaders(clientId, meta, extra = {}) {
@@ -426,21 +462,7 @@ export async function handleScore(parsed) {
426
462
  const baseUrl = serverUrl || DEFAULT_SERVER;
427
463
  const cdpPort = port || 9222;
428
464
  const effectiveProxy = cliProxy || configuredProxy;
429
- const targetCountries = countries || [
430
- "ES",
431
- "FR",
432
- "DE",
433
- "PT",
434
- "IT",
435
- "NL",
436
- "BE",
437
- "AT",
438
- "IE",
439
- "PL",
440
- "CZ",
441
- "GR",
442
- "HU",
443
- ];
465
+ const targetCountries = resolveTargetCountries(countries);
444
466
 
445
467
  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
446
468
 
@@ -576,23 +598,11 @@ export async function handleScoreAll(parsed) {
576
598
  } = tagScoreAll || {};
577
599
 
578
600
  const baseUrl = serverUrl || DEFAULT_SERVER;
579
- const cdpPort = port || 9222;
601
+ const defaultScoreAllPort =
602
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222;
603
+ let cdpPort = port || defaultScoreAllPort;
580
604
  const effectiveProxy = cliProxy || configuredProxy;
581
- const targetCountries = countries || [
582
- "ES",
583
- "FR",
584
- "DE",
585
- "PT",
586
- "IT",
587
- "NL",
588
- "BE",
589
- "AT",
590
- "IE",
591
- "PL",
592
- "CZ",
593
- "GR",
594
- "HU",
595
- ];
605
+ const targetCountries = resolveTargetCountries(countries);
596
606
 
597
607
  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
598
608
 
@@ -613,8 +623,131 @@ export async function handleScoreAll(parsed) {
613
623
  // 连接 CDP 浏览器
614
624
  const cdpOpts = { port: cdpPort };
615
625
  if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
616
- const browser = await ensureBrowserReady(cdpOpts);
626
+ let browser = await ensureBrowserReady(cdpOpts);
617
627
  let page = await getOrCreatePage(browser);
628
+ const blockedRoutePages = new WeakSet();
629
+
630
+ async function setupPageRequestBlocking(targetPage) {
631
+ if (!targetPage || blockedRoutePages.has(targetPage)) return;
632
+ await targetPage.route("**/*", (route) => {
633
+ const resourceType = route.request().resourceType();
634
+ if (resourceType === "image" || resourceType === "stylesheet") {
635
+ route.abort();
636
+ } else {
637
+ route.continue();
638
+ }
639
+ });
640
+ blockedRoutePages.add(targetPage);
641
+ }
642
+
643
+ await setupPageRequestBlocking(page);
644
+
645
+ const portPoolStart = Math.max(
646
+ 1,
647
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
648
+ );
649
+ const portPoolSize = Math.max(
650
+ 2,
651
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
652
+ );
653
+ const switchPortOnRecycle =
654
+ String(process.env.TAG_SCOREALL_SWITCH_PORT_ON_RECYCLE || "1") !== "0";
655
+
656
+ function pickNextPort(currentPort) {
657
+ const candidates = [];
658
+ for (let i = 0; i < portPoolSize; i++) {
659
+ const p = portPoolStart + i;
660
+ if (p !== currentPort) candidates.push(p);
661
+ }
662
+ if (candidates.length === 0) return currentPort;
663
+ return candidates[Math.floor(Math.random() * candidates.length)];
664
+ }
665
+
666
+ const memRssRecycleMb = Math.max(
667
+ 256,
668
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_RSS_MB || "900", 10) || 900,
669
+ );
670
+ const memHeapRecycleMb = Math.max(
671
+ 128,
672
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_HEAP_MB || "320", 10) || 320,
673
+ );
674
+ const recycleCooldownMs = Math.max(
675
+ 0,
676
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_COOLDOWN_MS || "180000", 10) ||
677
+ 180000,
678
+ );
679
+ // 默认关闭按固定轮次重建,仅在高内存时触发;需要可通过环境变量开启。
680
+ const periodicRecycleEvery = Math.max(
681
+ 0,
682
+ parseInt(process.env.TAG_SCOREALL_PERIODIC_RECYCLE_EVERY || "0", 10) || 0,
683
+ );
684
+ let lastRecycleAt = 0;
685
+
686
+ async function recyclePage() {
687
+ if (!page || page.isClosed()) {
688
+ page = await getOrCreatePage(browser);
689
+ await setupPageRequestBlocking(page);
690
+ return;
691
+ }
692
+ try {
693
+ await page.goto("about:blank", {
694
+ waitUntil: "domcontentloaded",
695
+ timeout: 5000,
696
+ });
697
+ } catch {
698
+ // 页面状态异常时回退到重建 tab
699
+ await page.close().catch(() => {});
700
+ page = await getOrCreatePage(browser);
701
+ await setupPageRequestBlocking(page);
702
+ }
703
+ }
704
+
705
+ async function recycleCdpSession(reason) {
706
+ const oldPort = cdpPort;
707
+ if (switchPortOnRecycle) {
708
+ cdpPort = pickNextPort(cdpPort);
709
+ cdpOpts.port = cdpPort;
710
+ clientMeta.port = cdpPort;
711
+ }
712
+ const switchHint =
713
+ oldPort === cdpPort ? `port=${cdpPort}` : `port ${oldPort} -> ${cdpPort}`;
714
+ log(` ♻️ 重建 CDP 会话 (${reason}; ${switchHint})...`);
715
+ if (page) {
716
+ await page.close().catch(() => {});
717
+ page = null;
718
+ }
719
+ await browser.close().catch(() => {});
720
+ if (oldPort !== cdpPort) {
721
+ await killEdgeProcesses(null, oldPort).catch(() => {});
722
+ }
723
+ browser = await ensureBrowserReady(cdpOpts);
724
+ page = await getOrCreatePage(browser);
725
+ await setupPageRequestBlocking(page);
726
+ lastRecycleAt = Date.now();
727
+ }
728
+
729
+ async function maybeRecycleForMemory() {
730
+ const mem = process.memoryUsage();
731
+ const rssMB = mem.rss / 1024 / 1024;
732
+ const heapMB = mem.heapUsed / 1024 / 1024;
733
+ if (rssMB >= memRssRecycleMb || heapMB >= memHeapRecycleMb) {
734
+ const now = Date.now();
735
+ if (recycleCooldownMs > 0 && now - lastRecycleAt < recycleCooldownMs) {
736
+ return;
737
+ }
738
+ await recycleCdpSession(
739
+ `mem rss=${rssMB.toFixed(0)}MB heap=${heapMB.toFixed(0)}MB (threshold rss=${memRssRecycleMb} heap=${memHeapRecycleMb})`,
740
+ );
741
+ return;
742
+ }
743
+ if (
744
+ periodicRecycleEvery > 0 &&
745
+ totalScored > 0 &&
746
+ totalScored % periodicRecycleEvery === 0
747
+ ) {
748
+ await recycleCdpSession(`periodic every ${totalScored} tasks`);
749
+ }
750
+ }
618
751
 
619
752
  let totalScored = 0;
620
753
  let emptyRounds = 0; // 连续无任务的轮数
@@ -622,7 +755,7 @@ export async function handleScoreAll(parsed) {
622
755
 
623
756
  // 生成客户端 ID,用于服务端追踪
624
757
  const clientId = randomUUID();
625
- const clientMeta = { type: "scoring" };
758
+ const clientMeta = { type: "scoring", port: cdpPort };
626
759
 
627
760
  // 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
628
761
  const enrichScraper = new TikTokScraper({
@@ -719,6 +852,7 @@ export async function handleScoreAll(parsed) {
719
852
  // already claimed: 其他机器抢先了,跳过不标 dead
720
853
  if (claimData.error && claimData.error.includes("already claimed")) {
721
854
  log(` ⏭️ 已被其他客户端锁定,跳过`);
855
+ await recyclePage();
722
856
  continue;
723
857
  }
724
858
  log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
@@ -726,6 +860,7 @@ export async function handleScoreAll(parsed) {
726
860
  result.status = "dead";
727
861
  await reportToServer(baseUrl, result, clientId, clientMeta);
728
862
  totalScored++;
863
+ await recyclePage();
729
864
  continue;
730
865
  }
731
866
 
@@ -733,7 +868,7 @@ export async function handleScoreAll(parsed) {
733
868
  const fetchStart = Date.now();
734
869
  log(` 抓取 TikTok 标签页...`);
735
870
  const tagResult = await fetchTagData(tag, {
736
- port: cdpPort,
871
+ page,
737
872
  onProgress: ({ videos, authors }) => {
738
873
  process.stderr.write(
739
874
  `\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
@@ -751,23 +886,17 @@ export async function handleScoreAll(parsed) {
751
886
 
752
887
  if (!videos || videos.length === 0) {
753
888
  const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
754
- const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(
755
- 0,
889
+ log(
890
+ ` ⚠️ 无视频 (${deadSec}s) mem=${formatMemoryUsage()},标记 dead`,
756
891
  );
757
- log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
758
892
  result.status = "dead";
759
893
  result.error = "no videos found";
760
894
  await reportToServer(baseUrl, result, clientId, clientMeta);
761
895
  totalScored++;
762
896
  // 随机等待 3-7 秒,避免连续访问 TikTok 触发风控
763
897
  await randomDelay(0, 5000);
764
- // 导航到 about:blank 释放页面状态再跳过
765
- await page
766
- .goto("about:blank", {
767
- waitUntil: "domcontentloaded",
768
- timeout: 5000,
769
- })
770
- .catch(() => {});
898
+ await recyclePage();
899
+ await maybeRecycleForMemory();
771
900
  continue;
772
901
  }
773
902
 
@@ -790,12 +919,11 @@ export async function handleScoreAll(parsed) {
790
919
  // CDN 限流检测:有拦截则冷却 + 重启 scraper
791
920
  const cdnBlocked = enriched.cdnBlockedCount || 0;
792
921
  if (cdnBlocked > 0) {
793
- const cdnRatio = cdnBlocked / (videos.length || 1);
794
- const coolSec = cdnRatio > 0.3 ? 120 : 60;
922
+ const coolSec = getCdnCooldownSeconds(cdnBlocked, videos.length);
795
923
  log(
796
- ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
924
+ ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${((cdnBlocked / Math.max(videos.length, 1)) * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
797
925
  );
798
- await new Promise((r) => setTimeout(r, coolSec * 1000));
926
+ await cooldownAndRecycle(coolSec, recyclePage, maybeRecycleForMemory);
799
927
  log(` 正在重启 TikTokScraper...`);
800
928
  await enrichScraper.restart();
801
929
  log(` ✅ TikTokScraper 已重启`);
@@ -836,22 +964,26 @@ export async function handleScoreAll(parsed) {
836
964
  const mc = result.matchedCountries
837
965
  .map((c) => `${c.c}:${c.n}`)
838
966
  .join(" ");
839
- // Node.js 进程内存占用
840
- const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
841
- const memStr = ` mem=${memMB}MB`;
967
+ const memStr = ` mem=${formatMemoryUsage()}`;
842
968
  log(
843
969
  ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
844
970
  );
845
971
  log("");
846
-
847
- // 导航到 about:blank 卸载页面,状态清零,下次 goto 重新初始化
848
- await page
849
- .goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
850
- .catch((e) => {
851
- log(` ⚠️ about:blank 跳转失败: ${e.message}`);
852
- });
972
+ await recyclePage();
973
+ await maybeRecycleForMemory();
853
974
  await randomDelay(3000, 7000);
854
975
  } catch (e) {
976
+ if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
977
+ log(` ⚠️ CDN 被封: ${e.message}`);
978
+ result.status = "dead";
979
+ result.error = "cdn_blocked";
980
+ await reportToServer(baseUrl, result, clientId, clientMeta);
981
+ totalScored++;
982
+ const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
983
+ log(` 冷却 ${cooldownSec} 秒后再继续...`);
984
+ await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
985
+ continue;
986
+ }
855
987
  // 区分网络错误和业务错误
856
988
  const isNetworkError =
857
989
  e.code === "ECONNREFUSED" ||
@@ -881,6 +1013,8 @@ export async function handleScoreAll(parsed) {
881
1013
  );
882
1014
  } catch {}
883
1015
  totalScored++;
1016
+ await recyclePage();
1017
+ await maybeRecycleForMemory();
884
1018
  }
885
1019
  }
886
1020
  } finally {
package/src/lib/args.js CHANGED
@@ -739,7 +739,7 @@ function parseTagArgs(args) {
739
739
  let isDiscover = false;
740
740
  let isScore = false;
741
741
  let isScoreAll = false;
742
- let scoreAllPort = 9222;
742
+ let scoreAllPort = 7222;
743
743
  let scoreProxy = null;
744
744
  let scoreTag = null;
745
745
  let scoreCountries = null;
@@ -788,7 +788,7 @@ function parseTagArgs(args) {
788
788
  .filter(Boolean);
789
789
  } else if (arg === "--port") {
790
790
  if (isScoreAll) {
791
- scoreAllPort = parseInt(args[++i]) || 9222;
791
+ scoreAllPort = parseInt(args[++i]) || 7222;
792
792
  } else {
793
793
  scorePort = parseInt(args[++i]) || 9222;
794
794
  }
package/src/lib/tag-fetcher.js CHANGED
@@ -48,14 +48,19 @@ export async function fetchTagData(tag, options = {}) {
48
48
  userDataDir,
49
49
  proxyServer,
50
50
  onProgress,
51
+ browser: existingBrowser,
52
+ page: existingPage,
51
53
  } = options;
52
54
 
53
55
  const cdpOptions = { port };
54
56
  if (userDataDir) cdpOptions.userDataDir = userDataDir;
55
57
  if (proxyServer) cdpOptions.proxyServer = proxyServer;
56
58
 
57
- const browser = await ensureBrowserReady(cdpOptions);
58
- const page = await getOrCreatePage(browser);
59
+ const browser =
60
+ existingBrowser ||
61
+ (existingPage ? existingPage.context().browser() : null) ||
62
+ (await ensureBrowserReady(cdpOptions));
63
+ const page = existingPage || (await getOrCreatePage(browser));
59
64
 
60
65
  let challengeInfo = null;
61
66
  const rawVideos = [];
@@ -102,6 +107,9 @@ export async function fetchTagData(tag, options = {}) {
102
107
  timeout: 30000,
103
108
  });
104
109
 
110
+ if (resp.status() === 403 || resp.status() === 429) {
111
+ throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
112
+ }
105
113
  if (resp.status() !== 200) {
106
114
  throw new Error(`标签页返回 HTTP ${resp.status()}`);
107
115
  }
@@ -1160,59 +1160,112 @@ export function createStore(filePath, options = {}) {
1160
1160
  })();
1161
1161
  }
1162
1162
 
1163
- // 常规移动:INSERT + DELETE 事务
1164
- const moveTxn = getDb().transaction(() => {
1165
- getDb()
1166
- .prepare(
1167
- `
1168
- INSERT OR IGNORE INTO jobs (
1169
- unique_id, nickname, status, sources, pinned,
1170
- tt_seller, verified, video_count, comment_count,
1171
- guessed_location, location_created, confirmed_location,
1172
- follower_count, following_count, heart_count,
1173
- created_at, updated_at, region, signature, bio_link, sec_uid,
1174
- status_code, latest_video_time, user_create_time
1175
- )
1176
- SELECT
1177
- unique_id, nickname, 'pending', sources, pinned,
1178
- tt_seller, verified, video_count, comment_count,
1179
- guessed_location, location_created, confirmed_location,
1180
- follower_count, following_count, heart_count,
1181
- created_at, updated_at, region, signature, bio_link, sec_uid,
1182
- status_code, latest_video_time, user_create_time
1183
- FROM raw_jobs
1184
- WHERE ${whereSql}
1163
+ // 常规移动:多国家时先按国家均衡补充,再用全局兜底补齐剩余额度
1164
+ const insertFromRawSql = `
1165
+ INSERT OR IGNORE INTO jobs (
1166
+ unique_id, nickname, status, sources, pinned,
1167
+ tt_seller, verified, video_count, comment_count,
1168
+ guessed_location, location_created, confirmed_location,
1169
+ follower_count, following_count, heart_count,
1170
+ created_at, updated_at, region, signature, bio_link, sec_uid,
1171
+ status_code, latest_video_time, user_create_time
1172
+ )
1173
+ SELECT
1174
+ unique_id, nickname, 'pending', sources, pinned,
1175
+ tt_seller, verified, video_count, comment_count,
1176
+ guessed_location, location_created, confirmed_location,
1177
+ follower_count, following_count, heart_count,
1178
+ created_at, updated_at, region, signature, bio_link, sec_uid,
1179
+ status_code, latest_video_time, user_create_time
1180
+ FROM raw_jobs
1181
+ WHERE __WHERE__
1182
+ ORDER BY
1183
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1184
+ COALESCE(video_count, 0) DESC, created_at DESC
1185
+ LIMIT ?
1186
+ `;
1187
+
1188
+ const deleteFromRawSql = `
1189
+ DELETE FROM raw_jobs
1190
+ WHERE unique_id IN (
1191
+ SELECT unique_id FROM raw_jobs
1192
+ WHERE __WHERE__
1185
1193
  ORDER BY
1186
1194
  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1187
1195
  COALESCE(video_count, 0) DESC, created_at DESC
1188
1196
  LIMIT ?
1189
- `,
1190
- )
1191
- .run(...args, safeLimit);
1197
+ )
1198
+ `;
1192
1199
 
1193
- // 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
1194
- getDb()
1195
- .prepare(
1196
- `
1197
- DELETE FROM raw_jobs
1198
- WHERE unique_id IN (
1199
- SELECT unique_id FROM raw_jobs
1200
- WHERE ${whereSql}
1201
- ORDER BY
1202
- CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1203
- COALESCE(video_count, 0) DESC, created_at DESC
1204
- LIMIT ?
1205
- )
1206
- `,
1207
- )
1208
- .run(...args, safeLimit);
1200
+ const uniqueLocations = normalizedLocations
1201
+ ? Array.from(new Set(normalizedLocations))
1202
+ : [];
1203
+ const shouldBalanceByCountry = uniqueLocations.length > 1;
1204
+
1205
+ const moveTxn = getDb().transaction(() => {
1206
+ let moved = 0;
1207
+ const movedByCountry = {};
1208
+
1209
+ if (shouldBalanceByCountry) {
1210
+ const baseQuota = Math.floor(safeLimit / uniqueLocations.length);
1211
+ const remainder = safeLimit % uniqueLocations.length;
1212
+
1213
+ for (let i = 0; i < uniqueLocations.length; i++) {
1214
+ if (moved >= safeLimit) break;
1215
+
1216
+ const location = uniqueLocations[i];
1217
+ const quota = baseQuota + (i < remainder ? 1 : 0);
1218
+ const currentLimit = Math.max(0, Math.min(quota, safeLimit - moved));
1219
+ if (!currentLimit) continue;
1220
+
1221
+ const locationWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ?`;
1222
+ const locationArgs = [...args, location];
1223
+
1224
+ getDb()
1225
+ .prepare(insertFromRawSql.replace("__WHERE__", locationWhere))
1226
+ .run(...locationArgs, currentLimit);
1227
+ const del = getDb()
1228
+ .prepare(deleteFromRawSql.replace("__WHERE__", locationWhere))
1229
+ .run(...locationArgs, currentLimit);
1230
+
1231
+ const movedThisCountry = del?.changes || 0;
1232
+ moved += movedThisCountry;
1233
+ movedByCountry[location] = movedThisCountry;
1234
+ }
1235
+
1236
+ // 某些国家库存不足时,用全局查询补齐剩余额度(仍受 whereSql 国家范围约束)
1237
+ const remaining = safeLimit - moved;
1238
+ if (remaining > 0) {
1239
+ getDb()
1240
+ .prepare(insertFromRawSql.replace("__WHERE__", whereSql))
1241
+ .run(...args, remaining);
1242
+ const del = getDb()
1243
+ .prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
1244
+ .run(...args, remaining);
1245
+ moved += del?.changes || 0;
1246
+ }
1247
+
1248
+ console.error(
1249
+ `[data-store] refill 国家均衡: ${uniqueLocations
1250
+ .map((loc) => `${loc}:${movedByCountry[loc] || 0}`)
1251
+ .join(", ")} | total=${moved}`,
1252
+ );
1253
+ } else {
1254
+ getDb()
1255
+ .prepare(insertFromRawSql.replace("__WHERE__", whereSql))
1256
+ .run(...args, safeLimit);
1257
+ const del = getDb()
1258
+ .prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
1259
+ .run(...args, safeLimit);
1260
+ moved = del?.changes || 0;
1261
+ }
1262
+
1263
+ return moved;
1209
1264
  });
1210
1265
 
1211
- moveTxn();
1266
+ const moved = moveTxn();
1212
1267
  markStatsDirty();
1213
-
1214
- const actualMoved = Math.min(count, safeLimit);
1215
- return { moved: actualMoved };
1268
+ return { moved };
1216
1269
  }
1217
1270
 
1218
1271
  async function claimNextJob(
@@ -1512,8 +1565,7 @@ export function createStore(filePath, options = {}) {
1512
1565
  }
1513
1566
  return null;
1514
1567
  }
1515
- const refillResult = (async () => {
1516
- refillLock = Promise.resolve(); // 占位
1568
+ const refillPromise = (async () => {
1517
1569
  const result = refillJobsFromRaw(
1518
1570
  normalizedLocations.length ? normalizedLocations : null,
1519
1571
  500,
@@ -1521,30 +1573,17 @@ export function createStore(filePath, options = {}) {
1521
1573
  );
1522
1574
  // refillJobsFromRaw 在 LLM 模式下返回 Promise
1523
1575
  if (result && typeof result.then === "function") {
1524
- return result.finally(() => {
1525
- refillLock = null;
1526
- });
1576
+ return await result;
1527
1577
  }
1528
1578
  return result;
1529
1579
  })();
1530
- if (refillResult && typeof refillResult.then === "function") {
1531
- const awaited = await refillResult;
1532
- if (awaited.moved > 0) {
1533
- console.error(
1534
- `[data-store] 从 raw_jobs 补充了 ${awaited.moved} 条任务到 jobs`,
1535
- );
1536
- for (const requireVideo of [true, false]) {
1537
- const pinned = findPinnedPending(requireVideo);
1538
- if (pinned) {
1539
- return claimRow(pinned);
1540
- }
1541
- const ranked = findPrioritizedPending(requireVideo);
1542
- if (ranked) {
1543
- return claimRow(ranked);
1544
- }
1545
- }
1546
- }
1547
- } else if (refillResult.moved > 0) {
1580
+ // 让并发请求等待同一个 refill,并且无论成功/失败都释放锁
1581
+ refillLock = refillPromise.finally(() => {
1582
+ refillLock = null;
1583
+ });
1584
+
1585
+ const refillResult = await refillLock;
1586
+ if (refillResult.moved > 0) {
1548
1587
  console.error(
1549
1588
  `[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
1550
1589
  );