tt-help-cli-ycl 1.3.98 → 1.3.100
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +192 -58
- package/src/lib/args.js +2 -2
- package/src/lib/tag-fetcher.js +10 -2
- package/src/watch/data-store.js +107 -68
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -5,6 +5,7 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
|
5
5
|
import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
|
|
6
6
|
import { getOrCreatePage } from "../lib/browser/page.js";
|
|
7
7
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
8
|
+
import { CDNBlockedError } from "../lib/parse-ssr.mjs";
|
|
8
9
|
import {
|
|
9
10
|
DEFAULT_TARGET_LOCATIONS,
|
|
10
11
|
isLocationInList,
|
|
@@ -18,6 +19,41 @@ import {
|
|
|
18
19
|
|
|
19
20
|
const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
|
|
20
21
|
const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
|
|
22
|
+
/**
 * Two-letter country codes used as the scoring targets when the caller does
 * not supply its own list.
 */
const DEFAULT_SCORE_COUNTRIES = [
  "ES",
  "FR",
  "DE",
  "PT",
  "IT",
  "NL",
  "BE",
  "AT",
  "IE",
  "PL",
  "CZ",
  "GR",
  "HU",
];

/**
 * Resolve the list of target countries for a scoring run.
 *
 * @param {string[] | null | undefined} countries - Caller-supplied list, if any.
 * @returns {string[]} `countries` itself when it is a non-empty value;
 *   otherwise a fresh copy of DEFAULT_SCORE_COUNTRIES.
 */
function resolveTargetCountries(countries) {
  // An empty array is truthy, so `countries || defaults` would keep it and the
  // run would silently match no country at all. Treat it like "not provided".
  const isEmptyList = Array.isArray(countries) && countries.length === 0;
  if (countries && !isEmptyList) return countries;

  // Hand out a copy so a caller mutating its list cannot corrupt the shared
  // module-level default for later runs.
  return [...DEFAULT_SCORE_COUNTRIES];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Render a memory-usage snapshot as a compact single-line string for logs.
 *
 * @param {{rss?: number, heapUsed?: number, external?: number, arrayBuffers?: number}} [mem]
 *   Snapshot in bytes; defaults to `process.memoryUsage()`.
 * @returns {string} e.g. `rss:120MB heap:45MB ext:3MB ab:1MB` (whole megabytes).
 */
function formatMemoryUsage(mem = process.memoryUsage()) {
  // Single conversion helper instead of four copies of the same arithmetic.
  // A missing or non-numeric field is reported as 0 rather than "NaN".
  const toMb = (bytes) => {
    const value = Number(bytes);
    return (Number.isFinite(value) ? value / 1024 / 1024 : 0).toFixed(0);
  };
  return `rss:${toMb(mem.rss)}MB heap:${toMb(mem.heapUsed)}MB ext:${toMb(mem.external)}MB ab:${toMb(mem.arrayBuffers)}MB`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Choose how long to pause after the CDN started rejecting requests.
 *
 * @param {number} blockedCount - Number of requests that were blocked.
 * @param {number} totalCount - Number of requests attempted (values below 1 are treated as 1).
 * @param {boolean} [isTooManyRequests=false] - When true, always use the long cooldown.
 * @returns {number} Cooldown in seconds: 120 when flagged or when more than
 *   30% of requests were blocked, otherwise 60.
 */
function getCdnCooldownSeconds(blockedCount, totalCount, isTooManyRequests = false) {
  const LONG_COOLDOWN_SEC = 120;
  const SHORT_COOLDOWN_SEC = 60;

  if (isTooManyRequests) {
    return LONG_COOLDOWN_SEC;
  }

  // Clamp the denominator so an empty batch cannot divide by zero.
  const denominator = Math.max(totalCount, 1);
  const blockedShare = blockedCount / denominator;
  if (blockedShare > 0.3) {
    return LONG_COOLDOWN_SEC;
  }
  return SHORT_COOLDOWN_SEC;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Sleep for the cooldown period, then run the two recycle callbacks in order.
 *
 * @param {number} cooldownSec - Pause length in seconds.
 * @param {() => Promise<void>} recyclePage - Awaited first, after the pause.
 * @param {() => Promise<void>} maybeRecycleForMemory - Awaited after `recyclePage`.
 * @returns {Promise<void>}
 */
async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
  const waitMs = cooldownSec * 1000;
  await new Promise((resolve) => {
    setTimeout(resolve, waitMs);
  });

  await recyclePage();
  await maybeRecycleForMemory();
}
|
|
21
57
|
|
|
22
58
|
// 构建带客户端追踪 header 的 fetch 封装
|
|
23
59
|
function buildClientHeaders(clientId, meta, extra = {}) {
|
|
@@ -426,21 +462,7 @@ export async function handleScore(parsed) {
|
|
|
426
462
|
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
427
463
|
const cdpPort = port || 9222;
|
|
428
464
|
const effectiveProxy = cliProxy || configuredProxy;
|
|
429
|
-
const targetCountries = countries
|
|
430
|
-
"ES",
|
|
431
|
-
"FR",
|
|
432
|
-
"DE",
|
|
433
|
-
"PT",
|
|
434
|
-
"IT",
|
|
435
|
-
"NL",
|
|
436
|
-
"BE",
|
|
437
|
-
"AT",
|
|
438
|
-
"IE",
|
|
439
|
-
"PL",
|
|
440
|
-
"CZ",
|
|
441
|
-
"GR",
|
|
442
|
-
"HU",
|
|
443
|
-
];
|
|
465
|
+
const targetCountries = resolveTargetCountries(countries);
|
|
444
466
|
|
|
445
467
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
446
468
|
|
|
@@ -576,23 +598,11 @@ export async function handleScoreAll(parsed) {
|
|
|
576
598
|
} = tagScoreAll || {};
|
|
577
599
|
|
|
578
600
|
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
579
|
-
const
|
|
601
|
+
const defaultScoreAllPort =
|
|
602
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222;
|
|
603
|
+
let cdpPort = port || defaultScoreAllPort;
|
|
580
604
|
const effectiveProxy = cliProxy || configuredProxy;
|
|
581
|
-
const targetCountries = countries
|
|
582
|
-
"ES",
|
|
583
|
-
"FR",
|
|
584
|
-
"DE",
|
|
585
|
-
"PT",
|
|
586
|
-
"IT",
|
|
587
|
-
"NL",
|
|
588
|
-
"BE",
|
|
589
|
-
"AT",
|
|
590
|
-
"IE",
|
|
591
|
-
"PL",
|
|
592
|
-
"CZ",
|
|
593
|
-
"GR",
|
|
594
|
-
"HU",
|
|
595
|
-
];
|
|
605
|
+
const targetCountries = resolveTargetCountries(countries);
|
|
596
606
|
|
|
597
607
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
598
608
|
|
|
@@ -613,8 +623,131 @@ export async function handleScoreAll(parsed) {
|
|
|
613
623
|
// 连接 CDP 浏览器
|
|
614
624
|
const cdpOpts = { port: cdpPort };
|
|
615
625
|
if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
|
|
616
|
-
|
|
626
|
+
let browser = await ensureBrowserReady(cdpOpts);
|
|
617
627
|
let page = await getOrCreatePage(browser);
|
|
628
|
+
// Pages that already have the blocking route installed, so the same page is
// never given a second handler.
const blockedRoutePages = new WeakSet();

/**
 * Install a catch-all route on `targetPage` that aborts image and stylesheet
 * requests and lets every other request continue. Does nothing when the page
 * is missing or was already set up.
 *
 * @param {object | null | undefined} targetPage - Page exposing `route(pattern, handler)`.
 * @returns {Promise<void>}
 */
async function setupPageRequestBlocking(targetPage) {
  if (!targetPage || blockedRoutePages.has(targetPage)) return;
  await targetPage.route("**/*", async (route) => {
    try {
      const resourceType = route.request().resourceType();
      if (resourceType === "image" || resourceType === "stylesheet") {
        await route.abort();
      } else {
        await route.continue();
      }
    } catch {
      // abort()/continue() return promises that reject when the page or
      // browser is closed while the request is in flight. The recycle logic
      // closes pages on purpose, so awaiting and ignoring the failure here
      // keeps it from becoming an unhandled rejection.
    }
  });
  blockedRoutePages.add(targetPage);
}
|
|
642
|
+
|
|
643
|
+
await setupPageRequestBlocking(page);
|
|
644
|
+
|
|
645
|
+
const portPoolStart = Math.max(
|
|
646
|
+
1,
|
|
647
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
|
|
648
|
+
);
|
|
649
|
+
const portPoolSize = Math.max(
|
|
650
|
+
2,
|
|
651
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
|
|
652
|
+
);
|
|
653
|
+
const switchPortOnRecycle =
|
|
654
|
+
String(process.env.TAG_SCOREALL_SWITCH_PORT_ON_RECYCLE || "1") !== "0";
|
|
655
|
+
|
|
656
|
+
/**
 * Pick a random port from the pool
 * [portPoolStart, portPoolStart + portPoolSize) that differs from the one in use.
 *
 * @param {number} currentPort - Port currently in use.
 * @returns {number} A different pool port, or `currentPort` when the pool
 *   offers no alternative.
 */
function pickNextPort(currentPort) {
  const pool = Array.from(
    { length: portPoolSize },
    (_, offset) => portPoolStart + offset,
  );
  const alternatives = pool.filter((candidate) => candidate !== currentPort);
  if (alternatives.length === 0) return currentPort;

  const pickedIndex = Math.floor(Math.random() * alternatives.length);
  return alternatives[pickedIndex];
}
|
|
665
|
+
|
|
666
|
+
const memRssRecycleMb = Math.max(
|
|
667
|
+
256,
|
|
668
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_RSS_MB || "900", 10) || 900,
|
|
669
|
+
);
|
|
670
|
+
const memHeapRecycleMb = Math.max(
|
|
671
|
+
128,
|
|
672
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_HEAP_MB || "320", 10) || 320,
|
|
673
|
+
);
|
|
674
|
+
const recycleCooldownMs = Math.max(
|
|
675
|
+
0,
|
|
676
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_COOLDOWN_MS || "180000", 10) ||
|
|
677
|
+
180000,
|
|
678
|
+
);
|
|
679
|
+
// 默认关闭按固定轮次重建,仅在高内存时触发;需要可通过环境变量开启。
|
|
680
|
+
const periodicRecycleEvery = Math.max(
|
|
681
|
+
0,
|
|
682
|
+
parseInt(process.env.TAG_SCOREALL_PERIODIC_RECYCLE_EVERY || "0", 10) || 0,
|
|
683
|
+
);
|
|
684
|
+
let lastRecycleAt = 0;
|
|
685
|
+
|
|
686
|
+
/**
 * Reset the shared `page` between tasks.
 *
 * A usable page is navigated to about:blank. If there is no open page, or the
 * navigation fails, a page is obtained from the current browser via
 * getOrCreatePage and request blocking is set up on it.
 *
 * @returns {Promise<void>}
 */
async function recyclePage() {
  // Get a page from the current browser and set up request blocking on it.
  const openFreshPage = async () => {
    page = await getOrCreatePage(browser);
    await setupPageRequestBlocking(page);
  };

  const hasUsablePage = Boolean(page) && !page.isClosed();
  if (!hasUsablePage) {
    await openFreshPage();
    return;
  }

  try {
    await page.goto("about:blank", {
      waitUntil: "domcontentloaded",
      timeout: 5000,
    });
  } catch {
    // The page is in a bad state: fall back to closing and replacing the tab.
    await page.close().catch(() => {});
    await openFreshPage();
  }
}
|
|
704
|
+
|
|
705
|
+
/**
 * Tear down the current page and browser and start a new CDP session,
 * optionally on a different port from the pool.
 *
 * Updates the enclosing `cdpPort`, `cdpOpts.port`, `clientMeta.port`,
 * `browser`, `page` and `lastRecycleAt`.
 *
 * @param {string} reason - Human-readable trigger, included in the log line.
 * @returns {Promise<void>}
 */
async function recycleCdpSession(reason) {
  const previousPort = cdpPort;
  if (switchPortOnRecycle) {
    cdpPort = pickNextPort(cdpPort);
    cdpOpts.port = cdpPort;
    clientMeta.port = cdpPort;
  }

  const portChanged = previousPort !== cdpPort;
  const switchHint = portChanged
    ? `port ${previousPort} -> ${cdpPort}`
    : `port=${cdpPort}`;
  log(` ♻️ 重建 CDP 会话 (${reason}; ${switchHint})...`);

  // Best-effort teardown: close failures are ignored so the rebuild proceeds.
  if (page) {
    await page.close().catch(() => {});
    page = null;
  }
  await browser.close().catch(() => {});

  // Only kill the old browser process when we are moving off its port.
  if (portChanged) {
    await killEdgeProcesses(null, previousPort).catch(() => {});
  }

  browser = await ensureBrowserReady(cdpOpts);
  page = await getOrCreatePage(browser);
  await setupPageRequestBlocking(page);
  lastRecycleAt = Date.now();
}
|
|
728
|
+
|
|
729
|
+
// Value of `totalScored` at which the periodic recycle last fired. Guards
// against rebuilding the session several times for the same task count.
let lastPeriodicRecycleCount = 0;

/**
 * Rebuild the CDP session when this process uses too much memory or,
 * if enabled, after every `periodicRecycleEvery` scored tasks.
 *
 * Memory trigger: RSS >= memRssRecycleMb or heapUsed >= memHeapRecycleMb,
 * rate-limited by `recycleCooldownMs` since the last recycle.
 * Periodic trigger: `totalScored` is a positive multiple of
 * `periodicRecycleEvery`, at most once per distinct `totalScored` value.
 *
 * @returns {Promise<void>}
 */
async function maybeRecycleForMemory() {
  const mem = process.memoryUsage();
  const rssMB = mem.rss / 1024 / 1024;
  const heapMB = mem.heapUsed / 1024 / 1024;

  if (rssMB >= memRssRecycleMb || heapMB >= memHeapRecycleMb) {
    const now = Date.now();
    if (recycleCooldownMs > 0 && now - lastRecycleAt < recycleCooldownMs) {
      return;
    }
    await recycleCdpSession(
      `mem rss=${rssMB.toFixed(0)}MB heap=${heapMB.toFixed(0)}MB (threshold rss=${memRssRecycleMb} heap=${memHeapRecycleMb})`,
    );
    return;
  }

  const periodicDue =
    periodicRecycleEvery > 0 &&
    totalScored > 0 &&
    totalScored % periodicRecycleEvery === 0 &&
    // This function is also invoked on paths that do not advance totalScored
    // (e.g. the CDN-throttle cooldown), so the same multiple can be seen
    // more than once; only act on it the first time.
    totalScored !== lastPeriodicRecycleCount;
  if (periodicDue) {
    // Recorded before the rebuild so a failing rebuild is not retried in a
    // tight loop for the same count.
    lastPeriodicRecycleCount = totalScored;
    // Report the configured interval, not the running total, as the period.
    await recycleCdpSession(
      `periodic every ${periodicRecycleEvery} tasks (total=${totalScored})`,
    );
  }
}
|
|
618
751
|
|
|
619
752
|
let totalScored = 0;
|
|
620
753
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
@@ -622,7 +755,7 @@ export async function handleScoreAll(parsed) {
|
|
|
622
755
|
|
|
623
756
|
// 生成客户端 ID,用于服务端追踪
|
|
624
757
|
const clientId = randomUUID();
|
|
625
|
-
const clientMeta = { type: "scoring" };
|
|
758
|
+
const clientMeta = { type: "scoring", port: cdpPort };
|
|
626
759
|
|
|
627
760
|
// 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
|
|
628
761
|
const enrichScraper = new TikTokScraper({
|
|
@@ -719,6 +852,7 @@ export async function handleScoreAll(parsed) {
|
|
|
719
852
|
// already claimed: 其他机器抢先了,跳过不标 dead
|
|
720
853
|
if (claimData.error && claimData.error.includes("already claimed")) {
|
|
721
854
|
log(` ⏭️ 已被其他客户端锁定,跳过`);
|
|
855
|
+
await recyclePage();
|
|
722
856
|
continue;
|
|
723
857
|
}
|
|
724
858
|
log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
|
|
@@ -726,6 +860,7 @@ export async function handleScoreAll(parsed) {
|
|
|
726
860
|
result.status = "dead";
|
|
727
861
|
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
728
862
|
totalScored++;
|
|
863
|
+
await recyclePage();
|
|
729
864
|
continue;
|
|
730
865
|
}
|
|
731
866
|
|
|
@@ -733,7 +868,7 @@ export async function handleScoreAll(parsed) {
|
|
|
733
868
|
const fetchStart = Date.now();
|
|
734
869
|
log(` 抓取 TikTok 标签页...`);
|
|
735
870
|
const tagResult = await fetchTagData(tag, {
|
|
736
|
-
|
|
871
|
+
page,
|
|
737
872
|
onProgress: ({ videos, authors }) => {
|
|
738
873
|
process.stderr.write(
|
|
739
874
|
`\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
|
|
@@ -751,23 +886,17 @@ export async function handleScoreAll(parsed) {
|
|
|
751
886
|
|
|
752
887
|
if (!videos || videos.length === 0) {
|
|
753
888
|
const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
|
|
754
|
-
|
|
755
|
-
|
|
889
|
+
log(
|
|
890
|
+
` ⚠️ 无视频 (${deadSec}s) mem=${formatMemoryUsage()},标记 dead`,
|
|
756
891
|
);
|
|
757
|
-
log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
|
|
758
892
|
result.status = "dead";
|
|
759
893
|
result.error = "no videos found";
|
|
760
894
|
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
761
895
|
totalScored++;
|
|
762
896
|
// 随机等待 3-7 秒,避免连续访问 TikTok 触发风控
|
|
763
897
|
await randomDelay(0, 5000);
|
|
764
|
-
|
|
765
|
-
await
|
|
766
|
-
.goto("about:blank", {
|
|
767
|
-
waitUntil: "domcontentloaded",
|
|
768
|
-
timeout: 5000,
|
|
769
|
-
})
|
|
770
|
-
.catch(() => {});
|
|
898
|
+
await recyclePage();
|
|
899
|
+
await maybeRecycleForMemory();
|
|
771
900
|
continue;
|
|
772
901
|
}
|
|
773
902
|
|
|
@@ -790,12 +919,11 @@ export async function handleScoreAll(parsed) {
|
|
|
790
919
|
// CDN 限流检测:有拦截则冷却 + 重启 scraper
|
|
791
920
|
const cdnBlocked = enriched.cdnBlockedCount || 0;
|
|
792
921
|
if (cdnBlocked > 0) {
|
|
793
|
-
const
|
|
794
|
-
const coolSec = cdnRatio > 0.3 ? 120 : 60;
|
|
922
|
+
const coolSec = getCdnCooldownSeconds(cdnBlocked, videos.length);
|
|
795
923
|
log(
|
|
796
|
-
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(
|
|
924
|
+
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${((cdnBlocked / Math.max(videos.length, 1)) * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
|
|
797
925
|
);
|
|
798
|
-
await
|
|
926
|
+
await cooldownAndRecycle(coolSec, recyclePage, maybeRecycleForMemory);
|
|
799
927
|
log(` 正在重启 TikTokScraper...`);
|
|
800
928
|
await enrichScraper.restart();
|
|
801
929
|
log(` ✅ TikTokScraper 已重启`);
|
|
@@ -836,22 +964,26 @@ export async function handleScoreAll(parsed) {
|
|
|
836
964
|
const mc = result.matchedCountries
|
|
837
965
|
.map((c) => `${c.c}:${c.n}`)
|
|
838
966
|
.join(" ");
|
|
839
|
-
|
|
840
|
-
const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
|
|
841
|
-
const memStr = ` mem=${memMB}MB`;
|
|
967
|
+
const memStr = ` mem=${formatMemoryUsage()}`;
|
|
842
968
|
log(
|
|
843
969
|
` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
|
|
844
970
|
);
|
|
845
971
|
log("");
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
await page
|
|
849
|
-
.goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
|
|
850
|
-
.catch((e) => {
|
|
851
|
-
log(` ⚠️ about:blank 跳转失败: ${e.message}`);
|
|
852
|
-
});
|
|
972
|
+
await recyclePage();
|
|
973
|
+
await maybeRecycleForMemory();
|
|
853
974
|
await randomDelay(3000, 7000);
|
|
854
975
|
} catch (e) {
|
|
976
|
+
if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
|
|
977
|
+
log(` ⚠️ CDN 被封: ${e.message}`);
|
|
978
|
+
result.status = "dead";
|
|
979
|
+
result.error = "cdn_blocked";
|
|
980
|
+
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
981
|
+
totalScored++;
|
|
982
|
+
const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
|
|
983
|
+
log(` 冷却 ${cooldownSec} 秒后再继续...`);
|
|
984
|
+
await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
|
|
985
|
+
continue;
|
|
986
|
+
}
|
|
855
987
|
// 区分网络错误和业务错误
|
|
856
988
|
const isNetworkError =
|
|
857
989
|
e.code === "ECONNREFUSED" ||
|
|
@@ -881,6 +1013,8 @@ export async function handleScoreAll(parsed) {
|
|
|
881
1013
|
);
|
|
882
1014
|
} catch {}
|
|
883
1015
|
totalScored++;
|
|
1016
|
+
await recyclePage();
|
|
1017
|
+
await maybeRecycleForMemory();
|
|
884
1018
|
}
|
|
885
1019
|
}
|
|
886
1020
|
} finally {
|
package/src/lib/args.js
CHANGED
|
@@ -739,7 +739,7 @@ function parseTagArgs(args) {
|
|
|
739
739
|
let isDiscover = false;
|
|
740
740
|
let isScore = false;
|
|
741
741
|
let isScoreAll = false;
|
|
742
|
-
let scoreAllPort =
|
|
742
|
+
let scoreAllPort = 7222;
|
|
743
743
|
let scoreProxy = null;
|
|
744
744
|
let scoreTag = null;
|
|
745
745
|
let scoreCountries = null;
|
|
@@ -788,7 +788,7 @@ function parseTagArgs(args) {
|
|
|
788
788
|
.filter(Boolean);
|
|
789
789
|
} else if (arg === "--port") {
|
|
790
790
|
if (isScoreAll) {
|
|
791
|
-
scoreAllPort = parseInt(args[++i]) ||
|
|
791
|
+
scoreAllPort = parseInt(args[++i]) || 7222;
|
|
792
792
|
} else {
|
|
793
793
|
scorePort = parseInt(args[++i]) || 9222;
|
|
794
794
|
}
|
package/src/lib/tag-fetcher.js
CHANGED
|
@@ -48,14 +48,19 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
48
48
|
userDataDir,
|
|
49
49
|
proxyServer,
|
|
50
50
|
onProgress,
|
|
51
|
+
browser: existingBrowser,
|
|
52
|
+
page: existingPage,
|
|
51
53
|
} = options;
|
|
52
54
|
|
|
53
55
|
const cdpOptions = { port };
|
|
54
56
|
if (userDataDir) cdpOptions.userDataDir = userDataDir;
|
|
55
57
|
if (proxyServer) cdpOptions.proxyServer = proxyServer;
|
|
56
58
|
|
|
57
|
-
const browser =
|
|
58
|
-
|
|
59
|
+
const browser =
|
|
60
|
+
existingBrowser ||
|
|
61
|
+
(existingPage ? existingPage.context().browser() : null) ||
|
|
62
|
+
(await ensureBrowserReady(cdpOptions));
|
|
63
|
+
const page = existingPage || (await getOrCreatePage(browser));
|
|
59
64
|
|
|
60
65
|
let challengeInfo = null;
|
|
61
66
|
const rawVideos = [];
|
|
@@ -102,6 +107,9 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
102
107
|
timeout: 30000,
|
|
103
108
|
});
|
|
104
109
|
|
|
110
|
+
if (resp.status() === 403 || resp.status() === 429) {
|
|
111
|
+
throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
|
|
112
|
+
}
|
|
105
113
|
if (resp.status() !== 200) {
|
|
106
114
|
throw new Error(`标签页返回 HTTP ${resp.status()}`);
|
|
107
115
|
}
|
package/src/watch/data-store.js
CHANGED
|
@@ -1160,59 +1160,112 @@ export function createStore(filePath, options = {}) {
|
|
|
1160
1160
|
})();
|
|
1161
1161
|
}
|
|
1162
1162
|
|
|
1163
|
-
//
|
|
1164
|
-
const
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1163
|
+
// 常规移动:多国家时先按国家均衡补充,再用全局兜底补齐剩余额度
|
|
1164
|
+
const insertFromRawSql = `
|
|
1165
|
+
INSERT OR IGNORE INTO jobs (
|
|
1166
|
+
unique_id, nickname, status, sources, pinned,
|
|
1167
|
+
tt_seller, verified, video_count, comment_count,
|
|
1168
|
+
guessed_location, location_created, confirmed_location,
|
|
1169
|
+
follower_count, following_count, heart_count,
|
|
1170
|
+
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
1171
|
+
status_code, latest_video_time, user_create_time
|
|
1172
|
+
)
|
|
1173
|
+
SELECT
|
|
1174
|
+
unique_id, nickname, 'pending', sources, pinned,
|
|
1175
|
+
tt_seller, verified, video_count, comment_count,
|
|
1176
|
+
guessed_location, location_created, confirmed_location,
|
|
1177
|
+
follower_count, following_count, heart_count,
|
|
1178
|
+
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
1179
|
+
status_code, latest_video_time, user_create_time
|
|
1180
|
+
FROM raw_jobs
|
|
1181
|
+
WHERE __WHERE__
|
|
1182
|
+
ORDER BY
|
|
1183
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
1184
|
+
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1185
|
+
LIMIT ?
|
|
1186
|
+
`;
|
|
1187
|
+
|
|
1188
|
+
const deleteFromRawSql = `
|
|
1189
|
+
DELETE FROM raw_jobs
|
|
1190
|
+
WHERE unique_id IN (
|
|
1191
|
+
SELECT unique_id FROM raw_jobs
|
|
1192
|
+
WHERE __WHERE__
|
|
1185
1193
|
ORDER BY
|
|
1186
1194
|
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
1187
1195
|
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1188
1196
|
LIMIT ?
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
.run(...args, safeLimit);
|
|
1197
|
+
)
|
|
1198
|
+
`;
|
|
1192
1199
|
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1200
|
+
const uniqueLocations = normalizedLocations
|
|
1201
|
+
? Array.from(new Set(normalizedLocations))
|
|
1202
|
+
: [];
|
|
1203
|
+
const shouldBalanceByCountry = uniqueLocations.length > 1;
|
|
1204
|
+
|
|
1205
|
+
/**
 * Transaction that moves up to `safeLimit` rows from raw_jobs into jobs and
 * returns how many rows were deleted from raw_jobs.
 *
 * With more than one target location the limit is first split evenly across
 * the locations (the first `safeLimit % n` locations get one extra). Any quota
 * left unused is then filled by a query over the whole `whereSql` range.
 * With zero or one location a single query over `whereSql` is used.
 */
const moveTxn = getDb().transaction(() => {
  const db = getDb();

  // Build the INSERT/DELETE statement pair for one WHERE clause.
  const prepareMovePair = (where) => ({
    insert: db.prepare(insertFromRawSql.replace("__WHERE__", where)),
    remove: db.prepare(deleteFromRawSql.replace("__WHERE__", where)),
  });

  // Copy the selected rows into jobs, delete the same selection from raw_jobs,
  // and report the number of rows deleted.
  const moveBatch = (pair, params, limit) => {
    pair.insert.run(...params, limit);
    const del = pair.remove.run(...params, limit);
    return del?.changes || 0;
  };

  if (!shouldBalanceByCountry) {
    return moveBatch(prepareMovePair(whereSql), args, safeLimit);
  }

  let moved = 0;
  const movedByCountry = {};
  const baseQuota = Math.floor(safeLimit / uniqueLocations.length);
  const remainder = safeLimit % uniqueLocations.length;

  // The per-country SQL text is the same for every location (only the bound
  // parameter changes), so the statements are prepared once, on first use,
  // instead of on every loop iteration.
  const locationWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ?`;
  let perCountryPair = null;

  for (let i = 0; i < uniqueLocations.length; i++) {
    if (moved >= safeLimit) break;

    const location = uniqueLocations[i];
    const quota = baseQuota + (i < remainder ? 1 : 0);
    const currentLimit = Math.max(0, Math.min(quota, safeLimit - moved));
    if (!currentLimit) continue;

    if (!perCountryPair) perCountryPair = prepareMovePair(locationWhere);
    const movedThisCountry = moveBatch(
      perCountryPair,
      [...args, location],
      currentLimit,
    );
    moved += movedThisCountry;
    movedByCountry[location] = movedThisCountry;
  }

  // When some countries run short, top up the remaining quota with a query
  // over the whole range (still restricted to the countries in whereSql).
  const remaining = safeLimit - moved;
  if (remaining > 0) {
    moved += moveBatch(prepareMovePair(whereSql), args, remaining);
  }

  console.error(
    `[data-store] refill 国家均衡: ${uniqueLocations
      .map((loc) => `${loc}:${movedByCountry[loc] || 0}`)
      .join(", ")} | total=${moved}`,
  );

  return moved;
});
|
|
1210
1265
|
|
|
1211
|
-
moveTxn();
|
|
1266
|
+
const moved = moveTxn();
|
|
1212
1267
|
markStatsDirty();
|
|
1213
|
-
|
|
1214
|
-
const actualMoved = Math.min(count, safeLimit);
|
|
1215
|
-
return { moved: actualMoved };
|
|
1268
|
+
return { moved };
|
|
1216
1269
|
}
|
|
1217
1270
|
|
|
1218
1271
|
async function claimNextJob(
|
|
@@ -1512,8 +1565,7 @@ export function createStore(filePath, options = {}) {
|
|
|
1512
1565
|
}
|
|
1513
1566
|
return null;
|
|
1514
1567
|
}
|
|
1515
|
-
const
|
|
1516
|
-
refillLock = Promise.resolve(); // 占位
|
|
1568
|
+
const refillPromise = (async () => {
|
|
1517
1569
|
const result = refillJobsFromRaw(
|
|
1518
1570
|
normalizedLocations.length ? normalizedLocations : null,
|
|
1519
1571
|
500,
|
|
@@ -1521,30 +1573,17 @@ export function createStore(filePath, options = {}) {
|
|
|
1521
1573
|
);
|
|
1522
1574
|
// refillJobsFromRaw 在 LLM 模式下返回 Promise
|
|
1523
1575
|
if (result && typeof result.then === "function") {
|
|
1524
|
-
return result
|
|
1525
|
-
refillLock = null;
|
|
1526
|
-
});
|
|
1576
|
+
return await result;
|
|
1527
1577
|
}
|
|
1528
1578
|
return result;
|
|
1529
1579
|
})();
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
const pinned = findPinnedPending(requireVideo);
|
|
1538
|
-
if (pinned) {
|
|
1539
|
-
return claimRow(pinned);
|
|
1540
|
-
}
|
|
1541
|
-
const ranked = findPrioritizedPending(requireVideo);
|
|
1542
|
-
if (ranked) {
|
|
1543
|
-
return claimRow(ranked);
|
|
1544
|
-
}
|
|
1545
|
-
}
|
|
1546
|
-
}
|
|
1547
|
-
} else if (refillResult.moved > 0) {
|
|
1580
|
+
// 让并发请求等待同一个 refill,并且无论成功/失败都释放锁
|
|
1581
|
+
refillLock = refillPromise.finally(() => {
|
|
1582
|
+
refillLock = null;
|
|
1583
|
+
});
|
|
1584
|
+
|
|
1585
|
+
const refillResult = await refillLock;
|
|
1586
|
+
if (refillResult.moved > 0) {
|
|
1548
1587
|
console.error(
|
|
1549
1588
|
`[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
|
|
1550
1589
|
);
|