tt-help-cli-ycl 1.3.98 → 1.3.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +165 -54
- package/src/lib/args.js +2 -2
- package/src/lib/tag-fetcher.js +7 -2
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -18,6 +18,29 @@ import {
|
|
|
18
18
|
|
|
19
19
|
const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
|
|
20
20
|
const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
|
|
21
|
+
const DEFAULT_SCORE_COUNTRIES = [
|
|
22
|
+
"ES",
|
|
23
|
+
"FR",
|
|
24
|
+
"DE",
|
|
25
|
+
"PT",
|
|
26
|
+
"IT",
|
|
27
|
+
"NL",
|
|
28
|
+
"BE",
|
|
29
|
+
"AT",
|
|
30
|
+
"IE",
|
|
31
|
+
"PL",
|
|
32
|
+
"CZ",
|
|
33
|
+
"GR",
|
|
34
|
+
"HU",
|
|
35
|
+
];
|
|
36
|
+
|
|
37
|
+
function resolveTargetCountries(countries) {
|
|
38
|
+
return countries || DEFAULT_SCORE_COUNTRIES;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function formatMemoryUsage(mem = process.memoryUsage()) {
|
|
42
|
+
return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
|
|
43
|
+
}
|
|
21
44
|
|
|
22
45
|
// 构建带客户端追踪 header 的 fetch 封装
|
|
23
46
|
function buildClientHeaders(clientId, meta, extra = {}) {
|
|
@@ -426,21 +449,7 @@ export async function handleScore(parsed) {
|
|
|
426
449
|
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
427
450
|
const cdpPort = port || 9222;
|
|
428
451
|
const effectiveProxy = cliProxy || configuredProxy;
|
|
429
|
-
const targetCountries = countries
|
|
430
|
-
"ES",
|
|
431
|
-
"FR",
|
|
432
|
-
"DE",
|
|
433
|
-
"PT",
|
|
434
|
-
"IT",
|
|
435
|
-
"NL",
|
|
436
|
-
"BE",
|
|
437
|
-
"AT",
|
|
438
|
-
"IE",
|
|
439
|
-
"PL",
|
|
440
|
-
"CZ",
|
|
441
|
-
"GR",
|
|
442
|
-
"HU",
|
|
443
|
-
];
|
|
452
|
+
const targetCountries = resolveTargetCountries(countries);
|
|
444
453
|
|
|
445
454
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
446
455
|
|
|
@@ -576,23 +585,11 @@ export async function handleScoreAll(parsed) {
|
|
|
576
585
|
} = tagScoreAll || {};
|
|
577
586
|
|
|
578
587
|
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
579
|
-
const
|
|
588
|
+
const defaultScoreAllPort =
|
|
589
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222;
|
|
590
|
+
let cdpPort = port || defaultScoreAllPort;
|
|
580
591
|
const effectiveProxy = cliProxy || configuredProxy;
|
|
581
|
-
const targetCountries = countries
|
|
582
|
-
"ES",
|
|
583
|
-
"FR",
|
|
584
|
-
"DE",
|
|
585
|
-
"PT",
|
|
586
|
-
"IT",
|
|
587
|
-
"NL",
|
|
588
|
-
"BE",
|
|
589
|
-
"AT",
|
|
590
|
-
"IE",
|
|
591
|
-
"PL",
|
|
592
|
-
"CZ",
|
|
593
|
-
"GR",
|
|
594
|
-
"HU",
|
|
595
|
-
];
|
|
592
|
+
const targetCountries = resolveTargetCountries(countries);
|
|
596
593
|
|
|
597
594
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
598
595
|
|
|
@@ -613,8 +610,131 @@ export async function handleScoreAll(parsed) {
|
|
|
613
610
|
// 连接 CDP 浏览器
|
|
614
611
|
const cdpOpts = { port: cdpPort };
|
|
615
612
|
if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
|
|
616
|
-
|
|
613
|
+
let browser = await ensureBrowserReady(cdpOpts);
|
|
617
614
|
let page = await getOrCreatePage(browser);
|
|
615
|
+
const blockedRoutePages = new WeakSet();
|
|
616
|
+
|
|
617
|
+
async function setupPageRequestBlocking(targetPage) {
|
|
618
|
+
if (!targetPage || blockedRoutePages.has(targetPage)) return;
|
|
619
|
+
await targetPage.route("**/*", (route) => {
|
|
620
|
+
const resourceType = route.request().resourceType();
|
|
621
|
+
if (resourceType === "image" || resourceType === "stylesheet") {
|
|
622
|
+
route.abort();
|
|
623
|
+
} else {
|
|
624
|
+
route.continue();
|
|
625
|
+
}
|
|
626
|
+
});
|
|
627
|
+
blockedRoutePages.add(targetPage);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
await setupPageRequestBlocking(page);
|
|
631
|
+
|
|
632
|
+
const portPoolStart = Math.max(
|
|
633
|
+
1,
|
|
634
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
|
|
635
|
+
);
|
|
636
|
+
const portPoolSize = Math.max(
|
|
637
|
+
2,
|
|
638
|
+
parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
|
|
639
|
+
);
|
|
640
|
+
const switchPortOnRecycle =
|
|
641
|
+
String(process.env.TAG_SCOREALL_SWITCH_PORT_ON_RECYCLE || "1") !== "0";
|
|
642
|
+
|
|
643
|
+
function pickNextPort(currentPort) {
|
|
644
|
+
const candidates = [];
|
|
645
|
+
for (let i = 0; i < portPoolSize; i++) {
|
|
646
|
+
const p = portPoolStart + i;
|
|
647
|
+
if (p !== currentPort) candidates.push(p);
|
|
648
|
+
}
|
|
649
|
+
if (candidates.length === 0) return currentPort;
|
|
650
|
+
return candidates[Math.floor(Math.random() * candidates.length)];
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
const memRssRecycleMb = Math.max(
|
|
654
|
+
256,
|
|
655
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_RSS_MB || "900", 10) || 900,
|
|
656
|
+
);
|
|
657
|
+
const memHeapRecycleMb = Math.max(
|
|
658
|
+
128,
|
|
659
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_HEAP_MB || "320", 10) || 320,
|
|
660
|
+
);
|
|
661
|
+
const recycleCooldownMs = Math.max(
|
|
662
|
+
0,
|
|
663
|
+
parseInt(process.env.TAG_SCOREALL_RECYCLE_COOLDOWN_MS || "180000", 10) ||
|
|
664
|
+
180000,
|
|
665
|
+
);
|
|
666
|
+
// 默认关闭按固定轮次重建,仅在高内存时触发;需要可通过环境变量开启。
|
|
667
|
+
const periodicRecycleEvery = Math.max(
|
|
668
|
+
0,
|
|
669
|
+
parseInt(process.env.TAG_SCOREALL_PERIODIC_RECYCLE_EVERY || "0", 10) || 0,
|
|
670
|
+
);
|
|
671
|
+
let lastRecycleAt = 0;
|
|
672
|
+
|
|
673
|
+
async function recyclePage() {
|
|
674
|
+
if (!page || page.isClosed()) {
|
|
675
|
+
page = await getOrCreatePage(browser);
|
|
676
|
+
await setupPageRequestBlocking(page);
|
|
677
|
+
return;
|
|
678
|
+
}
|
|
679
|
+
try {
|
|
680
|
+
await page.goto("about:blank", {
|
|
681
|
+
waitUntil: "domcontentloaded",
|
|
682
|
+
timeout: 5000,
|
|
683
|
+
});
|
|
684
|
+
} catch {
|
|
685
|
+
// 页面状态异常时回退到重建 tab
|
|
686
|
+
await page.close().catch(() => {});
|
|
687
|
+
page = await getOrCreatePage(browser);
|
|
688
|
+
await setupPageRequestBlocking(page);
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
async function recycleCdpSession(reason) {
|
|
693
|
+
const oldPort = cdpPort;
|
|
694
|
+
if (switchPortOnRecycle) {
|
|
695
|
+
cdpPort = pickNextPort(cdpPort);
|
|
696
|
+
cdpOpts.port = cdpPort;
|
|
697
|
+
clientMeta.port = cdpPort;
|
|
698
|
+
}
|
|
699
|
+
const switchHint =
|
|
700
|
+
oldPort === cdpPort ? `port=${cdpPort}` : `port ${oldPort} -> ${cdpPort}`;
|
|
701
|
+
log(` ♻️ 重建 CDP 会话 (${reason}; ${switchHint})...`);
|
|
702
|
+
if (page) {
|
|
703
|
+
await page.close().catch(() => {});
|
|
704
|
+
page = null;
|
|
705
|
+
}
|
|
706
|
+
await browser.close().catch(() => {});
|
|
707
|
+
if (oldPort !== cdpPort) {
|
|
708
|
+
await killEdgeProcesses(null, oldPort).catch(() => {});
|
|
709
|
+
}
|
|
710
|
+
browser = await ensureBrowserReady(cdpOpts);
|
|
711
|
+
page = await getOrCreatePage(browser);
|
|
712
|
+
await setupPageRequestBlocking(page);
|
|
713
|
+
lastRecycleAt = Date.now();
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
async function maybeRecycleForMemory() {
|
|
717
|
+
const mem = process.memoryUsage();
|
|
718
|
+
const rssMB = mem.rss / 1024 / 1024;
|
|
719
|
+
const heapMB = mem.heapUsed / 1024 / 1024;
|
|
720
|
+
if (rssMB >= memRssRecycleMb || heapMB >= memHeapRecycleMb) {
|
|
721
|
+
const now = Date.now();
|
|
722
|
+
if (recycleCooldownMs > 0 && now - lastRecycleAt < recycleCooldownMs) {
|
|
723
|
+
return;
|
|
724
|
+
}
|
|
725
|
+
await recycleCdpSession(
|
|
726
|
+
`mem rss=${rssMB.toFixed(0)}MB heap=${heapMB.toFixed(0)}MB (threshold rss=${memRssRecycleMb} heap=${memHeapRecycleMb})`,
|
|
727
|
+
);
|
|
728
|
+
return;
|
|
729
|
+
}
|
|
730
|
+
if (
|
|
731
|
+
periodicRecycleEvery > 0 &&
|
|
732
|
+
totalScored > 0 &&
|
|
733
|
+
totalScored % periodicRecycleEvery === 0
|
|
734
|
+
) {
|
|
735
|
+
await recycleCdpSession(`periodic every ${totalScored} tasks`);
|
|
736
|
+
}
|
|
737
|
+
}
|
|
618
738
|
|
|
619
739
|
let totalScored = 0;
|
|
620
740
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
@@ -622,7 +742,7 @@ export async function handleScoreAll(parsed) {
|
|
|
622
742
|
|
|
623
743
|
// 生成客户端 ID,用于服务端追踪
|
|
624
744
|
const clientId = randomUUID();
|
|
625
|
-
const clientMeta = { type: "scoring" };
|
|
745
|
+
const clientMeta = { type: "scoring", port: cdpPort };
|
|
626
746
|
|
|
627
747
|
// 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
|
|
628
748
|
const enrichScraper = new TikTokScraper({
|
|
@@ -719,6 +839,7 @@ export async function handleScoreAll(parsed) {
|
|
|
719
839
|
// already claimed: 其他机器抢先了,跳过不标 dead
|
|
720
840
|
if (claimData.error && claimData.error.includes("already claimed")) {
|
|
721
841
|
log(` ⏭️ 已被其他客户端锁定,跳过`);
|
|
842
|
+
await recyclePage();
|
|
722
843
|
continue;
|
|
723
844
|
}
|
|
724
845
|
log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
|
|
@@ -726,6 +847,7 @@ export async function handleScoreAll(parsed) {
|
|
|
726
847
|
result.status = "dead";
|
|
727
848
|
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
728
849
|
totalScored++;
|
|
850
|
+
await recyclePage();
|
|
729
851
|
continue;
|
|
730
852
|
}
|
|
731
853
|
|
|
@@ -733,7 +855,7 @@ export async function handleScoreAll(parsed) {
|
|
|
733
855
|
const fetchStart = Date.now();
|
|
734
856
|
log(` 抓取 TikTok 标签页...`);
|
|
735
857
|
const tagResult = await fetchTagData(tag, {
|
|
736
|
-
|
|
858
|
+
page,
|
|
737
859
|
onProgress: ({ videos, authors }) => {
|
|
738
860
|
process.stderr.write(
|
|
739
861
|
`\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
|
|
@@ -751,23 +873,17 @@ export async function handleScoreAll(parsed) {
|
|
|
751
873
|
|
|
752
874
|
if (!videos || videos.length === 0) {
|
|
753
875
|
const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
|
|
754
|
-
|
|
755
|
-
|
|
876
|
+
log(
|
|
877
|
+
` ⚠️ 无视频 (${deadSec}s) mem=${formatMemoryUsage()},标记 dead`,
|
|
756
878
|
);
|
|
757
|
-
log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
|
|
758
879
|
result.status = "dead";
|
|
759
880
|
result.error = "no videos found";
|
|
760
881
|
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
761
882
|
totalScored++;
|
|
762
883
|
// 随机等待 3-7 秒,避免连续访问 TikTok 触发风控
|
|
763
884
|
await randomDelay(0, 5000);
|
|
764
|
-
|
|
765
|
-
await
|
|
766
|
-
.goto("about:blank", {
|
|
767
|
-
waitUntil: "domcontentloaded",
|
|
768
|
-
timeout: 5000,
|
|
769
|
-
})
|
|
770
|
-
.catch(() => {});
|
|
885
|
+
await recyclePage();
|
|
886
|
+
await maybeRecycleForMemory();
|
|
771
887
|
continue;
|
|
772
888
|
}
|
|
773
889
|
|
|
@@ -836,20 +952,13 @@ export async function handleScoreAll(parsed) {
|
|
|
836
952
|
const mc = result.matchedCountries
|
|
837
953
|
.map((c) => `${c.c}:${c.n}`)
|
|
838
954
|
.join(" ");
|
|
839
|
-
|
|
840
|
-
const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
|
|
841
|
-
const memStr = ` mem=${memMB}MB`;
|
|
955
|
+
const memStr = ` mem=${formatMemoryUsage()}`;
|
|
842
956
|
log(
|
|
843
957
|
` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
|
|
844
958
|
);
|
|
845
959
|
log("");
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
await page
|
|
849
|
-
.goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
|
|
850
|
-
.catch((e) => {
|
|
851
|
-
log(` ⚠️ about:blank 跳转失败: ${e.message}`);
|
|
852
|
-
});
|
|
960
|
+
await recyclePage();
|
|
961
|
+
await maybeRecycleForMemory();
|
|
853
962
|
await randomDelay(3000, 7000);
|
|
854
963
|
} catch (e) {
|
|
855
964
|
// 区分网络错误和业务错误
|
|
@@ -881,6 +990,8 @@ export async function handleScoreAll(parsed) {
|
|
|
881
990
|
);
|
|
882
991
|
} catch {}
|
|
883
992
|
totalScored++;
|
|
993
|
+
await recyclePage();
|
|
994
|
+
await maybeRecycleForMemory();
|
|
884
995
|
}
|
|
885
996
|
}
|
|
886
997
|
} finally {
|
package/src/lib/args.js
CHANGED
|
@@ -739,7 +739,7 @@ function parseTagArgs(args) {
|
|
|
739
739
|
let isDiscover = false;
|
|
740
740
|
let isScore = false;
|
|
741
741
|
let isScoreAll = false;
|
|
742
|
-
let scoreAllPort =
|
|
742
|
+
let scoreAllPort = 7222;
|
|
743
743
|
let scoreProxy = null;
|
|
744
744
|
let scoreTag = null;
|
|
745
745
|
let scoreCountries = null;
|
|
@@ -788,7 +788,7 @@ function parseTagArgs(args) {
|
|
|
788
788
|
.filter(Boolean);
|
|
789
789
|
} else if (arg === "--port") {
|
|
790
790
|
if (isScoreAll) {
|
|
791
|
-
scoreAllPort = parseInt(args[++i]) ||
|
|
791
|
+
scoreAllPort = parseInt(args[++i]) || 7222;
|
|
792
792
|
} else {
|
|
793
793
|
scorePort = parseInt(args[++i]) || 9222;
|
|
794
794
|
}
|
package/src/lib/tag-fetcher.js
CHANGED
|
@@ -48,14 +48,19 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
48
48
|
userDataDir,
|
|
49
49
|
proxyServer,
|
|
50
50
|
onProgress,
|
|
51
|
+
browser: existingBrowser,
|
|
52
|
+
page: existingPage,
|
|
51
53
|
} = options;
|
|
52
54
|
|
|
53
55
|
const cdpOptions = { port };
|
|
54
56
|
if (userDataDir) cdpOptions.userDataDir = userDataDir;
|
|
55
57
|
if (proxyServer) cdpOptions.proxyServer = proxyServer;
|
|
56
58
|
|
|
57
|
-
const browser =
|
|
58
|
-
|
|
59
|
+
const browser =
|
|
60
|
+
existingBrowser ||
|
|
61
|
+
(existingPage ? existingPage.context().browser() : null) ||
|
|
62
|
+
(await ensureBrowserReady(cdpOptions));
|
|
63
|
+
const page = existingPage || (await getOrCreatePage(browser));
|
|
59
64
|
|
|
60
65
|
let challengeInfo = null;
|
|
61
66
|
const rawVideos = [];
|