tt-help-cli-ycl 1.3.98 → 1.3.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.98",
3
+ "version": "1.3.99",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -18,6 +18,29 @@ import {
18
18
 
19
19
  const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
20
20
  const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
21
+ const DEFAULT_SCORE_COUNTRIES = [
22
+ "ES",
23
+ "FR",
24
+ "DE",
25
+ "PT",
26
+ "IT",
27
+ "NL",
28
+ "BE",
29
+ "AT",
30
+ "IE",
31
+ "PL",
32
+ "CZ",
33
+ "GR",
34
+ "HU",
35
+ ];
36
+
37
+ function resolveTargetCountries(countries) {
38
+ return countries || DEFAULT_SCORE_COUNTRIES;
39
+ }
40
+
41
+ function formatMemoryUsage(mem = process.memoryUsage()) {
42
+ return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
43
+ }
21
44
 
22
45
  // 构建带客户端追踪 header 的 fetch 封装
23
46
  function buildClientHeaders(clientId, meta, extra = {}) {
@@ -426,21 +449,7 @@ export async function handleScore(parsed) {
426
449
  const baseUrl = serverUrl || DEFAULT_SERVER;
427
450
  const cdpPort = port || 9222;
428
451
  const effectiveProxy = cliProxy || configuredProxy;
429
- const targetCountries = countries || [
430
- "ES",
431
- "FR",
432
- "DE",
433
- "PT",
434
- "IT",
435
- "NL",
436
- "BE",
437
- "AT",
438
- "IE",
439
- "PL",
440
- "CZ",
441
- "GR",
442
- "HU",
443
- ];
452
+ const targetCountries = resolveTargetCountries(countries);
444
453
 
445
454
  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
446
455
 
@@ -576,23 +585,11 @@ export async function handleScoreAll(parsed) {
576
585
  } = tagScoreAll || {};
577
586
 
578
587
  const baseUrl = serverUrl || DEFAULT_SERVER;
579
- const cdpPort = port || 9222;
588
+ const defaultScoreAllPort =
589
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222;
590
+ let cdpPort = port || defaultScoreAllPort;
580
591
  const effectiveProxy = cliProxy || configuredProxy;
581
- const targetCountries = countries || [
582
- "ES",
583
- "FR",
584
- "DE",
585
- "PT",
586
- "IT",
587
- "NL",
588
- "BE",
589
- "AT",
590
- "IE",
591
- "PL",
592
- "CZ",
593
- "GR",
594
- "HU",
595
- ];
592
+ const targetCountries = resolveTargetCountries(countries);
596
593
 
597
594
  const log = (...args) => process.stderr.write(args.join(" ") + "\n");
598
595
 
@@ -613,8 +610,131 @@ export async function handleScoreAll(parsed) {
613
610
  // 连接 CDP 浏览器
614
611
  const cdpOpts = { port: cdpPort };
615
612
  if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
616
- const browser = await ensureBrowserReady(cdpOpts);
613
+ let browser = await ensureBrowserReady(cdpOpts);
617
614
  let page = await getOrCreatePage(browser);
615
+ const blockedRoutePages = new WeakSet();
616
+
617
+ async function setupPageRequestBlocking(targetPage) {
618
+ if (!targetPage || blockedRoutePages.has(targetPage)) return;
619
+ await targetPage.route("**/*", (route) => {
620
+ const resourceType = route.request().resourceType();
621
+ if (resourceType === "image" || resourceType === "stylesheet") {
622
+ route.abort();
623
+ } else {
624
+ route.continue();
625
+ }
626
+ });
627
+ blockedRoutePages.add(targetPage);
628
+ }
629
+
630
+ await setupPageRequestBlocking(page);
631
+
632
+ const portPoolStart = Math.max(
633
+ 1,
634
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
635
+ );
636
+ const portPoolSize = Math.max(
637
+ 2,
638
+ parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
639
+ );
640
+ const switchPortOnRecycle =
641
+ String(process.env.TAG_SCOREALL_SWITCH_PORT_ON_RECYCLE || "1") !== "0";
642
+
643
+ function pickNextPort(currentPort) {
644
+ const candidates = [];
645
+ for (let i = 0; i < portPoolSize; i++) {
646
+ const p = portPoolStart + i;
647
+ if (p !== currentPort) candidates.push(p);
648
+ }
649
+ if (candidates.length === 0) return currentPort;
650
+ return candidates[Math.floor(Math.random() * candidates.length)];
651
+ }
652
+
653
+ const memRssRecycleMb = Math.max(
654
+ 256,
655
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_RSS_MB || "900", 10) || 900,
656
+ );
657
+ const memHeapRecycleMb = Math.max(
658
+ 128,
659
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_HEAP_MB || "320", 10) || 320,
660
+ );
661
+ const recycleCooldownMs = Math.max(
662
+ 0,
663
+ parseInt(process.env.TAG_SCOREALL_RECYCLE_COOLDOWN_MS || "180000", 10) ||
664
+ 180000,
665
+ );
666
+ // 默认关闭按固定轮次重建,仅在高内存时触发;需要可通过环境变量开启。
667
+ const periodicRecycleEvery = Math.max(
668
+ 0,
669
+ parseInt(process.env.TAG_SCOREALL_PERIODIC_RECYCLE_EVERY || "0", 10) || 0,
670
+ );
671
+ let lastRecycleAt = 0;
672
+
673
+ async function recyclePage() {
674
+ if (!page || page.isClosed()) {
675
+ page = await getOrCreatePage(browser);
676
+ await setupPageRequestBlocking(page);
677
+ return;
678
+ }
679
+ try {
680
+ await page.goto("about:blank", {
681
+ waitUntil: "domcontentloaded",
682
+ timeout: 5000,
683
+ });
684
+ } catch {
685
+ // 页面状态异常时回退到重建 tab
686
+ await page.close().catch(() => {});
687
+ page = await getOrCreatePage(browser);
688
+ await setupPageRequestBlocking(page);
689
+ }
690
+ }
691
+
692
+ async function recycleCdpSession(reason) {
693
+ const oldPort = cdpPort;
694
+ if (switchPortOnRecycle) {
695
+ cdpPort = pickNextPort(cdpPort);
696
+ cdpOpts.port = cdpPort;
697
+ clientMeta.port = cdpPort;
698
+ }
699
+ const switchHint =
700
+ oldPort === cdpPort ? `port=${cdpPort}` : `port ${oldPort} -> ${cdpPort}`;
701
+ log(` ♻️ 重建 CDP 会话 (${reason}; ${switchHint})...`);
702
+ if (page) {
703
+ await page.close().catch(() => {});
704
+ page = null;
705
+ }
706
+ await browser.close().catch(() => {});
707
+ if (oldPort !== cdpPort) {
708
+ await killEdgeProcesses(null, oldPort).catch(() => {});
709
+ }
710
+ browser = await ensureBrowserReady(cdpOpts);
711
+ page = await getOrCreatePage(browser);
712
+ await setupPageRequestBlocking(page);
713
+ lastRecycleAt = Date.now();
714
+ }
715
+
716
+ async function maybeRecycleForMemory() {
717
+ const mem = process.memoryUsage();
718
+ const rssMB = mem.rss / 1024 / 1024;
719
+ const heapMB = mem.heapUsed / 1024 / 1024;
720
+ if (rssMB >= memRssRecycleMb || heapMB >= memHeapRecycleMb) {
721
+ const now = Date.now();
722
+ if (recycleCooldownMs > 0 && now - lastRecycleAt < recycleCooldownMs) {
723
+ return;
724
+ }
725
+ await recycleCdpSession(
726
+ `mem rss=${rssMB.toFixed(0)}MB heap=${heapMB.toFixed(0)}MB (threshold rss=${memRssRecycleMb} heap=${memHeapRecycleMb})`,
727
+ );
728
+ return;
729
+ }
730
+ if (
731
+ periodicRecycleEvery > 0 &&
732
+ totalScored > 0 &&
733
+ totalScored % periodicRecycleEvery === 0
734
+ ) {
735
+ await recycleCdpSession(`periodic every ${totalScored} tasks`);
736
+ }
737
+ }
618
738
 
619
739
  let totalScored = 0;
620
740
  let emptyRounds = 0; // 连续无任务的轮数
@@ -622,7 +742,7 @@ export async function handleScoreAll(parsed) {
622
742
 
623
743
  // 生成客户端 ID,用于服务端追踪
624
744
  const clientId = randomUUID();
625
- const clientMeta = { type: "scoring" };
745
+ const clientMeta = { type: "scoring", port: cdpPort };
626
746
 
627
747
  // 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
628
748
  const enrichScraper = new TikTokScraper({
@@ -719,6 +839,7 @@ export async function handleScoreAll(parsed) {
719
839
  // already claimed: 其他机器抢先了,跳过不标 dead
720
840
  if (claimData.error && claimData.error.includes("already claimed")) {
721
841
  log(` ⏭️ 已被其他客户端锁定,跳过`);
842
+ await recyclePage();
722
843
  continue;
723
844
  }
724
845
  log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
@@ -726,6 +847,7 @@ export async function handleScoreAll(parsed) {
726
847
  result.status = "dead";
727
848
  await reportToServer(baseUrl, result, clientId, clientMeta);
728
849
  totalScored++;
850
+ await recyclePage();
729
851
  continue;
730
852
  }
731
853
 
@@ -733,7 +855,7 @@ export async function handleScoreAll(parsed) {
733
855
  const fetchStart = Date.now();
734
856
  log(` 抓取 TikTok 标签页...`);
735
857
  const tagResult = await fetchTagData(tag, {
736
- port: cdpPort,
858
+ page,
737
859
  onProgress: ({ videos, authors }) => {
738
860
  process.stderr.write(
739
861
  `\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
@@ -751,23 +873,17 @@ export async function handleScoreAll(parsed) {
751
873
 
752
874
  if (!videos || videos.length === 0) {
753
875
  const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
754
- const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(
755
- 0,
876
+ log(
877
+ ` ⚠️ 无视频 (${deadSec}s) mem=${formatMemoryUsage()},标记 dead`,
756
878
  );
757
- log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
758
879
  result.status = "dead";
759
880
  result.error = "no videos found";
760
881
  await reportToServer(baseUrl, result, clientId, clientMeta);
761
882
  totalScored++;
762
883
  // Wait a random 0-5 s (per the randomDelay(0, 5000) call below) so back-to-back TikTok visits do not trigger risk control
763
884
  await randomDelay(0, 5000);
764
- // 导航到 about:blank 释放页面状态再跳过
765
- await page
766
- .goto("about:blank", {
767
- waitUntil: "domcontentloaded",
768
- timeout: 5000,
769
- })
770
- .catch(() => {});
885
+ await recyclePage();
886
+ await maybeRecycleForMemory();
771
887
  continue;
772
888
  }
773
889
 
@@ -836,20 +952,13 @@ export async function handleScoreAll(parsed) {
836
952
  const mc = result.matchedCountries
837
953
  .map((c) => `${c.c}:${c.n}`)
838
954
  .join(" ");
839
- // Node.js 进程内存占用
840
- const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
841
- const memStr = ` mem=${memMB}MB`;
955
+ const memStr = ` mem=${formatMemoryUsage()}`;
842
956
  log(
843
957
  ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
844
958
  );
845
959
  log("");
846
-
847
- // 导航到 about:blank 卸载页面,状态清零,下次 goto 重新初始化
848
- await page
849
- .goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
850
- .catch((e) => {
851
- log(` ⚠️ about:blank 跳转失败: ${e.message}`);
852
- });
960
+ await recyclePage();
961
+ await maybeRecycleForMemory();
853
962
  await randomDelay(3000, 7000);
854
963
  } catch (e) {
855
964
  // 区分网络错误和业务错误
@@ -881,6 +990,8 @@ export async function handleScoreAll(parsed) {
881
990
  );
882
991
  } catch {}
883
992
  totalScored++;
993
+ await recyclePage();
994
+ await maybeRecycleForMemory();
884
995
  }
885
996
  }
886
997
  } finally {
package/src/lib/args.js CHANGED
@@ -739,7 +739,7 @@ function parseTagArgs(args) {
739
739
  let isDiscover = false;
740
740
  let isScore = false;
741
741
  let isScoreAll = false;
742
- let scoreAllPort = 9222;
742
+ let scoreAllPort = 7222;
743
743
  let scoreProxy = null;
744
744
  let scoreTag = null;
745
745
  let scoreCountries = null;
@@ -788,7 +788,7 @@ function parseTagArgs(args) {
788
788
  .filter(Boolean);
789
789
  } else if (arg === "--port") {
790
790
  if (isScoreAll) {
791
- scoreAllPort = parseInt(args[++i]) || 9222;
791
+ scoreAllPort = parseInt(args[++i]) || 7222;
792
792
  } else {
793
793
  scorePort = parseInt(args[++i]) || 9222;
794
794
  }
@@ -48,14 +48,19 @@ export async function fetchTagData(tag, options = {}) {
48
48
  userDataDir,
49
49
  proxyServer,
50
50
  onProgress,
51
+ browser: existingBrowser,
52
+ page: existingPage,
51
53
  } = options;
52
54
 
53
55
  const cdpOptions = { port };
54
56
  if (userDataDir) cdpOptions.userDataDir = userDataDir;
55
57
  if (proxyServer) cdpOptions.proxyServer = proxyServer;
56
58
 
57
- const browser = await ensureBrowserReady(cdpOptions);
58
- const page = await getOrCreatePage(browser);
59
+ const browser =
60
+ existingBrowser ||
61
+ (existingPage ? existingPage.context().browser() : null) ||
62
+ (await ensureBrowserReady(cdpOptions));
63
+ const page = existingPage || (await getOrCreatePage(browser));
59
64
 
60
65
  let challengeInfo = null;
61
66
  const rawVideos = [];