tt-help-cli-ycl 1.3.100 → 1.4.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.100",
3
+ "version": "1.4.1",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -5,7 +5,6 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
5
5
  import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
6
6
  import { getOrCreatePage } from "../lib/browser/page.js";
7
7
  import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
8
- import { CDNBlockedError } from "../lib/parse-ssr.mjs";
9
8
  import {
10
9
  DEFAULT_TARGET_LOCATIONS,
11
10
  isLocationInList,
@@ -43,18 +42,6 @@ function formatMemoryUsage(mem = process.memoryUsage()) {
43
42
  return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
44
43
  }
45
44
 
46
- function getCdnCooldownSeconds(blockedCount, totalCount, isTooManyRequests = false) {
47
- if (isTooManyRequests) return 120;
48
- const ratio = blockedCount / Math.max(totalCount, 1);
49
- return ratio > 0.3 ? 120 : 60;
50
- }
51
-
52
- async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
53
- await new Promise((r) => setTimeout(r, cooldownSec * 1000));
54
- await recyclePage();
55
- await maybeRecycleForMemory();
56
- }
57
-
58
45
  // 构建带客户端追踪 header 的 fetch 封装
59
46
  function buildClientHeaders(clientId, meta, extra = {}) {
60
47
  return {
@@ -642,10 +629,8 @@ export async function handleScoreAll(parsed) {
642
629
 
643
630
  await setupPageRequestBlocking(page);
644
631
 
645
- const portPoolStart = Math.max(
646
- 1,
647
- parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
648
- );
632
+ // 端口池起始以 CLI --port / 环境变量 / 默认 7222 为准
633
+ const portPoolStart = cdpPort;
649
634
  const portPoolSize = Math.max(
650
635
  2,
651
636
  parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
@@ -751,7 +736,9 @@ export async function handleScoreAll(parsed) {
751
736
 
752
737
  let totalScored = 0;
753
738
  let emptyRounds = 0; // 连续无任务的轮数
739
+ let consecutive403 = 0; // 连续 HTTP 403 次数,达 3 次认为被封
754
740
  const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
741
+ const CONSECUTIVE_403_BLOCKED = 3; // 连续 3 次 403 认为账号被封
755
742
 
756
743
  // 生成客户端 ID,用于服务端追踪
757
744
  const clientId = randomUUID();
@@ -919,11 +906,12 @@ export async function handleScoreAll(parsed) {
919
906
  // CDN 限流检测:有拦截则冷却 + 重启 scraper
920
907
  const cdnBlocked = enriched.cdnBlockedCount || 0;
921
908
  if (cdnBlocked > 0) {
922
- const coolSec = getCdnCooldownSeconds(cdnBlocked, videos.length);
909
+ const cdnRatio = cdnBlocked / (videos.length || 1);
910
+ const coolSec = cdnRatio > 0.3 ? 120 : 60;
923
911
  log(
924
- ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${((cdnBlocked / Math.max(videos.length, 1)) * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
912
+ ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
925
913
  );
926
- await cooldownAndRecycle(coolSec, recyclePage, maybeRecycleForMemory);
914
+ await new Promise((r) => setTimeout(r, coolSec * 1000));
927
915
  log(` 正在重启 TikTokScraper...`);
928
916
  await enrichScraper.restart();
929
917
  log(` ✅ TikTokScraper 已重启`);
@@ -969,21 +957,11 @@ export async function handleScoreAll(parsed) {
969
957
  ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
970
958
  );
971
959
  log("");
960
+ consecutive403 = 0; // 成功完成,重置 403 计数器
972
961
  await recyclePage();
973
962
  await maybeRecycleForMemory();
974
963
  await randomDelay(3000, 7000);
975
964
  } catch (e) {
976
- if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
977
- log(` ⚠️ CDN 被封: ${e.message}`);
978
- result.status = "dead";
979
- result.error = "cdn_blocked";
980
- await reportToServer(baseUrl, result, clientId, clientMeta);
981
- totalScored++;
982
- const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
983
- log(` 冷却 ${cooldownSec} 秒后再继续...`);
984
- await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
985
- continue;
986
- }
987
965
  // 区分网络错误和业务错误
988
966
  const isNetworkError =
989
967
  e.code === "ECONNREFUSED" ||
@@ -998,12 +976,30 @@ export async function handleScoreAll(parsed) {
998
976
  await new Promise((r) => setTimeout(r, 15000));
999
977
  continue;
1000
978
  }
979
+ // 检测 HTTP 403(TikTok 拒绝请求,可能是账号/会话被封)
980
+ const is403 =
981
+ e.message &&
982
+ (e.message.includes("HTTP 403") ||
983
+ e.message.includes("status 403") ||
984
+ e.message.includes("status code 403"));
985
+
986
+ if (is403) {
987
+ consecutive403++;
988
+ log(
989
+ ` 🚫 HTTP 403 — 连续 ${consecutive403}/${CONSECUTIVE_403_BLOCKED} 次`,
990
+ );
991
+ } else {
992
+ consecutive403 = 0;
993
+ }
994
+
1001
995
  log(` ❌ 失败: ${e.message}`);
1002
996
  try {
997
+ // 使用当前正在处理的 tag(tag 变量在 try 块开头已赋值,catch 中仍可访问)
998
+ const failedTag = typeof tag === "string" && tag ? tag : "unknown";
1003
999
  await reportToServer(
1004
1000
  baseUrl,
1005
1001
  {
1006
- tag: "",
1002
+ tag: failedTag,
1007
1003
  status: "error",
1008
1004
  score: 0,
1009
1005
  error: e.message,
@@ -1013,6 +1009,18 @@ export async function handleScoreAll(parsed) {
1013
1009
  );
1014
1010
  } catch {}
1015
1011
  totalScored++;
1012
+
1013
+ // 连续 3 次 403:认为账号被封,等 2 分钟后切换端口
1014
+ if (consecutive403 >= CONSECUTIVE_403_BLOCKED) {
1015
+ log(
1016
+ ` 🔐 连续 ${consecutive403} 次 403,账号可能被封!等待 2 分钟后切换端口...`,
1017
+ );
1018
+ await new Promise((r) => setTimeout(r, 120000));
1019
+ await recycleCdpSession("连续 403 封禁");
1020
+ consecutive403 = 0;
1021
+ continue;
1022
+ }
1023
+
1016
1024
  await recyclePage();
1017
1025
  await maybeRecycleForMemory();
1018
1026
  }
@@ -260,8 +260,17 @@ const HELP_TEXT = [
260
260
  " 选项:",
261
261
  " --countries <CSV> 目标国家,逗号分隔(默认 13 个欧洲国家)",
262
262
  " -s, --server <URL> 服务端地址(默认 http://127.0.0.1:3000)",
263
+ " --port <N> 起始 CDP 端口,端口池默认为 N~N+9(默认 7222)",
264
+ " --discover 开启自动发现:连续 3 轮无新标签时自动生成",
265
+ " 行为:",
266
+ " 连续 3 次 HTTP 403 → 判断账号被封 → 等 2 分钟后自动切换到池中另一端口",
267
+ " 环境变量:",
268
+ " TAG_SCOREALL_PORT_POOL_SIZE=N 端口池大小(默认 10)",
269
+ " TAG_SCOREALL_RECYCLE_RSS_MB=N 内存回收阈值 RSS MB(默认 900)",
270
+ " TAG_SCOREALL_RECYCLE_HEAP_MB=N 内存回收阈值 Heap MB(默认 320)",
263
271
  " 示例: tt-help tag score-all",
264
272
  " tt-help tag score-all --countries ES -s http://127.0.0.1:3001",
273
+ " tt-help tag score-all --port 9222 -s http://127.0.0.1:3001 --discover",
265
274
  "",
266
275
  " config [show|set|unset|reset]",
267
276
  " config 查看当前配置",
@@ -107,9 +107,6 @@ export async function fetchTagData(tag, options = {}) {
107
107
  timeout: 30000,
108
108
  });
109
109
 
110
- if (resp.status() === 403 || resp.status() === 429) {
111
- throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
112
- }
113
110
  if (resp.status() !== 200) {
114
111
  throw new Error(`标签页返回 HTTP ${resp.status()}`);
115
112
  }