tt-help-cli-ycl 1.3.100 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.100",
3
+ "version": "1.4.0",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -5,7 +5,6 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
5
5
  import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
6
6
  import { getOrCreatePage } from "../lib/browser/page.js";
7
7
  import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
8
- import { CDNBlockedError } from "../lib/parse-ssr.mjs";
9
8
  import {
10
9
  DEFAULT_TARGET_LOCATIONS,
11
10
  isLocationInList,
@@ -43,18 +42,6 @@ function formatMemoryUsage(mem = process.memoryUsage()) {
43
42
  return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
44
43
  }
45
44
 
46
- function getCdnCooldownSeconds(blockedCount, totalCount, isTooManyRequests = false) {
47
- if (isTooManyRequests) return 120;
48
- const ratio = blockedCount / Math.max(totalCount, 1);
49
- return ratio > 0.3 ? 120 : 60;
50
- }
51
-
52
- async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
53
- await new Promise((r) => setTimeout(r, cooldownSec * 1000));
54
- await recyclePage();
55
- await maybeRecycleForMemory();
56
- }
57
-
58
45
  // 构建带客户端追踪 header 的 fetch 封装
59
46
  function buildClientHeaders(clientId, meta, extra = {}) {
60
47
  return {
@@ -751,7 +738,9 @@ export async function handleScoreAll(parsed) {
751
738
 
752
739
  let totalScored = 0;
753
740
  let emptyRounds = 0; // 连续无任务的轮数
741
+ let consecutive403 = 0; // 连续 HTTP 403 次数,达 3 次认为被封
754
742
  const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
743
+ const CONSECUTIVE_403_BLOCKED = 3; // 连续 3 次 403 认为账号被封
755
744
 
756
745
  // 生成客户端 ID,用于服务端追踪
757
746
  const clientId = randomUUID();
@@ -919,11 +908,12 @@ export async function handleScoreAll(parsed) {
919
908
  // CDN 限流检测:有拦截则冷却 + 重启 scraper
920
909
  const cdnBlocked = enriched.cdnBlockedCount || 0;
921
910
  if (cdnBlocked > 0) {
922
- const coolSec = getCdnCooldownSeconds(cdnBlocked, videos.length);
911
+ const cdnRatio = cdnBlocked / (videos.length || 1);
912
+ const coolSec = cdnRatio > 0.3 ? 120 : 60;
923
913
  log(
924
- ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${((cdnBlocked / Math.max(videos.length, 1)) * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
914
+ ` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
925
915
  );
926
- await cooldownAndRecycle(coolSec, recyclePage, maybeRecycleForMemory);
916
+ await new Promise((r) => setTimeout(r, coolSec * 1000));
927
917
  log(` 正在重启 TikTokScraper...`);
928
918
  await enrichScraper.restart();
929
919
  log(` ✅ TikTokScraper 已重启`);
@@ -969,21 +959,11 @@ export async function handleScoreAll(parsed) {
969
959
  ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
970
960
  );
971
961
  log("");
962
+ consecutive403 = 0; // 成功完成,重置 403 计数器
972
963
  await recyclePage();
973
964
  await maybeRecycleForMemory();
974
965
  await randomDelay(3000, 7000);
975
966
  } catch (e) {
976
- if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
977
- log(` ⚠️ CDN 被封: ${e.message}`);
978
- result.status = "dead";
979
- result.error = "cdn_blocked";
980
- await reportToServer(baseUrl, result, clientId, clientMeta);
981
- totalScored++;
982
- const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
983
- log(` 冷却 ${cooldownSec} 秒后再继续...`);
984
- await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
985
- continue;
986
- }
987
967
  // 区分网络错误和业务错误
988
968
  const isNetworkError =
989
969
  e.code === "ECONNREFUSED" ||
@@ -998,12 +978,29 @@ export async function handleScoreAll(parsed) {
998
978
  await new Promise((r) => setTimeout(r, 15000));
999
979
  continue;
1000
980
  }
981
+ // 检测 HTTP 403(TikTok 拒绝请求,可能是账号/会话被封)
982
+ const is403 =
983
+ e.message &&
984
+ (e.message.includes("HTTP 403") ||
985
+ e.message.includes("status 403") ||
986
+ e.message.includes("status code 403"));
987
+
988
+ if (is403) {
989
+ consecutive403++;
990
+ log(` 🚫 HTTP 403 — 连续 ${consecutive403}/${CONSECUTIVE_403_BLOCKED} 次`);
991
+ } else {
992
+ consecutive403 = 0;
993
+ }
994
+
1001
995
  log(` ❌ 失败: ${e.message}`);
1002
996
  try {
997
+ // 使用当前正在处理的 tag(tag 变量在 try 块开头已赋值,catch 中仍可访问)
998
+ const failedTag =
999
+ typeof tag === "string" && tag ? tag : "unknown";
1003
1000
  await reportToServer(
1004
1001
  baseUrl,
1005
1002
  {
1006
- tag: "",
1003
+ tag: failedTag,
1007
1004
  status: "error",
1008
1005
  score: 0,
1009
1006
  error: e.message,
@@ -1013,6 +1010,18 @@ export async function handleScoreAll(parsed) {
1013
1010
  );
1014
1011
  } catch {}
1015
1012
  totalScored++;
1013
+
1014
+ // 连续 3 次 403:认为账号被封,等 2 分钟后切换端口
1015
+ if (consecutive403 >= CONSECUTIVE_403_BLOCKED) {
1016
+ log(
1017
+ ` 🔐 连续 ${consecutive403} 次 403,账号可能被封!等待 2 分钟后切换端口...`,
1018
+ );
1019
+ await new Promise((r) => setTimeout(r, 120000));
1020
+ await recycleCdpSession("连续 403 封禁");
1021
+ consecutive403 = 0;
1022
+ continue;
1023
+ }
1024
+
1016
1025
  await recyclePage();
1017
1026
  await maybeRecycleForMemory();
1018
1027
  }
package/src/lib/tag-fetcher.js CHANGED
@@ -107,9 +107,6 @@ export async function fetchTagData(tag, options = {}) {
107
107
  timeout: 30000,
108
108
  });
109
109
 
110
- if (resp.status() === 403 || resp.status() === 429) {
111
- throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
112
- }
113
110
  if (resp.status() !== 200) {
114
111
  throw new Error(`标签页返回 HTTP ${resp.status()}`);
115
112
  }