tt-help-cli-ycl 1.3.100 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +40 -32
- package/src/lib/constants.js +9 -0
- package/src/lib/tag-fetcher.js +0 -3
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -5,7 +5,6 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
|
5
5
|
import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
|
|
6
6
|
import { getOrCreatePage } from "../lib/browser/page.js";
|
|
7
7
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
8
|
-
import { CDNBlockedError } from "../lib/parse-ssr.mjs";
|
|
9
8
|
import {
|
|
10
9
|
DEFAULT_TARGET_LOCATIONS,
|
|
11
10
|
isLocationInList,
|
|
@@ -43,18 +42,6 @@ function formatMemoryUsage(mem = process.memoryUsage()) {
|
|
|
43
42
|
return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
|
|
44
43
|
}
|
|
45
44
|
|
|
46
|
-
function getCdnCooldownSeconds(blockedCount, totalCount, isTooManyRequests = false) {
|
|
47
|
-
if (isTooManyRequests) return 120;
|
|
48
|
-
const ratio = blockedCount / Math.max(totalCount, 1);
|
|
49
|
-
return ratio > 0.3 ? 120 : 60;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
|
|
53
|
-
await new Promise((r) => setTimeout(r, cooldownSec * 1000));
|
|
54
|
-
await recyclePage();
|
|
55
|
-
await maybeRecycleForMemory();
|
|
56
|
-
}
|
|
57
|
-
|
|
58
45
|
// 构建带客户端追踪 header 的 fetch 封装
|
|
59
46
|
function buildClientHeaders(clientId, meta, extra = {}) {
|
|
60
47
|
return {
|
|
@@ -642,10 +629,8 @@ export async function handleScoreAll(parsed) {
|
|
|
642
629
|
|
|
643
630
|
await setupPageRequestBlocking(page);
|
|
644
631
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
parseInt(process.env.TAG_SCOREALL_PORT_POOL_START || "7222", 10) || 7222,
|
|
648
|
-
);
|
|
632
|
+
// 端口池起始以 CLI --port / 环境变量 / 默认 7222 为准
|
|
633
|
+
const portPoolStart = cdpPort;
|
|
649
634
|
const portPoolSize = Math.max(
|
|
650
635
|
2,
|
|
651
636
|
parseInt(process.env.TAG_SCOREALL_PORT_POOL_SIZE || "10", 10) || 10,
|
|
@@ -751,7 +736,9 @@ export async function handleScoreAll(parsed) {
|
|
|
751
736
|
|
|
752
737
|
let totalScored = 0;
|
|
753
738
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
739
|
+
let consecutive403 = 0; // 连续 HTTP 403 次数,达 3 次认为被封
|
|
754
740
|
const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
|
|
741
|
+
const CONSECUTIVE_403_BLOCKED = 3; // 连续 3 次 403 认为账号被封
|
|
755
742
|
|
|
756
743
|
// 生成客户端 ID,用于服务端追踪
|
|
757
744
|
const clientId = randomUUID();
|
|
@@ -919,11 +906,12 @@ export async function handleScoreAll(parsed) {
|
|
|
919
906
|
// CDN 限流检测:有拦截则冷却 + 重启 scraper
|
|
920
907
|
const cdnBlocked = enriched.cdnBlockedCount || 0;
|
|
921
908
|
if (cdnBlocked > 0) {
|
|
922
|
-
const
|
|
909
|
+
const cdnRatio = cdnBlocked / (videos.length || 1);
|
|
910
|
+
const coolSec = cdnRatio > 0.3 ? 120 : 60;
|
|
923
911
|
log(
|
|
924
|
-
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(
|
|
912
|
+
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
|
|
925
913
|
);
|
|
926
|
-
await
|
|
914
|
+
await new Promise((r) => setTimeout(r, coolSec * 1000));
|
|
927
915
|
log(` 正在重启 TikTokScraper...`);
|
|
928
916
|
await enrichScraper.restart();
|
|
929
917
|
log(` ✅ TikTokScraper 已重启`);
|
|
@@ -969,21 +957,11 @@ export async function handleScoreAll(parsed) {
|
|
|
969
957
|
` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
|
|
970
958
|
);
|
|
971
959
|
log("");
|
|
960
|
+
consecutive403 = 0; // 成功完成,重置 403 计数器
|
|
972
961
|
await recyclePage();
|
|
973
962
|
await maybeRecycleForMemory();
|
|
974
963
|
await randomDelay(3000, 7000);
|
|
975
964
|
} catch (e) {
|
|
976
|
-
if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
|
|
977
|
-
log(` ⚠️ CDN 被封: ${e.message}`);
|
|
978
|
-
result.status = "dead";
|
|
979
|
-
result.error = "cdn_blocked";
|
|
980
|
-
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
981
|
-
totalScored++;
|
|
982
|
-
const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
|
|
983
|
-
log(` 冷却 ${cooldownSec} 秒后再继续...`);
|
|
984
|
-
await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
|
|
985
|
-
continue;
|
|
986
|
-
}
|
|
987
965
|
// 区分网络错误和业务错误
|
|
988
966
|
const isNetworkError =
|
|
989
967
|
e.code === "ECONNREFUSED" ||
|
|
@@ -998,12 +976,30 @@ export async function handleScoreAll(parsed) {
|
|
|
998
976
|
await new Promise((r) => setTimeout(r, 15000));
|
|
999
977
|
continue;
|
|
1000
978
|
}
|
|
979
|
+
// 检测 HTTP 403(TikTok 拒绝请求,可能是账号/会话被封)
|
|
980
|
+
const is403 =
|
|
981
|
+
e.message &&
|
|
982
|
+
(e.message.includes("HTTP 403") ||
|
|
983
|
+
e.message.includes("status 403") ||
|
|
984
|
+
e.message.includes("status code 403"));
|
|
985
|
+
|
|
986
|
+
if (is403) {
|
|
987
|
+
consecutive403++;
|
|
988
|
+
log(
|
|
989
|
+
` 🚫 HTTP 403 — 连续 ${consecutive403}/${CONSECUTIVE_403_BLOCKED} 次`,
|
|
990
|
+
);
|
|
991
|
+
} else {
|
|
992
|
+
consecutive403 = 0;
|
|
993
|
+
}
|
|
994
|
+
|
|
1001
995
|
log(` ❌ 失败: ${e.message}`);
|
|
1002
996
|
try {
|
|
997
|
+
// 使用当前正在处理的 tag(tag 变量在 try 块开头已赋值,catch 中仍可访问)
|
|
998
|
+
const failedTag = typeof tag === "string" && tag ? tag : "unknown";
|
|
1003
999
|
await reportToServer(
|
|
1004
1000
|
baseUrl,
|
|
1005
1001
|
{
|
|
1006
|
-
tag:
|
|
1002
|
+
tag: failedTag,
|
|
1007
1003
|
status: "error",
|
|
1008
1004
|
score: 0,
|
|
1009
1005
|
error: e.message,
|
|
@@ -1013,6 +1009,18 @@ export async function handleScoreAll(parsed) {
|
|
|
1013
1009
|
);
|
|
1014
1010
|
} catch {}
|
|
1015
1011
|
totalScored++;
|
|
1012
|
+
|
|
1013
|
+
// 连续 3 次 403:认为账号被封,等 2 分钟后切换端口
|
|
1014
|
+
if (consecutive403 >= CONSECUTIVE_403_BLOCKED) {
|
|
1015
|
+
log(
|
|
1016
|
+
` 🔐 连续 ${consecutive403} 次 403,账号可能被封!等待 2 分钟后切换端口...`,
|
|
1017
|
+
);
|
|
1018
|
+
await new Promise((r) => setTimeout(r, 120000));
|
|
1019
|
+
await recycleCdpSession("连续 403 封禁");
|
|
1020
|
+
consecutive403 = 0;
|
|
1021
|
+
continue;
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1016
1024
|
await recyclePage();
|
|
1017
1025
|
await maybeRecycleForMemory();
|
|
1018
1026
|
}
|
package/src/lib/constants.js
CHANGED
|
@@ -260,8 +260,17 @@ const HELP_TEXT = [
|
|
|
260
260
|
" 选项:",
|
|
261
261
|
" --countries <CSV> 目标国家,逗号分隔(默认 13 个欧洲国家)",
|
|
262
262
|
" -s, --server <URL> 服务端地址(默认 http://127.0.0.1:3000)",
|
|
263
|
+
" --port <N> 起始 CDP 端口,端口池默认为 N~N+9(默认 7222)",
|
|
264
|
+
" --discover 开启自动发现:连续 3 轮无新标签时自动生成",
|
|
265
|
+
" 行为:",
|
|
266
|
+
" 连续 3 次 HTTP 403 → 判断账号被封 → 等 2 分钟后自动切换到池中另一端口",
|
|
267
|
+
" 环境变量:",
|
|
268
|
+
" TAG_SCOREALL_PORT_POOL_SIZE=N 端口池大小(默认 10)",
|
|
269
|
+
" TAG_SCOREALL_RECYCLE_RSS_MB=N 内存回收阈值 RSS MB(默认 900)",
|
|
270
|
+
" TAG_SCOREALL_RECYCLE_HEAP_MB=N 内存回收阈值 Heap MB(默认 320)",
|
|
263
271
|
" 示例: tt-help tag score-all",
|
|
264
272
|
" tt-help tag score-all --countries ES -s http://127.0.0.1:3001",
|
|
273
|
+
" tt-help tag score-all --port 9222 -s http://127.0.0.1:3001 --discover",
|
|
265
274
|
"",
|
|
266
275
|
" config [show|set|unset|reset]",
|
|
267
276
|
" config 查看当前配置",
|
package/src/lib/tag-fetcher.js
CHANGED
|
@@ -107,9 +107,6 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
107
107
|
timeout: 30000,
|
|
108
108
|
});
|
|
109
109
|
|
|
110
|
-
if (resp.status() === 403 || resp.status() === 429) {
|
|
111
|
-
throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
|
|
112
|
-
}
|
|
113
110
|
if (resp.status() !== 200) {
|
|
114
111
|
throw new Error(`标签页返回 HTTP ${resp.status()}`);
|
|
115
112
|
}
|