tt-help-cli-ycl 1.3.99 → 1.3.100
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +27 -4
- package/src/lib/tag-fetcher.js +3 -0
- package/src/watch/data-store.js +107 -68
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -5,6 +5,7 @@ import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
|
5
5
|
import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
|
|
6
6
|
import { getOrCreatePage } from "../lib/browser/page.js";
|
|
7
7
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
8
|
+
import { CDNBlockedError } from "../lib/parse-ssr.mjs";
|
|
8
9
|
import {
|
|
9
10
|
DEFAULT_TARGET_LOCATIONS,
|
|
10
11
|
isLocationInList,
|
|
@@ -42,6 +43,18 @@ function formatMemoryUsage(mem = process.memoryUsage()) {
|
|
|
42
43
|
return `rss:${(mem.rss / 1024 / 1024).toFixed(0)}MB heap:${(mem.heapUsed / 1024 / 1024).toFixed(0)}MB ext:${(mem.external / 1024 / 1024).toFixed(0)}MB ab:${(mem.arrayBuffers / 1024 / 1024).toFixed(0)}MB`;
|
|
43
44
|
}
|
|
44
45
|
|
|
46
|
+
/**
 * Decide how long to pause after the CDN has rejected requests.
 *
 * The long cooldown is returned when the caller flags a "too many requests"
 * response, or when the share of blocked requests is strictly greater than
 * the ratio threshold; otherwise the short cooldown is returned.
 *
 * @param {number} blockedCount - Number of requests the CDN rejected.
 * @param {number} totalCount - Number of requests attempted; values below 1
 *   are treated as 1 so the division never uses a zero denominator.
 * @param {boolean} [isTooManyRequests=false] - When true, skip the ratio
 *   check and return the long cooldown.
 * @param {object} [options] - Optional tuning; the defaults reproduce the
 *   previously hard-coded constants.
 * @param {number} [options.ratioThreshold=0.3] - Blocked ratio above which
 *   the long cooldown applies.
 * @param {number} [options.longCooldownSec=120] - Long cooldown in seconds.
 * @param {number} [options.shortCooldownSec=60] - Short cooldown in seconds.
 * @returns {number} Cooldown duration in seconds.
 */
function getCdnCooldownSeconds(
  blockedCount,
  totalCount,
  isTooManyRequests = false,
  { ratioThreshold = 0.3, longCooldownSec = 120, shortCooldownSec = 60 } = {},
) {
  if (isTooManyRequests) return longCooldownSec;
  // Clamp the denominator so an empty batch cannot produce Infinity/NaN.
  const blockedRatio = blockedCount / Math.max(totalCount, 1);
  return blockedRatio > ratioThreshold ? longCooldownSec : shortCooldownSec;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Sleep for the cooldown window, then run the two recycle hooks in order.
 *
 * The hooks are awaited sequentially: `recyclePage` first, then
 * `maybeRecycleForMemory`. If a hook throws or rejects, the error propagates
 * and any later hook is not called.
 *
 * @param {number} cooldownSec - Time to wait before recycling, in seconds.
 * @param {Function} recyclePage - Called with no arguments after the wait.
 * @param {Function} maybeRecycleForMemory - Called with no arguments after
 *   `recyclePage` has settled successfully.
 * @returns {Promise<void>} Resolves once both hooks have completed.
 */
async function cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory) {
  const delayMs = cooldownSec * 1000;
  await new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });

  // Run the hooks one after another; a failure stops the sequence.
  for (const hook of [recyclePage, maybeRecycleForMemory]) {
    await hook();
  }
}
|
|
57
|
+
|
|
45
58
|
// 构建带客户端追踪 header 的 fetch 封装
|
|
46
59
|
function buildClientHeaders(clientId, meta, extra = {}) {
|
|
47
60
|
return {
|
|
@@ -906,12 +919,11 @@ export async function handleScoreAll(parsed) {
|
|
|
906
919
|
// CDN 限流检测:有拦截则冷却 + 重启 scraper
|
|
907
920
|
const cdnBlocked = enriched.cdnBlockedCount || 0;
|
|
908
921
|
if (cdnBlocked > 0) {
|
|
909
|
-
const
|
|
910
|
-
const coolSec = cdnRatio > 0.3 ? 120 : 60;
|
|
922
|
+
const coolSec = getCdnCooldownSeconds(cdnBlocked, videos.length);
|
|
911
923
|
log(
|
|
912
|
-
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(
|
|
924
|
+
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${((cdnBlocked / Math.max(videos.length, 1)) * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
|
|
913
925
|
);
|
|
914
|
-
await
|
|
926
|
+
await cooldownAndRecycle(coolSec, recyclePage, maybeRecycleForMemory);
|
|
915
927
|
log(` 正在重启 TikTokScraper...`);
|
|
916
928
|
await enrichScraper.restart();
|
|
917
929
|
log(` ✅ TikTokScraper 已重启`);
|
|
@@ -961,6 +973,17 @@ export async function handleScoreAll(parsed) {
|
|
|
961
973
|
await maybeRecycleForMemory();
|
|
962
974
|
await randomDelay(3000, 7000);
|
|
963
975
|
} catch (e) {
|
|
976
|
+
if (e instanceof CDNBlockedError || /HTTP\s+(403|429)/.test(e.message)) {
|
|
977
|
+
log(` ⚠️ CDN 被封: ${e.message}`);
|
|
978
|
+
result.status = "dead";
|
|
979
|
+
result.error = "cdn_blocked";
|
|
980
|
+
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
981
|
+
totalScored++;
|
|
982
|
+
const cooldownSec = getCdnCooldownSeconds(1, 1, /429/.test(e.message));
|
|
983
|
+
log(` 冷却 ${cooldownSec} 秒后再继续...`);
|
|
984
|
+
await cooldownAndRecycle(cooldownSec, recyclePage, maybeRecycleForMemory);
|
|
985
|
+
continue;
|
|
986
|
+
}
|
|
964
987
|
// 区分网络错误和业务错误
|
|
965
988
|
const isNetworkError =
|
|
966
989
|
e.code === "ECONNREFUSED" ||
|
package/src/lib/tag-fetcher.js
CHANGED
|
@@ -107,6 +107,9 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
107
107
|
timeout: 30000,
|
|
108
108
|
});
|
|
109
109
|
|
|
110
|
+
if (resp.status() === 403 || resp.status() === 429) {
|
|
111
|
+
throw new CDNBlockedError(`标签页返回 HTTP ${resp.status()}`);
|
|
112
|
+
}
|
|
110
113
|
if (resp.status() !== 200) {
|
|
111
114
|
throw new Error(`标签页返回 HTTP ${resp.status()}`);
|
|
112
115
|
}
|
package/src/watch/data-store.js
CHANGED
|
@@ -1160,59 +1160,112 @@ export function createStore(filePath, options = {}) {
|
|
|
1160
1160
|
})();
|
|
1161
1161
|
}
|
|
1162
1162
|
|
|
1163
|
-
//
|
|
1164
|
-
const
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1163
|
+
// 常规移动:多国家时先按国家均衡补充,再用全局兜底补齐剩余额度
|
|
1164
|
+
const insertFromRawSql = `
|
|
1165
|
+
INSERT OR IGNORE INTO jobs (
|
|
1166
|
+
unique_id, nickname, status, sources, pinned,
|
|
1167
|
+
tt_seller, verified, video_count, comment_count,
|
|
1168
|
+
guessed_location, location_created, confirmed_location,
|
|
1169
|
+
follower_count, following_count, heart_count,
|
|
1170
|
+
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
1171
|
+
status_code, latest_video_time, user_create_time
|
|
1172
|
+
)
|
|
1173
|
+
SELECT
|
|
1174
|
+
unique_id, nickname, 'pending', sources, pinned,
|
|
1175
|
+
tt_seller, verified, video_count, comment_count,
|
|
1176
|
+
guessed_location, location_created, confirmed_location,
|
|
1177
|
+
follower_count, following_count, heart_count,
|
|
1178
|
+
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
1179
|
+
status_code, latest_video_time, user_create_time
|
|
1180
|
+
FROM raw_jobs
|
|
1181
|
+
WHERE __WHERE__
|
|
1182
|
+
ORDER BY
|
|
1183
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
1184
|
+
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1185
|
+
LIMIT ?
|
|
1186
|
+
`;
|
|
1187
|
+
|
|
1188
|
+
const deleteFromRawSql = `
|
|
1189
|
+
DELETE FROM raw_jobs
|
|
1190
|
+
WHERE unique_id IN (
|
|
1191
|
+
SELECT unique_id FROM raw_jobs
|
|
1192
|
+
WHERE __WHERE__
|
|
1185
1193
|
ORDER BY
|
|
1186
1194
|
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
1187
1195
|
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1188
1196
|
LIMIT ?
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
.run(...args, safeLimit);
|
|
1197
|
+
)
|
|
1198
|
+
`;
|
|
1192
1199
|
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1200
|
+
const uniqueLocations = normalizedLocations
|
|
1201
|
+
? Array.from(new Set(normalizedLocations))
|
|
1202
|
+
: [];
|
|
1203
|
+
const shouldBalanceByCountry = uniqueLocations.length > 1;
|
|
1204
|
+
|
|
1205
|
+
const moveTxn = getDb().transaction(() => {
|
|
1206
|
+
let moved = 0;
|
|
1207
|
+
const movedByCountry = {};
|
|
1208
|
+
|
|
1209
|
+
if (shouldBalanceByCountry) {
|
|
1210
|
+
const baseQuota = Math.floor(safeLimit / uniqueLocations.length);
|
|
1211
|
+
const remainder = safeLimit % uniqueLocations.length;
|
|
1212
|
+
|
|
1213
|
+
for (let i = 0; i < uniqueLocations.length; i++) {
|
|
1214
|
+
if (moved >= safeLimit) break;
|
|
1215
|
+
|
|
1216
|
+
const location = uniqueLocations[i];
|
|
1217
|
+
const quota = baseQuota + (i < remainder ? 1 : 0);
|
|
1218
|
+
const currentLimit = Math.max(0, Math.min(quota, safeLimit - moved));
|
|
1219
|
+
if (!currentLimit) continue;
|
|
1220
|
+
|
|
1221
|
+
const locationWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ?`;
|
|
1222
|
+
const locationArgs = [...args, location];
|
|
1223
|
+
|
|
1224
|
+
getDb()
|
|
1225
|
+
.prepare(insertFromRawSql.replace("__WHERE__", locationWhere))
|
|
1226
|
+
.run(...locationArgs, currentLimit);
|
|
1227
|
+
const del = getDb()
|
|
1228
|
+
.prepare(deleteFromRawSql.replace("__WHERE__", locationWhere))
|
|
1229
|
+
.run(...locationArgs, currentLimit);
|
|
1230
|
+
|
|
1231
|
+
const movedThisCountry = del?.changes || 0;
|
|
1232
|
+
moved += movedThisCountry;
|
|
1233
|
+
movedByCountry[location] = movedThisCountry;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
// 某些国家库存不足时,用全局查询补齐剩余额度(仍受 whereSql 国家范围约束)
|
|
1237
|
+
const remaining = safeLimit - moved;
|
|
1238
|
+
if (remaining > 0) {
|
|
1239
|
+
getDb()
|
|
1240
|
+
.prepare(insertFromRawSql.replace("__WHERE__", whereSql))
|
|
1241
|
+
.run(...args, remaining);
|
|
1242
|
+
const del = getDb()
|
|
1243
|
+
.prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
|
|
1244
|
+
.run(...args, remaining);
|
|
1245
|
+
moved += del?.changes || 0;
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
console.error(
|
|
1249
|
+
`[data-store] refill 国家均衡: ${uniqueLocations
|
|
1250
|
+
.map((loc) => `${loc}:${movedByCountry[loc] || 0}`)
|
|
1251
|
+
.join(", ")} | total=${moved}`,
|
|
1252
|
+
);
|
|
1253
|
+
} else {
|
|
1254
|
+
getDb()
|
|
1255
|
+
.prepare(insertFromRawSql.replace("__WHERE__", whereSql))
|
|
1256
|
+
.run(...args, safeLimit);
|
|
1257
|
+
const del = getDb()
|
|
1258
|
+
.prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
|
|
1259
|
+
.run(...args, safeLimit);
|
|
1260
|
+
moved = del?.changes || 0;
|
|
1261
|
+
}
|
|
1262
|
+
|
|
1263
|
+
return moved;
|
|
1209
1264
|
});
|
|
1210
1265
|
|
|
1211
|
-
moveTxn();
|
|
1266
|
+
const moved = moveTxn();
|
|
1212
1267
|
markStatsDirty();
|
|
1213
|
-
|
|
1214
|
-
const actualMoved = Math.min(count, safeLimit);
|
|
1215
|
-
return { moved: actualMoved };
|
|
1268
|
+
return { moved };
|
|
1216
1269
|
}
|
|
1217
1270
|
|
|
1218
1271
|
async function claimNextJob(
|
|
@@ -1512,8 +1565,7 @@ export function createStore(filePath, options = {}) {
|
|
|
1512
1565
|
}
|
|
1513
1566
|
return null;
|
|
1514
1567
|
}
|
|
1515
|
-
const
|
|
1516
|
-
refillLock = Promise.resolve(); // 占位
|
|
1568
|
+
const refillPromise = (async () => {
|
|
1517
1569
|
const result = refillJobsFromRaw(
|
|
1518
1570
|
normalizedLocations.length ? normalizedLocations : null,
|
|
1519
1571
|
500,
|
|
@@ -1521,30 +1573,17 @@ export function createStore(filePath, options = {}) {
|
|
|
1521
1573
|
);
|
|
1522
1574
|
// refillJobsFromRaw 在 LLM 模式下返回 Promise
|
|
1523
1575
|
if (result && typeof result.then === "function") {
|
|
1524
|
-
return result
|
|
1525
|
-
refillLock = null;
|
|
1526
|
-
});
|
|
1576
|
+
return await result;
|
|
1527
1577
|
}
|
|
1528
1578
|
return result;
|
|
1529
1579
|
})();
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
const pinned = findPinnedPending(requireVideo);
|
|
1538
|
-
if (pinned) {
|
|
1539
|
-
return claimRow(pinned);
|
|
1540
|
-
}
|
|
1541
|
-
const ranked = findPrioritizedPending(requireVideo);
|
|
1542
|
-
if (ranked) {
|
|
1543
|
-
return claimRow(ranked);
|
|
1544
|
-
}
|
|
1545
|
-
}
|
|
1546
|
-
}
|
|
1547
|
-
} else if (refillResult.moved > 0) {
|
|
1580
|
+
// 让并发请求等待同一个 refill,并且无论成功/失败都释放锁
|
|
1581
|
+
refillLock = refillPromise.finally(() => {
|
|
1582
|
+
refillLock = null;
|
|
1583
|
+
});
|
|
1584
|
+
|
|
1585
|
+
const refillResult = await refillLock;
|
|
1586
|
+
if (refillResult.moved > 0) {
|
|
1548
1587
|
console.error(
|
|
1549
1588
|
`[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
|
|
1550
1589
|
);
|