tt-help-cli-ycl 1.3.99 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.99",
3
+ "version": "1.4.0",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -738,7 +738,9 @@ export async function handleScoreAll(parsed) {
738
738
 
739
739
  let totalScored = 0;
740
740
  let emptyRounds = 0; // 连续无任务的轮数
741
+ let consecutive403 = 0; // 连续 HTTP 403 次数,达 3 次认为被封
741
742
  const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
743
+ const CONSECUTIVE_403_BLOCKED = 3; // 连续 3 次 403 认为账号被封
742
744
 
743
745
  // 生成客户端 ID,用于服务端追踪
744
746
  const clientId = randomUUID();
@@ -957,6 +959,7 @@ export async function handleScoreAll(parsed) {
957
959
  ` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
958
960
  );
959
961
  log("");
962
+ consecutive403 = 0; // 成功完成,重置 403 计数器
960
963
  await recyclePage();
961
964
  await maybeRecycleForMemory();
962
965
  await randomDelay(3000, 7000);
@@ -975,12 +978,29 @@ export async function handleScoreAll(parsed) {
975
978
  await new Promise((r) => setTimeout(r, 15000));
976
979
  continue;
977
980
  }
981
+ // 检测 HTTP 403(TikTok 拒绝请求,可能是账号/会话被封)
982
+ const is403 =
983
+ e.message &&
984
+ (e.message.includes("HTTP 403") ||
985
+ e.message.includes("status 403") ||
986
+ e.message.includes("status code 403"));
987
+
988
+ if (is403) {
989
+ consecutive403++;
990
+ log(` 🚫 HTTP 403 — 连续 ${consecutive403}/${CONSECUTIVE_403_BLOCKED} 次`);
991
+ } else {
992
+ consecutive403 = 0;
993
+ }
994
+
978
995
  log(` ❌ 失败: ${e.message}`);
979
996
  try {
997
+ // 使用当前正在处理的 tag(tag 变量在 try 块开头已赋值,catch 中仍可访问)
998
+ const failedTag =
999
+ typeof tag === "string" && tag ? tag : "unknown";
980
1000
  await reportToServer(
981
1001
  baseUrl,
982
1002
  {
983
- tag: "",
1003
+ tag: failedTag,
984
1004
  status: "error",
985
1005
  score: 0,
986
1006
  error: e.message,
@@ -990,6 +1010,18 @@ export async function handleScoreAll(parsed) {
990
1010
  );
991
1011
  } catch {}
992
1012
  totalScored++;
1013
+
1014
+ // 连续 3 次 403:认为账号被封,等 2 分钟后切换端口
1015
+ if (consecutive403 >= CONSECUTIVE_403_BLOCKED) {
1016
+ log(
1017
+ ` 🔐 连续 ${consecutive403} 次 403,账号可能被封!等待 2 分钟后切换端口...`,
1018
+ );
1019
+ await new Promise((r) => setTimeout(r, 120000));
1020
+ await recycleCdpSession("连续 403 封禁");
1021
+ consecutive403 = 0;
1022
+ continue;
1023
+ }
1024
+
993
1025
  await recyclePage();
994
1026
  await maybeRecycleForMemory();
995
1027
  }
@@ -1160,59 +1160,112 @@ export function createStore(filePath, options = {}) {
1160
1160
  })();
1161
1161
  }
1162
1162
 
1163
- // 常规移动:INSERT + DELETE 事务
1164
- const moveTxn = getDb().transaction(() => {
1165
- getDb()
1166
- .prepare(
1167
- `
1168
- INSERT OR IGNORE INTO jobs (
1169
- unique_id, nickname, status, sources, pinned,
1170
- tt_seller, verified, video_count, comment_count,
1171
- guessed_location, location_created, confirmed_location,
1172
- follower_count, following_count, heart_count,
1173
- created_at, updated_at, region, signature, bio_link, sec_uid,
1174
- status_code, latest_video_time, user_create_time
1175
- )
1176
- SELECT
1177
- unique_id, nickname, 'pending', sources, pinned,
1178
- tt_seller, verified, video_count, comment_count,
1179
- guessed_location, location_created, confirmed_location,
1180
- follower_count, following_count, heart_count,
1181
- created_at, updated_at, region, signature, bio_link, sec_uid,
1182
- status_code, latest_video_time, user_create_time
1183
- FROM raw_jobs
1184
- WHERE ${whereSql}
1163
+ // 常规移动:多国家时先按国家均衡补充,再用全局兜底补齐剩余额度
1164
+ const insertFromRawSql = `
1165
+ INSERT OR IGNORE INTO jobs (
1166
+ unique_id, nickname, status, sources, pinned,
1167
+ tt_seller, verified, video_count, comment_count,
1168
+ guessed_location, location_created, confirmed_location,
1169
+ follower_count, following_count, heart_count,
1170
+ created_at, updated_at, region, signature, bio_link, sec_uid,
1171
+ status_code, latest_video_time, user_create_time
1172
+ )
1173
+ SELECT
1174
+ unique_id, nickname, 'pending', sources, pinned,
1175
+ tt_seller, verified, video_count, comment_count,
1176
+ guessed_location, location_created, confirmed_location,
1177
+ follower_count, following_count, heart_count,
1178
+ created_at, updated_at, region, signature, bio_link, sec_uid,
1179
+ status_code, latest_video_time, user_create_time
1180
+ FROM raw_jobs
1181
+ WHERE __WHERE__
1182
+ ORDER BY
1183
+ CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1184
+ COALESCE(video_count, 0) DESC, created_at DESC
1185
+ LIMIT ?
1186
+ `;
1187
+
1188
+ const deleteFromRawSql = `
1189
+ DELETE FROM raw_jobs
1190
+ WHERE unique_id IN (
1191
+ SELECT unique_id FROM raw_jobs
1192
+ WHERE __WHERE__
1185
1193
  ORDER BY
1186
1194
  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1187
1195
  COALESCE(video_count, 0) DESC, created_at DESC
1188
1196
  LIMIT ?
1189
- `,
1190
- )
1191
- .run(...args, safeLimit);
1197
+ )
1198
+ `;
1192
1199
 
1193
- // 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
1194
- getDb()
1195
- .prepare(
1196
- `
1197
- DELETE FROM raw_jobs
1198
- WHERE unique_id IN (
1199
- SELECT unique_id FROM raw_jobs
1200
- WHERE ${whereSql}
1201
- ORDER BY
1202
- CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
1203
- COALESCE(video_count, 0) DESC, created_at DESC
1204
- LIMIT ?
1205
- )
1206
- `,
1207
- )
1208
- .run(...args, safeLimit);
1200
+ const uniqueLocations = normalizedLocations
1201
+ ? Array.from(new Set(normalizedLocations))
1202
+ : [];
1203
+ const shouldBalanceByCountry = uniqueLocations.length > 1;
1204
+
1205
+ const moveTxn = getDb().transaction(() => {
1206
+ let moved = 0;
1207
+ const movedByCountry = {};
1208
+
1209
+ if (shouldBalanceByCountry) {
1210
+ const baseQuota = Math.floor(safeLimit / uniqueLocations.length);
1211
+ const remainder = safeLimit % uniqueLocations.length;
1212
+
1213
+ for (let i = 0; i < uniqueLocations.length; i++) {
1214
+ if (moved >= safeLimit) break;
1215
+
1216
+ const location = uniqueLocations[i];
1217
+ const quota = baseQuota + (i < remainder ? 1 : 0);
1218
+ const currentLimit = Math.max(0, Math.min(quota, safeLimit - moved));
1219
+ if (!currentLimit) continue;
1220
+
1221
+ const locationWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ?`;
1222
+ const locationArgs = [...args, location];
1223
+
1224
+ getDb()
1225
+ .prepare(insertFromRawSql.replace("__WHERE__", locationWhere))
1226
+ .run(...locationArgs, currentLimit);
1227
+ const del = getDb()
1228
+ .prepare(deleteFromRawSql.replace("__WHERE__", locationWhere))
1229
+ .run(...locationArgs, currentLimit);
1230
+
1231
+ const movedThisCountry = del?.changes || 0;
1232
+ moved += movedThisCountry;
1233
+ movedByCountry[location] = movedThisCountry;
1234
+ }
1235
+
1236
+ // 某些国家库存不足时,用全局查询补齐剩余额度(仍受 whereSql 国家范围约束)
1237
+ const remaining = safeLimit - moved;
1238
+ if (remaining > 0) {
1239
+ getDb()
1240
+ .prepare(insertFromRawSql.replace("__WHERE__", whereSql))
1241
+ .run(...args, remaining);
1242
+ const del = getDb()
1243
+ .prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
1244
+ .run(...args, remaining);
1245
+ moved += del?.changes || 0;
1246
+ }
1247
+
1248
+ console.error(
1249
+ `[data-store] refill 国家均衡: ${uniqueLocations
1250
+ .map((loc) => `${loc}:${movedByCountry[loc] || 0}`)
1251
+ .join(", ")} | total=${moved}`,
1252
+ );
1253
+ } else {
1254
+ getDb()
1255
+ .prepare(insertFromRawSql.replace("__WHERE__", whereSql))
1256
+ .run(...args, safeLimit);
1257
+ const del = getDb()
1258
+ .prepare(deleteFromRawSql.replace("__WHERE__", whereSql))
1259
+ .run(...args, safeLimit);
1260
+ moved = del?.changes || 0;
1261
+ }
1262
+
1263
+ return moved;
1209
1264
  });
1210
1265
 
1211
- moveTxn();
1266
+ const moved = moveTxn();
1212
1267
  markStatsDirty();
1213
-
1214
- const actualMoved = Math.min(count, safeLimit);
1215
- return { moved: actualMoved };
1268
+ return { moved };
1216
1269
  }
1217
1270
 
1218
1271
  async function claimNextJob(
@@ -1512,8 +1565,7 @@ export function createStore(filePath, options = {}) {
1512
1565
  }
1513
1566
  return null;
1514
1567
  }
1515
- const refillResult = (async () => {
1516
- refillLock = Promise.resolve(); // 占位
1568
+ const refillPromise = (async () => {
1517
1569
  const result = refillJobsFromRaw(
1518
1570
  normalizedLocations.length ? normalizedLocations : null,
1519
1571
  500,
@@ -1521,30 +1573,17 @@ export function createStore(filePath, options = {}) {
1521
1573
  );
1522
1574
  // refillJobsFromRaw 在 LLM 模式下返回 Promise
1523
1575
  if (result && typeof result.then === "function") {
1524
- return result.finally(() => {
1525
- refillLock = null;
1526
- });
1576
+ return await result;
1527
1577
  }
1528
1578
  return result;
1529
1579
  })();
1530
- if (refillResult && typeof refillResult.then === "function") {
1531
- const awaited = await refillResult;
1532
- if (awaited.moved > 0) {
1533
- console.error(
1534
- `[data-store] 从 raw_jobs 补充了 ${awaited.moved} 条任务到 jobs`,
1535
- );
1536
- for (const requireVideo of [true, false]) {
1537
- const pinned = findPinnedPending(requireVideo);
1538
- if (pinned) {
1539
- return claimRow(pinned);
1540
- }
1541
- const ranked = findPrioritizedPending(requireVideo);
1542
- if (ranked) {
1543
- return claimRow(ranked);
1544
- }
1545
- }
1546
- }
1547
- } else if (refillResult.moved > 0) {
1580
+ // 让并发请求等待同一个 refill,并且无论成功/失败都释放锁
1581
+ refillLock = refillPromise.finally(() => {
1582
+ refillLock = null;
1583
+ });
1584
+
1585
+ const refillResult = await refillLock;
1586
+ if (refillResult.moved > 0) {
1548
1587
  console.error(
1549
1588
  `[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
1550
1589
  );