tt-help-cli-ycl 1.3.61 → 1.3.63

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/lib/args.js CHANGED
@@ -180,6 +180,7 @@ function parseExploreArgs(args) {
180
180
  let explorePortCount = null;
181
181
  let exploreUserId = null;
182
182
  let exploreMaxVideos = 16;
183
+ let exploreProxy = null;
183
184
 
184
185
  const positional = [];
185
186
  const PRESETS = ["fast", "normal", "slow", "stealth"];
@@ -218,6 +219,8 @@ function parseExploreArgs(args) {
218
219
  exploreUserId = args[++i];
219
220
  } else if (arg === "--max-videos") {
220
221
  exploreMaxVideos = parseInt(args[++i]) || 16;
222
+ } else if (arg === "--proxy") {
223
+ exploreProxy = args[++i];
221
224
  } else {
222
225
  positional.push(arg);
223
226
  }
@@ -258,6 +261,7 @@ function parseExploreArgs(args) {
258
261
  explorePortCount,
259
262
  exploreUserId,
260
263
  exploreMaxVideos,
264
+ exploreProxy,
261
265
  urls: [],
262
266
  outputFormat: "json",
263
267
  exploreCount: 0,
@@ -439,39 +443,77 @@ function parseDbImportArgs(args) {
439
443
  function parseRefreshArgs(args) {
440
444
  let serverUrl = defaultServer;
441
445
  let explorePreset = "normal";
442
- let exploreMaxComments = 10;
443
- let exploreMaxGuess = 0;
446
+ let exploreInterval = 30;
447
+ let exploreEnableFollow = true;
448
+ let exploreMaxFollowing = 100;
449
+ let exploreMaxFollowers = 100;
450
+ let exploreLocation = DEFAULT_TARGET_LOCATIONS_CSV;
451
+ let exploreMaxUsers = 0;
444
452
  let explorePort = null;
453
+ let exploreBasePort = null;
454
+ let explorePortCount = null;
445
455
  let exploreProfile = null;
446
456
  let exploreUserId = null;
457
+ let exploreMaxVideos = 16;
458
+ let exploreRedoMaxAge = 0;
459
+ let exploreProxy = null;
447
460
 
448
461
  for (let i = 0; i < args.length; i++) {
449
462
  const arg = args[i];
450
463
  if (arg === "--server") {
451
464
  serverUrl = args[++i];
452
- } else if (arg === "--comments") {
453
- exploreMaxComments = parseInt(args[++i]) || 10;
454
- } else if (arg === "--guess") {
455
- exploreMaxGuess = parseInt(args[++i]) || 0;
456
465
  } else if (arg === "--preset") {
457
466
  explorePreset = args[++i];
467
+ } else if (arg === "-i" || arg === "--interval") {
468
+ exploreInterval = parseInt(args[++i], 10) || 30;
458
469
  } else if (arg === "--port") {
459
470
  explorePort = parseInt(args[++i]) || 9222;
471
+ } else if (arg === "--base-port") {
472
+ exploreBasePort = parseInt(args[++i]) || 9222;
473
+ } else if (arg === "--port-count") {
474
+ explorePortCount = parseInt(args[++i]) || 10;
460
475
  } else if (arg === "--profile") {
461
476
  exploreProfile = args[++i];
462
477
  } else if (arg === "--user-id") {
463
478
  exploreUserId = args[++i];
479
+ } else if (arg === "--max-videos") {
480
+ exploreMaxVideos = parseInt(args[++i]) || 16;
481
+ } else if (arg === "--max-following") {
482
+ exploreMaxFollowing = parseInt(args[++i]) || 100;
483
+ } else if (arg === "--max-followers") {
484
+ exploreMaxFollowers = parseInt(args[++i]) || 100;
485
+ } else if (arg === "--max-age") {
486
+ exploreRedoMaxAge = parseInt(args[++i]) || 43200;
487
+ } else if (arg === "--proxy") {
488
+ exploreProxy = args[++i];
489
+ } else if (arg === "--location") {
490
+ exploreLocation = args[++i];
491
+ } else if (arg === "--enable-follow") {
492
+ exploreEnableFollow = true;
493
+ } else if (arg === "--disable-follow") {
494
+ exploreEnableFollow = false;
495
+ } else if (arg === "--max-users") {
496
+ exploreMaxUsers = parseInt(args[++i]) || 0;
464
497
  }
465
498
  }
466
499
 
467
500
  return {
468
501
  subcommand: "refresh",
469
502
  explorePreset,
470
- exploreMaxComments,
471
- exploreMaxGuess,
503
+ exploreInterval,
504
+ exploreEnableFollow,
505
+ exploreMaxFollowing,
506
+ exploreMaxFollowers,
507
+ exploreLocation,
508
+ exploreMaxUsers,
472
509
  explorePort,
510
+ exploreBasePort,
511
+ explorePortCount,
473
512
  exploreProfile,
474
513
  exploreUserId,
514
+ exploreMaxVideos,
515
+ exploreRedoMaxAge,
516
+ exploreProxy,
475
517
  serverUrl,
476
518
  urls: [],
477
519
  outputFormat: "json",
@@ -721,6 +763,10 @@ export function parseArgs() {
721
763
  return parseDbImportArgs(args.slice(1));
722
764
  }
723
765
 
766
+ if (args.length > 0 && args[0] === "refresh") {
767
+ return parseRefreshArgs(args.slice(1));
768
+ }
769
+
724
770
  const urls = [];
725
771
  let inputFile = null;
726
772
  let outputFile = null;
@@ -143,7 +143,7 @@ function killEdgeProcesses(targetDir) {
143
143
  });
144
144
  }
145
145
 
146
- function launchEdgeWithCDP(port, userDataDir) {
146
+ function launchEdgeWithCDP(port, userDataDir, proxyServer) {
147
147
  return new Promise((resolve, reject) => {
148
148
  const platform = os.platform();
149
149
  const edgePath = getEdgePath();
@@ -162,14 +162,19 @@ function launchEdgeWithCDP(port, userDataDir) {
162
162
  "--disable-breakpad",
163
163
  "--disable-background-networking",
164
164
  "--disable-sync",
165
- ].join(" ");
165
+ ];
166
+ if (proxyServer) {
167
+ extraArgs.push(`--proxy-server="${proxyServer}"`);
168
+ }
169
+
170
+ const argsStr = extraArgs.join(" ");
166
171
 
167
172
  if (platform === "darwin") {
168
- command = `open -a ${edgePath} --new --args ${extraArgs}`;
173
+ command = `open -a ${edgePath} --new --args ${argsStr}`;
169
174
  } else if (platform === "win32") {
170
- command = `start msedge ${extraArgs}`;
175
+ command = `start msedge ${argsStr}`;
171
176
  } else {
172
- command = `msedge ${extraArgs} &`;
177
+ command = `msedge ${argsStr} &`;
173
178
  }
174
179
 
175
180
  exec(command, (err) => {
@@ -194,6 +199,7 @@ export { killEdgeProcesses };
194
199
  export async function ensureBrowserReady(options = {}) {
195
200
  const port = options.port || DEFAULT_CDP_PORT;
196
201
  const userDataDir = options.userDataDir || DEFAULT_USER_DATA_DIR;
202
+ const proxyServer = options.proxyServer || null;
197
203
  const isCustom = port !== DEFAULT_CDP_PORT || !!options.userDataDir;
198
204
 
199
205
  const isReady = await checkCDPPort(port);
@@ -104,30 +104,54 @@ export async function isLoggedIn(page) {
104
104
 
105
105
  /**
106
106
  * 通过 DOM 元素判断登录状态(验真方案)
107
+ * 使用 locator API + state: 'attached' 来避免 CDP 连接下 waitForSelector 的可见性问题
107
108
  * @returns {boolean|null} true=已登录, false=明确未登录, null=无法判断
108
109
  */
109
110
  export async function isLoggedInByDom(page) {
110
- // 先等客户端渲染完成:登录态元素或登录按钮,哪个先出现就停止等待
111
- const loginOrLoggedInSelector = [
112
- '[class*="DivProfileContainer"]',
113
- '[class*="DivUserContainer"]',
114
- '[class*="UserMenu"]',
115
- '[class*="CurrentUserInfo"]',
116
- 'button:has-text("登录")',
117
- 'button:has-text("Log in")',
118
- 'button:has-text("Sign in")',
119
- ].join(", ");
120
-
121
- const selectorFound = await page
122
- .waitForSelector(loginOrLoggedInSelector, { timeout: DOM_CHECK_TIMEOUT })
123
- .then(() => true)
124
- .catch(() => false);
125
-
126
- if (!selectorFound) {
127
- // 等待超时,DOM 无法判断
128
- return null;
111
+ // 使用 Promise.race 等待:已登录元素 或 登录按钮,哪个先出现就停止
112
+ const loggedInLocators = [
113
+ page.locator('[class*="DivProfileContainer"]'),
114
+ page.locator('[class*="DivUserContainer"]'),
115
+ page.locator('[class*="UserMenu"]'),
116
+ page.locator('[class*="CurrentUserInfo"]'),
117
+ ];
118
+
119
+ const loginButtonLocators = [
120
+ page.getByText("登录", { exact: true }),
121
+ page.getByText("Log in", { exact: true }),
122
+ page.getByText("Sign in", { exact: true }),
123
+ ];
124
+
125
+ // 并发等待:已登录元素 vs 登录按钮
126
+ const waitForLoggedIn = Promise.any(
127
+ loggedInLocators.map((loc) =>
128
+ loc.first().waitFor({ state: "attached", timeout: DOM_CHECK_TIMEOUT }),
129
+ ),
130
+ )
131
+ .then(() => "loggedIn")
132
+ .catch(() => null);
133
+
134
+ const waitForLoginButton = Promise.any(
135
+ loginButtonLocators.map((loc) =>
136
+ loc.first().waitFor({ state: "attached", timeout: DOM_CHECK_TIMEOUT }),
137
+ ),
138
+ )
139
+ .then(() => "loginButton")
140
+ .catch(() => null);
141
+
142
+ // 哪个先完成就返回哪个结果
143
+ const result = await Promise.race([waitForLoggedIn, waitForLoginButton]);
144
+
145
+ if (result === "loginButton") {
146
+ // 明确看到登录按钮 → 未登录
147
+ return false;
148
+ }
149
+ if (result === "loggedIn") {
150
+ // 看到已登录元素 → 已登录
151
+ return true;
129
152
  }
130
153
 
154
+ // 都超时了,回退到 evaluate 做最终判断
131
155
  return page.evaluate(() => {
132
156
  const hasProfileContainer = !!document.querySelector(
133
157
  '[class*="DivProfileContainer"], [class*="DivUserContainer"]',
@@ -143,7 +167,7 @@ export async function isLoggedInByDom(page) {
143
167
  if (hasLoginButton) return false;
144
168
  // 看到已登录元素 → 已登录
145
169
  if (hasProfileContainer || hasUserMenu) return true;
146
- // 元素已出现但都不是登录/未登录标志 → 无法判断
170
+ // 都无法判断
147
171
  return null;
148
172
  });
149
173
  }
@@ -131,12 +131,33 @@ const HELP_TEXT = [
131
131
  " --disable-follow 禁用关注/粉丝提取",
132
132
  " --max-following <数量> 最大获取关注数,默认 50",
133
133
  " --max-followers <数量> 最大获取粉丝数,默认 50",
134
- " --max-users <数量> 最大处理用户数,默认无限制",
135
- " -i, --interval <秒数> 无任务时轮询间隔,默认 10 秒",
136
- " --port <端口号> 固定 CDP 端口(调试用,关闭自动轮换)",
137
- " --base-port <端口号> 起始端口,默认 9222",
138
- " --port-count <数量> 端口数量(账户数),默认 10",
139
- " --user-id <编号> 客户端编号(设备ID),默认自动生成",
134
+ " --max-users <数量> 最大处理用户数,默认无限制",
135
+ " -i, --interval <秒数> 无任务时轮询间隔,默认 10 秒",
136
+ " --port <端口号> 固定 CDP 端口(调试用,关闭自动轮换)",
137
+ " --base-port <端口号> 起始端口,默认 9222",
138
+ " --port-count <数量> 端口数量(账户数),默认 10",
139
+ " --user-id <编号> 客户端编号(设备ID),默认自动生成",
140
+ " --proxy <地址> 浏览器代理(如 socks5://127.0.0.1:1080)",
141
+ "",
142
+ " tt-help refresh [选项]",
143
+ " 对目标商家用户进行轮回刷新,重新采集视频 + 关注 + 粉丝",
144
+ " 筛选条件: tt_seller=1, verified=0, 目标国家",
145
+ " 选项:",
146
+ " --server <URL> 服务端地址,默认 http://127.0.0.1:3001",
147
+ ` --location <国家代码> 国家筛选,逗号分隔,默认 ${DEFAULT_TARGET_LOCATIONS_CSV}`,
148
+ " --max-videos <数量> 每用户最大视频数,默认 16",
149
+ " --enable-follow 启用关注/粉丝提取(默认启用)",
150
+ " --disable-follow 禁用关注/粉丝提取",
151
+ " --max-following <数量> 最大获取关注数,默认 100",
152
+ " --max-followers <数量> 最大获取粉丝数,默认 100",
153
+ " --max-users <数量> 最大处理用户数,默认无限制",
154
+ " -i, --interval <秒数> 无任务时轮询间隔,默认 30 秒",
155
+ " --max-age <秒数> 最小刷新间隔,默认 43200(12小时)",
156
+ " --port <端口号> 固定 CDP 端口(调试用,关闭自动轮换)",
157
+ " --base-port <端口号> 起始端口,默认 9222",
158
+ " --port-count <数量> 端口数量(账户数),默认 10",
159
+ " --user-id <编号> 客户端编号(设备ID),默认自动生成",
160
+ " --proxy <地址> 浏览器代理(如 socks5://127.0.0.1:1080)",
140
161
  "",
141
162
  " tt-help info <URL> [URL2 ...] [--onlyvideo]",
142
163
  " 获取用户/视频信息,支持多个 URL",
@@ -196,9 +217,10 @@ const HELP_TEXT = [
196
217
  " -h, --help 显示帮助",
197
218
  " --version 显示版本号",
198
219
  "",
199
- " 示例: tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@adidas",
200
- " tt-help explore qiqi23280 fast --location ES --max-comments 50",
201
- " tt-help config set server http://127.0.0.1:3001",
220
+ " 示例: tt-help info https://www.tiktok.com/@nike https://www.tiktok.com/@adidas",
221
+ " tt-help explore qiqi23280 fast --location ES --max-comments 50",
222
+ " tt-help refresh --server http://127.0.0.1:3001 --port 9222",
223
+ " tt-help config set server http://127.0.0.1:3001",
202
224
  " tt-help attach -p 5 -i 10",
203
225
  " tt-help watch -o data/result.db",
204
226
  " tt-help videostats data/result.db -p 3",
package/src/main.js CHANGED
@@ -10,6 +10,7 @@ import { handleComments } from "./cli/comments.js";
10
10
  import { handleVideoStats } from "./cli/videostats.js";
11
11
  import { handleDbImport } from "./cli/db-import.js";
12
12
  import { handleWebserver } from "./cli/webserver.js";
13
+ import { handleRefresh } from "./cli/refresh.js";
13
14
 
14
15
  async function main() {
15
16
  const parsed = parseArgs();
@@ -33,6 +34,8 @@ async function main() {
33
34
  return handleVideoStats(parsed);
34
35
  case "db-import":
35
36
  return handleDbImport(parsed);
37
+ case "refresh":
38
+ return handleRefresh(parsed);
36
39
  }
37
40
 
38
41
  const {
package/src/npm-main.js CHANGED
@@ -6,6 +6,7 @@ import { handleAttach } from "./cli/attach.js";
6
6
  import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
7
7
  import { handleOpen } from "./cli/open.js";
8
8
  import { handleComments } from "./cli/comments.js";
9
+ import { handleRefresh } from "./cli/refresh.js";
9
10
 
10
11
  function exitUnsupportedCommand(command) {
11
12
  console.error(
@@ -33,6 +34,8 @@ async function main() {
33
34
  return handleOpen(parsed);
34
35
  case "comments":
35
36
  return handleComments(parsed);
37
+ case "refresh":
38
+ return handleRefresh(parsed);
36
39
  }
37
40
 
38
41
  const {
@@ -2786,9 +2786,10 @@ export function createStore(filePath) {
2786
2786
  return { saved: true, pinned: user.pinned };
2787
2787
  }
2788
2788
 
2789
- function getNextRedoJob(userId) {
2789
+ function getNextRedoJob(userId, maxAgeSeconds = 43200) {
2790
2790
  if (db) {
2791
2791
  const now = Date.now();
2792
+ const threshold = now - maxAgeSeconds * 1000;
2792
2793
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
2793
2794
  const targetLocations = [
2794
2795
  "CZ",
@@ -2813,11 +2814,12 @@ export function createStore(filePath) {
2813
2814
  WHERE tt_seller = 1
2814
2815
  AND verified = 0
2815
2816
  AND location_created IN (${placeholders})
2817
+ AND COALESCE(refresh_time, ?) < ?
2816
2818
  ORDER BY COALESCE(refresh_time, ?) ASC
2817
2819
  LIMIT 1
2818
2820
  `,
2819
2821
  )
2820
- .get(...targetLocations, defaultTime);
2822
+ .get(...targetLocations, defaultTime, threshold, defaultTime);
2821
2823
  if (!row) return null;
2822
2824
  db.prepare(
2823
2825
  "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
@@ -2830,6 +2832,7 @@ export function createStore(filePath) {
2830
2832
  }
2831
2833
 
2832
2834
  const now = Date.now();
2835
+ const threshold = now - maxAgeSeconds * 1000;
2833
2836
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
2834
2837
 
2835
2838
  // 筛选目标国家用户,按 refreshTime 升序取最远的(没有则默认 2016-01-01)
@@ -2855,13 +2858,19 @@ export function createStore(filePath) {
2855
2858
  );
2856
2859
  if (targetUsers.length === 0) return null;
2857
2860
 
2858
- targetUsers.sort((a, b) => {
2861
+ const recentEnough = targetUsers.filter((u) => {
2862
+ const rt = u.refreshTime || defaultTime;
2863
+ return rt < threshold;
2864
+ });
2865
+ if (recentEnough.length === 0) return null;
2866
+
2867
+ recentEnough.sort((a, b) => {
2859
2868
  const ta = a.refreshTime || defaultTime;
2860
2869
  const tb = b.refreshTime || defaultTime;
2861
2870
  return ta - tb;
2862
2871
  });
2863
2872
 
2864
- const next = targetUsers[0];
2873
+ const next = recentEnough[0];
2865
2874
  next.refreshTime = now;
2866
2875
  save();
2867
2876
  return {
@@ -2889,9 +2898,10 @@ export function createStore(filePath) {
2889
2898
  }
2890
2899
  }
2891
2900
  }
2901
+ const newUsers = processDiscoveredUsers(result);
2892
2902
  const ret = updateJobInfo(uniqueId, user, false);
2893
2903
  if (ret.error) return { saved: false, error: ret.error };
2894
- return { saved: true };
2904
+ return { saved: true, newUsers };
2895
2905
  }
2896
2906
 
2897
2907
  const user = getUser(uniqueId);
@@ -2908,8 +2918,8 @@ export function createStore(filePath) {
2908
2918
  }
2909
2919
  }
2910
2920
  }
2911
-
2912
- return { saved: true };
2921
+ const newUsers = processDiscoveredUsers(result);
2922
+ return { saved: true, newUsers };
2913
2923
  }
2914
2924
 
2915
2925
  function reportClientError(
@@ -368,6 +368,10 @@ function renderTable(users) {
368
368
  }
369
369
  const claimTime = u.claimedAt ? formatTime(u.claimedAt) : "-";
370
370
  const procTime = u.processedAt ? formatTime(u.processedAt) : "-";
371
+ const refreshTime =
372
+ u.ttSeller && !u.verified && u.refreshTime
373
+ ? formatTime(u.refreshTime)
374
+ : "-";
371
375
  const statusCodeDisplay =
372
376
  u.statusCode != null && u.statusCode !== 0
373
377
  ? `<span class="tag error" style="font-size:10px">${u.statusCode}</span>`
@@ -378,6 +382,7 @@ function renderTable(users) {
378
382
  <td data-label="粉丝">${fans}</td>
379
383
  <td data-label="视频">${videos}</td>
380
384
  <td data-label="国家">${loc}</td>
385
+ <td data-label="最近刷新" style="font-size:11px;color:#888">${refreshTime}</td>
381
386
  <td data-label="最近发布" style="font-size:11px;color:#888">${latestVideo}</td>
382
387
  <td data-label="猜测国家">${guessedLoc}</td>
383
388
  <td data-label="来源">${sources || "-"}</td>
@@ -125,6 +125,7 @@
125
125
  <th>粉丝</th>
126
126
  <th>视频</th>
127
127
  <th>国家</th>
128
+ <th class="sortable" data-sort="refreshTime">最近刷新 <span class="sort-icon">↕</span></th>
128
129
  <th class="sortable" data-sort="latestVideoTime">最近发布 <span class="sort-icon">↕</span></th>
129
130
  <th>猜测国家</th>
130
131
  <th>来源</th>
@@ -356,7 +356,8 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
356
356
 
357
357
  if (req.method === "GET" && routePath === "/api/redo-job") {
358
358
  const userId = params.userId || "";
359
- const job = store.getNextRedoJob(userId);
359
+ const maxAge = parseInt(params.maxAge) || 43200;
360
+ const job = store.getNextRedoJob(userId, maxAge);
360
361
  if (job) {
361
362
  logJob("REDO-CLAIM", { user: job.uniqueId, clientId: userId });
362
363
  sendJSON(res, 200, { hasJob: true, user: job });
@@ -683,6 +684,7 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
683
684
  followerCount: u.followerCount,
684
685
  locationCreated: u.locationCreated,
685
686
  latestVideoTime: u.latestVideoTime,
687
+ refreshTime: u.refreshTime,
686
688
  guessedLocation: u.guessedLocation,
687
689
  pinned: u.pinned,
688
690
  processedAt: u.processedAt,
@@ -1,213 +0,0 @@
1
- import {
2
- delay,
3
- retryWithBackoff,
4
- detectPageError,
5
- assertPageUrl,
6
- } from "./modules/page-helpers.js";
7
- import { detectCaptcha } from "./modules/captcha-handler.js";
8
- import { getUserInfo, collectVideos } from "../videos/core.js";
9
- import { extractFollowAndFollowers } from "./modules/follow-extractor.js";
10
- import { processExplore } from "./explore-core.js";
11
- import { DEFAULT_TARGET_LOCATIONS_CSV } from "../lib/target-locations.js";
12
-
13
- export async function processRefresh(page, username, serverUrl, options, log) {
14
- const { maxFollowing = 100, maxFollowers = 100, maxVideos = 100 } = options;
15
-
16
- const result = {
17
- userInfo: null,
18
- discoveredVideoAuthors: [],
19
- discoveredFollowing: [],
20
- discoveredFollowers: [],
21
- newUsersAdded: 0,
22
- collectedVideos: 0,
23
- error: null,
24
- };
25
-
26
- try {
27
- log(` 访问 @${username} 主页...`);
28
- const homeUrl = `https://www.tiktok.com/@${username}`;
29
- await retryWithBackoff(
30
- async () => {
31
- await page.goto(homeUrl, {
32
- waitUntil: "domcontentloaded",
33
- timeout: 30000,
34
- });
35
- assertPageUrl(page, `@${username}`);
36
- },
37
- { log },
38
- );
39
- await page
40
- .waitForSelector('[class*="DivVideoList"]', { timeout: 10000 })
41
- .catch(() => {});
42
- await delay(1000, 2000);
43
-
44
- log(" 获取用户信息...");
45
- const info = await getUserInfo(page);
46
- if (info) {
47
- result.userInfo = info;
48
- log(
49
- ` 用户: ${info.nickname || username} | 粉丝: ${info.followerCount || "-"} | 视频: ${info.videoCount || "-"}`,
50
- );
51
- }
52
-
53
- const captcha = await detectCaptcha(page);
54
- if (captcha && captcha.visible) {
55
- log(`[验证码] @${username} 页面出现验证码`);
56
- result.captchaDetected = true;
57
- result.captchaStage = result.captchaStage || "video-page";
58
- result.captchaMessage = result.captchaMessage || "视频页出现验证码";
59
- }
60
-
61
- // 采集视频
62
- log(` 采集视频 (最多 ${maxVideos} 个)...`);
63
- const videoList = await collectVideos(page, username, maxVideos, log);
64
- const videoArray = videoList ? [...videoList.values()] : [];
65
- result.collectedVideos = videoArray.length;
66
- result.discoveredVideoAuthors = videoArray.map((v) => v.author);
67
-
68
- if (videoArray.length <= 0) {
69
- result.noVideo = true;
70
- const pageError = await detectPageError(page);
71
- if (pageError) {
72
- result.restricted = true;
73
- log(` @${username} 页面受限(${pageError}),标记跳过`);
74
- }
75
- return result;
76
- }
77
-
78
- // 采集关注和粉丝
79
- log(` 采集关注 (最多 ${maxFollowing}) + 粉丝 (最多 ${maxFollowers})...`);
80
- try {
81
- const followResult = await extractFollowAndFollowers(page, {
82
- maxFollowing,
83
- maxFollowers,
84
- });
85
- result.discoveredFollowing = followResult.following || [];
86
- result.discoveredFollowers = followResult.followers || [];
87
- log(
88
- ` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`,
89
- );
90
- } catch (e) {
91
- log(` [关注/粉丝采集失败] ${e.message}`);
92
- result.discoveredFollowing = [];
93
- result.discoveredFollowers = [];
94
- }
95
-
96
- // 处理新发现的用户(关注 + 粉丝),循环执行完整 explore
97
- // follow-extractor 返回 [handle, displayName] 数组
98
- const allDiscovered = [
99
- ...result.discoveredFollowing.map((h) => ({
100
- handle: Array.isArray(h) ? h[0] : h,
101
- source: "refresh-following",
102
- })),
103
- ...result.discoveredFollowers.map((h) => ({
104
- handle: Array.isArray(h) ? h[0] : h,
105
- source: "refresh-follower",
106
- })),
107
- ];
108
-
109
- for (const { handle, source } of allDiscovered) {
110
- const uniqueId = handle.replace("@", "");
111
-
112
- // 检查用户是否已存在
113
- const existsResp = await fetch(
114
- `${serverUrl}/api/user-exists/${encodeURIComponent(uniqueId)}`,
115
- );
116
- const existsData = await existsResp.json();
117
-
118
- if (existsData.exists) {
119
- continue;
120
- }
121
-
122
- log(` [新用户] @${uniqueId} 不存在,开始探索 (来源: ${source})...`);
123
- await delay(1000, 2000);
124
-
125
- // 对新用户做完整 explore(与 explore 命令逻辑一致)
126
- const exploreResult = await processExplore(
127
- page,
128
- uniqueId,
129
- {
130
- maxComments: 10,
131
- maxGuess: 0,
132
- enableFollow: true,
133
- maxFollowing: 5,
134
- maxFollowers: 5,
135
- location: DEFAULT_TARGET_LOCATIONS_CSV,
136
- },
137
- log,
138
- );
139
-
140
- // 提交 explore 结果到服务端(和 explore 命令的 commitJob 一致)
141
- if (exploreResult.userInfo) {
142
- const guessedLocation = exploreResult.locationCreated || null;
143
-
144
- const payload = {
145
- userInfo: exploreResult.userInfo || {},
146
- discoveredVideoAuthors: (
147
- exploreResult.discoveredVideoAuthors || []
148
- ).map((item) =>
149
- typeof item === "object" ? { ...item, guessedLocation } : item,
150
- ),
151
- discoveredCommentAuthors: (
152
- exploreResult.discoveredCommentAuthors || []
153
- ).map((author) => ({ author, guessedLocation })),
154
- discoveredGuessAuthors: (
155
- exploreResult.discoveredGuessAuthors || []
156
- ).map((author) => ({ author, guessedLocation })),
157
- discoveredFollowing: (exploreResult.discoveredFollowing || []).map(
158
- (f) => ({
159
- handle: Array.isArray(f) ? f[0] : f,
160
- displayName: Array.isArray(f) ? f[1] : null,
161
- guessedLocation,
162
- }),
163
- ),
164
- discoveredFollowers: (exploreResult.discoveredFollowers || []).map(
165
- (f) => ({
166
- handle: Array.isArray(f) ? f[0] : f,
167
- displayName: Array.isArray(f) ? f[1] : null,
168
- guessedLocation,
169
- }),
170
- ),
171
- processed: exploreResult.processed,
172
- hasFollowData: exploreResult.hasFollowData,
173
- keepFollow: exploreResult.keepFollow,
174
- locationCreated: exploreResult.locationCreated,
175
- noVideo: exploreResult.noVideo,
176
- restricted: exploreResult.restricted,
177
- error: exploreResult.error,
178
- };
179
-
180
- const addResp = await fetch(
181
- `${serverUrl}/api/explore-new/${uniqueId}`,
182
- {
183
- method: "POST",
184
- headers: { "Content-Type": "application/json" },
185
- body: JSON.stringify(payload),
186
- },
187
- );
188
- const addResult = await addResp.json();
189
-
190
- if (!addResult.saved) {
191
- log(` [跳过] @${uniqueId} 提交失败`);
192
- continue;
193
- }
194
-
195
- result.newUsersAdded++;
196
- if (exploreResult.captchaDetected) {
197
- result.captchaDetected = true;
198
- }
199
- log(
200
- ` [已提交] @${uniqueId} ${addResult.created ? "(新用户)" : "(已存在)"} | 发现: ${addResult.newUsers?.length || 0} 个`,
201
- );
202
- }
203
-
204
- await delay(2000, 4000);
205
- }
206
- } catch (e) {
207
- log(` [错误] ${e.message}`);
208
- result.error = e.message;
209
- result.errorStack = e.stack || "";
210
- }
211
-
212
- return result;
213
- }