tt-help-cli-ycl 1.3.80 → 1.3.82

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.80",
3
+ "version": "1.3.82",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
@@ -549,6 +549,7 @@ export async function handleRefresh(options) {
549
549
 
550
550
  const payload = {
551
551
  userInfo: refreshUserInfo,
552
+ topRecentVideo: result.topRecentVideo || null,
552
553
  discoveredFollowing: (result.discoveredFollowing || []).map((f) => ({
553
554
  handle: Array.isArray(f) ? f[0] : f,
554
555
  displayName: Array.isArray(f) ? f[1] : null,
package/src/cli/watch.js CHANGED
@@ -5,19 +5,40 @@ import { startWatchServer, openBrowser } from "../watch/server.js";
5
5
 
6
6
  export async function handleWatch(options) {
7
7
  const dataAnchor = options.dataAnchor || options.outputFile;
8
- const { watchPort } = options;
8
+ const { watchPort, llmRefill, llmRefillMinScore, llmRefillSampleSize } =
9
+ options;
9
10
 
10
11
  if (!dataAnchor) {
11
- console.error("用法: tt-help watch -o <db路径> [-p 端口]");
12
+ console.error("用法: tt-help watch -o <db路径> [-p 端口] [--llm-refill]");
12
13
  console.error(" tt-help watch -o data/result.db");
13
14
  console.error(" tt-help watch -o data/result.db -p 8080");
15
+ console.error(
16
+ " tt-help watch -o data/result.db --llm-refill --llm-refill-min 60",
17
+ );
14
18
  process.exit(1);
15
19
  }
16
20
 
17
21
  mkdirSync(path.dirname(path.resolve(dataAnchor)), { recursive: true });
18
22
 
19
- const store = createStore(dataAnchor);
20
- const { server, port } = await startWatchServer(dataAnchor, watchPort, store);
23
+ const storeOptions = {};
24
+ if (llmRefill) {
25
+ storeOptions.refillLlm = {
26
+ llmScore: true,
27
+ llmMinScore: llmRefillMinScore ?? 60,
28
+ llmSampleSize: llmRefillSampleSize ?? 100,
29
+ };
30
+ console.error(
31
+ `[watch] LLM 自动打分已启用: 最低分 ${storeOptions.refillLlm.llmMinScore}, 采样 ${storeOptions.refillLlm.llmSampleSize}`,
32
+ );
33
+ }
34
+
35
+ const store = createStore(dataAnchor, storeOptions);
36
+ const { server, port } = await startWatchServer(
37
+ dataAnchor,
38
+ watchPort,
39
+ store,
40
+ storeOptions,
41
+ );
21
42
  openBrowser(port);
22
43
 
23
44
  process.once("SIGINT", () => {
@@ -18,7 +18,12 @@ async function processAPIResponse(
18
18
  for (const item of firstPageItems) {
19
19
  if (items.length >= maxVideos) break;
20
20
  const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
21
- items.push({ id: item.id, href, createTime: item.createTime || null });
21
+ items.push({
22
+ id: item.id,
23
+ href,
24
+ createTime: item.createTime || null,
25
+ playCount: item.stats?.playCount || 0,
26
+ });
22
27
  }
23
28
 
24
29
  let cursor = data.cursor;
@@ -32,7 +37,7 @@ async function processAPIResponse(
32
37
 
33
38
  try {
34
39
  const pageData = await (() => {
35
- // 重试包装:处理页面导航导致的执行上下文销毁
40
+ // 重试包装:处理页面导航导致的执行上下文销毁和 CDP 断连
36
41
  const tryEval = async (retries = 3) => {
37
42
  for (let i = 0; i < retries; i++) {
38
43
  try {
@@ -42,8 +47,11 @@ async function processAPIResponse(
42
47
  }, newUrl);
43
48
  } catch (e) {
44
49
  if (
45
- e.message.includes("Execution context was destroyed") &&
46
- i < retries - 1
50
+ e.message &&
51
+ (e.message.includes("Execution context was destroyed") ||
52
+ e.message.includes("Target closed") ||
53
+ e.message.includes("Connection closed") ||
54
+ e.message.includes("Protocol error"))
47
55
  ) {
48
56
  await delay(500 * (i + 1), 500 * (i + 1));
49
57
  } else {
@@ -63,6 +71,7 @@ async function processAPIResponse(
63
71
  id: item.id,
64
72
  href,
65
73
  createTime: item.createTime || null,
74
+ playCount: item.stats?.playCount || 0,
66
75
  });
67
76
  }
68
77
  }
@@ -89,6 +98,14 @@ async function processAPIResponse(
89
98
  async function fetchUserVideosAPI(page, username, maxVideos, log) {
90
99
  log(` [API拦截] 获取 @${username} 视频 ...`);
91
100
 
101
+ // CDP 健康检查:确保 page 可用
102
+ try {
103
+ await page.evaluate(() => 1);
104
+ } catch (e) {
105
+ log(` [API拦截] CDP 连接异常: ${e.message}`);
106
+ throw new Error(`CDP 连接异常: ${e.message}`);
107
+ }
108
+
92
109
  let apiRequestUrl = null;
93
110
  let sawApiRequest = false;
94
111
 
package/src/lib/args.js CHANGED
@@ -347,6 +347,9 @@ function parseInfoArgs(args) {
347
347
  function parseWatchArgs(args) {
348
348
  let dataAnchor = "./result.db";
349
349
  let watchPort = 3001;
350
+ let llmRefill = true;
351
+ let llmRefillMinScore = null;
352
+ let llmRefillSampleSize = null;
350
353
 
351
354
  for (let i = 0; i < args.length; i++) {
352
355
  const arg = args[i];
@@ -354,6 +357,14 @@ function parseWatchArgs(args) {
354
357
  dataAnchor = args[++i];
355
358
  } else if (arg === "-p") {
356
359
  watchPort = parseInt(args[++i]) || 3001;
360
+ } else if (arg === "--llm-refill") {
361
+ llmRefill = true;
362
+ } else if (arg === "--no-llm-refill") {
363
+ llmRefill = false;
364
+ } else if (arg === "--llm-refill-min") {
365
+ llmRefillMinScore = parseInt(args[++i]) || 60;
366
+ } else if (arg === "--llm-refill-sample") {
367
+ llmRefillSampleSize = parseInt(args[++i]) || 100;
357
368
  }
358
369
  }
359
370
 
@@ -362,6 +373,9 @@ function parseWatchArgs(args) {
362
373
  outputFile: dataAnchor,
363
374
  dataAnchor,
364
375
  watchPort,
376
+ llmRefill,
377
+ llmRefillMinScore,
378
+ llmRefillSampleSize,
365
379
  urls: [],
366
380
  outputFormat: "json",
367
381
  exploreCount: 0,
@@ -54,23 +54,40 @@ const PATTERNS = {
54
54
  service_error: ["出错了", "很抱歉"],
55
55
  };
56
56
 
57
- export async function detectPageError(page) {
58
- return page.evaluate((patterns) => {
59
- const body = document.body;
60
- if (!body) return null;
61
- const bodyText = body.innerText;
62
- const lower = bodyText.toLowerCase();
57
+ export async function detectPageError(page, timeout = 10000) {
58
+ try {
59
+ return await page.evaluate(
60
+ (patterns) => {
61
+ const body = document.body;
62
+ if (!body) return null;
63
+ const bodyText = body.innerText;
64
+ const lower = bodyText.toLowerCase();
63
65
 
64
- for (const [type, phrases] of Object.entries(patterns)) {
65
- for (const phrase of phrases) {
66
- if (lower.includes(phrase.toLowerCase())) {
67
- return type;
66
+ for (const [type, phrases] of Object.entries(patterns)) {
67
+ for (const phrase of phrases) {
68
+ if (lower.includes(phrase.toLowerCase())) {
69
+ return type;
70
+ }
71
+ }
68
72
  }
69
- }
70
- }
71
73
 
72
- return null;
73
- }, PATTERNS);
74
+ return null;
75
+ },
76
+ PATTERNS,
77
+ );
78
+ } catch (e) {
79
+ // CDP 断连或超时:返回 null 而非永久挂起
80
+ if (
81
+ e.message &&
82
+ (e.message.includes("Timeout") ||
83
+ e.message.includes("Target closed") ||
84
+ e.message.includes("Connection closed") ||
85
+ e.message.includes("Protocol error"))
86
+ ) {
87
+ return null;
88
+ }
89
+ throw e;
90
+ }
74
91
  }
75
92
 
76
93
  /**
@@ -30,7 +30,7 @@ async function doCollect(
30
30
  }
31
31
 
32
32
  const fn = eval("(" + fnStr + ")");
33
- return fn(el, args);
33
+ return args !== undefined ? fn(el, args) : fn(el);
34
34
  },
35
35
  { fn: fnStr, containerSelector: container, findScrollableFlag: findScrollable, args: extraArgs },
36
36
  );
@@ -8,6 +8,7 @@ import {
8
8
  DEFAULT_TARGET_LOCATIONS_CSV,
9
9
  findFirstMatchingLocation,
10
10
  isLocationInList,
11
+ normalizeLocation,
11
12
  normalizeLocationList,
12
13
  } from "../lib/target-locations.js";
13
14
  import {
@@ -40,6 +41,7 @@ async function processExplore(page, username, options, log) {
40
41
  keepFollow: false,
41
42
  locationCreated: null,
42
43
  latestVideoTime: null,
44
+ topRecentVideo: null,
43
45
  noVideo: false,
44
46
  restricted: false,
45
47
  error: null,
@@ -84,6 +86,27 @@ async function processExplore(page, username, options, log) {
84
86
  if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
85
87
  }
86
88
 
89
+ // 找出 7 天内发布且播放量最大的视频
90
+ const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
91
+ const nowSeconds = Math.floor(Date.now() / 1000);
92
+ const recentVideos = videoArray.filter(
93
+ (v) => v.createTime && nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
94
+ );
95
+ if (recentVideos.length > 0) {
96
+ const topVideo = recentVideos.reduce((max, v) =>
97
+ v.playCount > max.playCount ? v : max,
98
+ );
99
+ result.topRecentVideo = {
100
+ id: topVideo.id,
101
+ href: topVideo.href,
102
+ playCount: topVideo.playCount,
103
+ createTime: topVideo.createTime,
104
+ };
105
+ log(
106
+ ` 7天内最高播放视频: ${topVideo.playCount} 次播放 (${recentVideos.length} 个候选)`,
107
+ );
108
+ }
109
+
87
110
  if (videoArray.length <= 0) {
88
111
  // 视频为空:可能是页面受限或用户真的没有视频
89
112
  result.processed = true;
@@ -110,7 +133,10 @@ async function processExplore(page, username, options, log) {
110
133
  log(
111
134
  ` 国家采样(${locations.length}个): [${locations.filter(Boolean).join(", ") || "无数据"}]`,
112
135
  );
113
- const normalizedLocations = normalizeLocationList(locations, []);
136
+ // 直接标准化,不去重(保留重复值用于频率统计)
137
+ const normalizedLocations = locations
138
+ .map((loc) => normalizeLocation(loc))
139
+ .filter(Boolean);
114
140
 
115
141
  // 统计频率
116
142
  const freq = {};
@@ -7,14 +7,18 @@ import {
7
7
  import { fetchUserVideosAPI } from "../lib/api-interceptor.js";
8
8
 
9
9
  async function getUserInfo(page) {
10
- // 重试包装:处理页面导航导致的执行上下文销毁
10
+ // 重试包装:处理页面导航导致的执行上下文销毁和 CDP 断连
11
11
  const evaluateWithRetry = async (fn, retries = 3) => {
12
12
  for (let i = 0; i < retries; i++) {
13
13
  try {
14
14
  return await page.evaluate(fn);
15
15
  } catch (e) {
16
16
  if (
17
- e.message.includes("Execution context was destroyed") &&
17
+ e.message &&
18
+ (e.message.includes("Execution context was destroyed") ||
19
+ e.message.includes("Target closed") ||
20
+ e.message.includes("Connection closed") ||
21
+ e.message.includes("Protocol error")) &&
18
22
  i < retries - 1
19
23
  ) {
20
24
  await new Promise((r) => setTimeout(r, 500 * (i + 1)));