tt-help-cli-ycl 1.3.80 → 1.3.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/refresh.js +1 -0
- package/src/cli/watch.js +25 -4
- package/src/lib/api-interceptor.js +21 -4
- package/src/lib/args.js +14 -0
- package/src/lib/page-error-detector.js +31 -14
- package/src/lib/scroll-collector.js +1 -1
- package/src/scraper/explore-core.js +27 -1
- package/src/videos/core.js +6 -2
- package/src/watch/data-store.js +701 -96
- package/src/watch/public/app.js +59 -4
- package/src/watch/public/index.html +2 -1
- package/src/watch/public/style.css +25 -0
- package/src/watch/server.js +66 -3
package/package.json
CHANGED
package/src/cli/refresh.js
CHANGED
|
@@ -549,6 +549,7 @@ export async function handleRefresh(options) {
|
|
|
549
549
|
|
|
550
550
|
const payload = {
|
|
551
551
|
userInfo: refreshUserInfo,
|
|
552
|
+
topRecentVideo: result.topRecentVideo || null,
|
|
552
553
|
discoveredFollowing: (result.discoveredFollowing || []).map((f) => ({
|
|
553
554
|
handle: Array.isArray(f) ? f[0] : f,
|
|
554
555
|
displayName: Array.isArray(f) ? f[1] : null,
|
package/src/cli/watch.js
CHANGED
|
@@ -5,19 +5,40 @@ import { startWatchServer, openBrowser } from "../watch/server.js";
|
|
|
5
5
|
|
|
6
6
|
export async function handleWatch(options) {
|
|
7
7
|
const dataAnchor = options.dataAnchor || options.outputFile;
|
|
8
|
-
const { watchPort } =
|
|
8
|
+
const { watchPort, llmRefill, llmRefillMinScore, llmRefillSampleSize } =
|
|
9
|
+
options;
|
|
9
10
|
|
|
10
11
|
if (!dataAnchor) {
|
|
11
|
-
console.error("用法: tt-help watch -o <db路径> [-p 端口]");
|
|
12
|
+
console.error("用法: tt-help watch -o <db路径> [-p 端口] [--llm-refill]");
|
|
12
13
|
console.error(" tt-help watch -o data/result.db");
|
|
13
14
|
console.error(" tt-help watch -o data/result.db -p 8080");
|
|
15
|
+
console.error(
|
|
16
|
+
" tt-help watch -o data/result.db --llm-refill --llm-refill-min 60",
|
|
17
|
+
);
|
|
14
18
|
process.exit(1);
|
|
15
19
|
}
|
|
16
20
|
|
|
17
21
|
mkdirSync(path.dirname(path.resolve(dataAnchor)), { recursive: true });
|
|
18
22
|
|
|
19
|
-
const
|
|
20
|
-
|
|
23
|
+
const storeOptions = {};
|
|
24
|
+
if (llmRefill) {
|
|
25
|
+
storeOptions.refillLlm = {
|
|
26
|
+
llmScore: true,
|
|
27
|
+
llmMinScore: llmRefillMinScore ?? 60,
|
|
28
|
+
llmSampleSize: llmRefillSampleSize ?? 100,
|
|
29
|
+
};
|
|
30
|
+
console.error(
|
|
31
|
+
`[watch] LLM 自动打分已启用: 最低分 ${storeOptions.refillLlm.llmMinScore}, 采样 ${storeOptions.refillLlm.llmSampleSize}`,
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const store = createStore(dataAnchor, storeOptions);
|
|
36
|
+
const { server, port } = await startWatchServer(
|
|
37
|
+
dataAnchor,
|
|
38
|
+
watchPort,
|
|
39
|
+
store,
|
|
40
|
+
storeOptions,
|
|
41
|
+
);
|
|
21
42
|
openBrowser(port);
|
|
22
43
|
|
|
23
44
|
process.once("SIGINT", () => {
|
|
@@ -18,7 +18,12 @@ async function processAPIResponse(
|
|
|
18
18
|
for (const item of firstPageItems) {
|
|
19
19
|
if (items.length >= maxVideos) break;
|
|
20
20
|
const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
|
|
21
|
-
items.push({
|
|
21
|
+
items.push({
|
|
22
|
+
id: item.id,
|
|
23
|
+
href,
|
|
24
|
+
createTime: item.createTime || null,
|
|
25
|
+
playCount: item.stats?.playCount || 0,
|
|
26
|
+
});
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
let cursor = data.cursor;
|
|
@@ -32,7 +37,7 @@ async function processAPIResponse(
|
|
|
32
37
|
|
|
33
38
|
try {
|
|
34
39
|
const pageData = await (() => {
|
|
35
|
-
//
|
|
40
|
+
// 重试包装:处理页面导航导致的执行上下文销毁和 CDP 断连
|
|
36
41
|
const tryEval = async (retries = 3) => {
|
|
37
42
|
for (let i = 0; i < retries; i++) {
|
|
38
43
|
try {
|
|
@@ -42,8 +47,11 @@ async function processAPIResponse(
|
|
|
42
47
|
}, newUrl);
|
|
43
48
|
} catch (e) {
|
|
44
49
|
if (
|
|
45
|
-
e.message
|
|
46
|
-
|
|
50
|
+
e.message &&
|
|
51
|
+
(e.message.includes("Execution context was destroyed") ||
|
|
52
|
+
e.message.includes("Target closed") ||
|
|
53
|
+
e.message.includes("Connection closed") ||
|
|
54
|
+
e.message.includes("Protocol error"))
|
|
47
55
|
) {
|
|
48
56
|
await delay(500 * (i + 1), 500 * (i + 1));
|
|
49
57
|
} else {
|
|
@@ -63,6 +71,7 @@ async function processAPIResponse(
|
|
|
63
71
|
id: item.id,
|
|
64
72
|
href,
|
|
65
73
|
createTime: item.createTime || null,
|
|
74
|
+
playCount: item.stats?.playCount || 0,
|
|
66
75
|
});
|
|
67
76
|
}
|
|
68
77
|
}
|
|
@@ -89,6 +98,14 @@ async function processAPIResponse(
|
|
|
89
98
|
async function fetchUserVideosAPI(page, username, maxVideos, log) {
|
|
90
99
|
log(` [API拦截] 获取 @${username} 视频 ...`);
|
|
91
100
|
|
|
101
|
+
// CDP 健康检查:确保 page 可用
|
|
102
|
+
try {
|
|
103
|
+
await page.evaluate(() => 1);
|
|
104
|
+
} catch (e) {
|
|
105
|
+
log(` [API拦截] CDP 连接异常: ${e.message}`);
|
|
106
|
+
throw new Error(`CDP 连接异常: ${e.message}`);
|
|
107
|
+
}
|
|
108
|
+
|
|
92
109
|
let apiRequestUrl = null;
|
|
93
110
|
let sawApiRequest = false;
|
|
94
111
|
|
package/src/lib/args.js
CHANGED
|
@@ -347,6 +347,9 @@ function parseInfoArgs(args) {
|
|
|
347
347
|
function parseWatchArgs(args) {
|
|
348
348
|
let dataAnchor = "./result.db";
|
|
349
349
|
let watchPort = 3001;
|
|
350
|
+
let llmRefill = true;
|
|
351
|
+
let llmRefillMinScore = null;
|
|
352
|
+
let llmRefillSampleSize = null;
|
|
350
353
|
|
|
351
354
|
for (let i = 0; i < args.length; i++) {
|
|
352
355
|
const arg = args[i];
|
|
@@ -354,6 +357,14 @@ function parseWatchArgs(args) {
|
|
|
354
357
|
dataAnchor = args[++i];
|
|
355
358
|
} else if (arg === "-p") {
|
|
356
359
|
watchPort = parseInt(args[++i]) || 3001;
|
|
360
|
+
} else if (arg === "--llm-refill") {
|
|
361
|
+
llmRefill = true;
|
|
362
|
+
} else if (arg === "--no-llm-refill") {
|
|
363
|
+
llmRefill = false;
|
|
364
|
+
} else if (arg === "--llm-refill-min") {
|
|
365
|
+
llmRefillMinScore = parseInt(args[++i]) || 60;
|
|
366
|
+
} else if (arg === "--llm-refill-sample") {
|
|
367
|
+
llmRefillSampleSize = parseInt(args[++i]) || 100;
|
|
357
368
|
}
|
|
358
369
|
}
|
|
359
370
|
|
|
@@ -362,6 +373,9 @@ function parseWatchArgs(args) {
|
|
|
362
373
|
outputFile: dataAnchor,
|
|
363
374
|
dataAnchor,
|
|
364
375
|
watchPort,
|
|
376
|
+
llmRefill,
|
|
377
|
+
llmRefillMinScore,
|
|
378
|
+
llmRefillSampleSize,
|
|
365
379
|
urls: [],
|
|
366
380
|
outputFormat: "json",
|
|
367
381
|
exploreCount: 0,
|
|
@@ -54,23 +54,40 @@ const PATTERNS = {
|
|
|
54
54
|
service_error: ["出错了", "很抱歉"],
|
|
55
55
|
};
|
|
56
56
|
|
|
57
|
-
export async function detectPageError(page) {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
57
|
+
export async function detectPageError(page, timeout = 10000) {
|
|
58
|
+
try {
|
|
59
|
+
return await page.evaluate(
|
|
60
|
+
(patterns) => {
|
|
61
|
+
const body = document.body;
|
|
62
|
+
if (!body) return null;
|
|
63
|
+
const bodyText = body.innerText;
|
|
64
|
+
const lower = bodyText.toLowerCase();
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
66
|
+
for (const [type, phrases] of Object.entries(patterns)) {
|
|
67
|
+
for (const phrase of phrases) {
|
|
68
|
+
if (lower.includes(phrase.toLowerCase())) {
|
|
69
|
+
return type;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
68
72
|
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
+
return null;
|
|
75
|
+
},
|
|
76
|
+
PATTERNS,
|
|
77
|
+
);
|
|
78
|
+
} catch (e) {
|
|
79
|
+
// CDP 断连或超时:返回 null 而非永久挂起
|
|
80
|
+
if (
|
|
81
|
+
e.message &&
|
|
82
|
+
(e.message.includes("Timeout") ||
|
|
83
|
+
e.message.includes("Target closed") ||
|
|
84
|
+
e.message.includes("Connection closed") ||
|
|
85
|
+
e.message.includes("Protocol error"))
|
|
86
|
+
) {
|
|
87
|
+
return null;
|
|
88
|
+
}
|
|
89
|
+
throw e;
|
|
90
|
+
}
|
|
74
91
|
}
|
|
75
92
|
|
|
76
93
|
/**
|
|
@@ -30,7 +30,7 @@ async function doCollect(
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
const fn = eval("(" + fnStr + ")");
|
|
33
|
-
return fn(el, args);
|
|
33
|
+
return args !== undefined ? fn(el, args) : fn(el);
|
|
34
34
|
},
|
|
35
35
|
{ fn: fnStr, containerSelector: container, findScrollableFlag: findScrollable, args: extraArgs },
|
|
36
36
|
);
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
DEFAULT_TARGET_LOCATIONS_CSV,
|
|
9
9
|
findFirstMatchingLocation,
|
|
10
10
|
isLocationInList,
|
|
11
|
+
normalizeLocation,
|
|
11
12
|
normalizeLocationList,
|
|
12
13
|
} from "../lib/target-locations.js";
|
|
13
14
|
import {
|
|
@@ -40,6 +41,7 @@ async function processExplore(page, username, options, log) {
|
|
|
40
41
|
keepFollow: false,
|
|
41
42
|
locationCreated: null,
|
|
42
43
|
latestVideoTime: null,
|
|
44
|
+
topRecentVideo: null,
|
|
43
45
|
noVideo: false,
|
|
44
46
|
restricted: false,
|
|
45
47
|
error: null,
|
|
@@ -84,6 +86,27 @@ async function processExplore(page, username, options, log) {
|
|
|
84
86
|
if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
|
|
85
87
|
}
|
|
86
88
|
|
|
89
|
+
// 找出 7 天内发布且播放量最大的视频
|
|
90
|
+
const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
|
|
91
|
+
const nowSeconds = Math.floor(Date.now() / 1000);
|
|
92
|
+
const recentVideos = videoArray.filter(
|
|
93
|
+
(v) => v.createTime && nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
|
|
94
|
+
);
|
|
95
|
+
if (recentVideos.length > 0) {
|
|
96
|
+
const topVideo = recentVideos.reduce((max, v) =>
|
|
97
|
+
v.playCount > max.playCount ? v : max,
|
|
98
|
+
);
|
|
99
|
+
result.topRecentVideo = {
|
|
100
|
+
id: topVideo.id,
|
|
101
|
+
href: topVideo.href,
|
|
102
|
+
playCount: topVideo.playCount,
|
|
103
|
+
createTime: topVideo.createTime,
|
|
104
|
+
};
|
|
105
|
+
log(
|
|
106
|
+
` 7天内最高播放视频: ${topVideo.playCount} 次播放 (${recentVideos.length} 个候选)`,
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
|
|
87
110
|
if (videoArray.length <= 0) {
|
|
88
111
|
// 视频为空:可能是页面受限或用户真的没有视频
|
|
89
112
|
result.processed = true;
|
|
@@ -110,7 +133,10 @@ async function processExplore(page, username, options, log) {
|
|
|
110
133
|
log(
|
|
111
134
|
` 国家采样(${locations.length}个): [${locations.filter(Boolean).join(", ") || "无数据"}]`,
|
|
112
135
|
);
|
|
113
|
-
|
|
136
|
+
// 直接标准化,不去重(保留重复值用于频率统计)
|
|
137
|
+
const normalizedLocations = locations
|
|
138
|
+
.map((loc) => normalizeLocation(loc))
|
|
139
|
+
.filter(Boolean);
|
|
114
140
|
|
|
115
141
|
// 统计频率
|
|
116
142
|
const freq = {};
|
package/src/videos/core.js
CHANGED
|
@@ -7,14 +7,18 @@ import {
|
|
|
7
7
|
import { fetchUserVideosAPI } from "../lib/api-interceptor.js";
|
|
8
8
|
|
|
9
9
|
async function getUserInfo(page) {
|
|
10
|
-
//
|
|
10
|
+
// 重试包装:处理页面导航导致的执行上下文销毁和 CDP 断连
|
|
11
11
|
const evaluateWithRetry = async (fn, retries = 3) => {
|
|
12
12
|
for (let i = 0; i < retries; i++) {
|
|
13
13
|
try {
|
|
14
14
|
return await page.evaluate(fn);
|
|
15
15
|
} catch (e) {
|
|
16
16
|
if (
|
|
17
|
-
e.message
|
|
17
|
+
e.message &&
|
|
18
|
+
(e.message.includes("Execution context was destroyed") ||
|
|
19
|
+
e.message.includes("Target closed") ||
|
|
20
|
+
e.message.includes("Connection closed") ||
|
|
21
|
+
e.message.includes("Protocol error")) &&
|
|
18
22
|
i < retries - 1
|
|
19
23
|
) {
|
|
20
24
|
await new Promise((r) => setTimeout(r, 500 * (i + 1)));
|