tt-help-cli-ycl 1.3.81 → 1.3.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/explore.js +27 -4
- package/src/cli/refresh.js +1 -0
- package/src/cli/watch.js +25 -4
- package/src/lib/api-interceptor-comment.js +56 -14
- package/src/lib/api-interceptor.js +18 -2
- package/src/lib/args.js +14 -0
- package/src/scraper/explore-core.js +27 -1
- package/src/watch/data-store.js +586 -68
- package/src/watch/public/app.js +60 -5
- package/src/watch/public/index.html +2 -1
- package/src/watch/public/style.css +25 -0
- package/src/watch/server.js +66 -3
package/package.json
CHANGED
package/src/cli/explore.js
CHANGED
|
@@ -264,10 +264,11 @@ export async function handleExplore(options) {
|
|
|
264
264
|
const FOLLOW_BLOCK_THRESHOLD = 10 * 60 * 1000; // 10分钟
|
|
265
265
|
|
|
266
266
|
while (!shuttingDown) {
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
267
|
+
try {
|
|
268
|
+
const jobQuery = exploreJobLocations
|
|
269
|
+
? `${serverUrl}/api/job?userId=${encodeURIComponent(userId)}&locations=${encodeURIComponent(exploreJobLocations)}&loggedIn=${loggedIn}`
|
|
270
|
+
: `${serverUrl}/api/job?userId=${encodeURIComponent(userId)}&loggedIn=${loggedIn}`;
|
|
271
|
+
const job = await apiGet(jobQuery);
|
|
271
272
|
if (!job.hasJob) {
|
|
272
273
|
console.error(`\n[Explore] 当前无任务,${exploreInterval} 秒后重试...`);
|
|
273
274
|
await new Promise((r) => setTimeout(r, exploreInterval * 1000));
|
|
@@ -569,6 +570,28 @@ export async function handleExplore(options) {
|
|
|
569
570
|
console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
|
|
570
571
|
break;
|
|
571
572
|
}
|
|
573
|
+
} catch (e) {
|
|
574
|
+
// 浏览器关闭错误:自动重建 browser + page,然后重试当前轮次
|
|
575
|
+
if (isBrowserClosedError(e)) {
|
|
576
|
+
console.error(
|
|
577
|
+
`\n[浏览器] 检测到浏览器关闭 (${e.message}),正在重建...`,
|
|
578
|
+
);
|
|
579
|
+
const newBrowser = await relaunchBrowser(
|
|
580
|
+
cdpOptions,
|
|
581
|
+
cdpOptions.port || 9222,
|
|
582
|
+
);
|
|
583
|
+
browser = newBrowser;
|
|
584
|
+
const newPage = await setupNewPage(browser);
|
|
585
|
+
Object.assign(page, newPage);
|
|
586
|
+
// 重建后等待页面稳定
|
|
587
|
+
await new Promise((r) => setTimeout(r, 3000));
|
|
588
|
+
console.error(`[浏览器] 已重建,继续处理...`);
|
|
589
|
+
continue;
|
|
590
|
+
}
|
|
591
|
+
// 其他未预期错误:打印堆栈并跳过本轮
|
|
592
|
+
console.error(`\n[未捕获错误] ${e.message}`);
|
|
593
|
+
console.error(e.stack || "");
|
|
594
|
+
}
|
|
572
595
|
}
|
|
573
596
|
|
|
574
597
|
const stats = await apiGet(`${serverUrl}/api/stats`);
|
package/src/cli/refresh.js
CHANGED
|
@@ -549,6 +549,7 @@ export async function handleRefresh(options) {
|
|
|
549
549
|
|
|
550
550
|
const payload = {
|
|
551
551
|
userInfo: refreshUserInfo,
|
|
552
|
+
topRecentVideo: result.topRecentVideo || null,
|
|
552
553
|
discoveredFollowing: (result.discoveredFollowing || []).map((f) => ({
|
|
553
554
|
handle: Array.isArray(f) ? f[0] : f,
|
|
554
555
|
displayName: Array.isArray(f) ? f[1] : null,
|
package/src/cli/watch.js
CHANGED
|
@@ -5,19 +5,40 @@ import { startWatchServer, openBrowser } from "../watch/server.js";
|
|
|
5
5
|
|
|
6
6
|
export async function handleWatch(options) {
|
|
7
7
|
const dataAnchor = options.dataAnchor || options.outputFile;
|
|
8
|
-
const { watchPort } =
|
|
8
|
+
const { watchPort, llmRefill, llmRefillMinScore, llmRefillSampleSize } =
|
|
9
|
+
options;
|
|
9
10
|
|
|
10
11
|
if (!dataAnchor) {
|
|
11
|
-
console.error("用法: tt-help watch -o <db路径> [-p 端口]");
|
|
12
|
+
console.error("用法: tt-help watch -o <db路径> [-p 端口] [--llm-refill]");
|
|
12
13
|
console.error(" tt-help watch -o data/result.db");
|
|
13
14
|
console.error(" tt-help watch -o data/result.db -p 8080");
|
|
15
|
+
console.error(
|
|
16
|
+
" tt-help watch -o data/result.db --llm-refill --llm-refill-min 60",
|
|
17
|
+
);
|
|
14
18
|
process.exit(1);
|
|
15
19
|
}
|
|
16
20
|
|
|
17
21
|
mkdirSync(path.dirname(path.resolve(dataAnchor)), { recursive: true });
|
|
18
22
|
|
|
19
|
-
const
|
|
20
|
-
|
|
23
|
+
const storeOptions = {};
|
|
24
|
+
if (llmRefill) {
|
|
25
|
+
storeOptions.refillLlm = {
|
|
26
|
+
llmScore: true,
|
|
27
|
+
llmMinScore: llmRefillMinScore ?? 60,
|
|
28
|
+
llmSampleSize: llmRefillSampleSize ?? 100,
|
|
29
|
+
};
|
|
30
|
+
console.error(
|
|
31
|
+
`[watch] LLM 自动打分已启用: 最低分 ${storeOptions.refillLlm.llmMinScore}, 采样 ${storeOptions.refillLlm.llmSampleSize}`,
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const store = createStore(dataAnchor, storeOptions);
|
|
36
|
+
const { server, port } = await startWatchServer(
|
|
37
|
+
dataAnchor,
|
|
38
|
+
watchPort,
|
|
39
|
+
store,
|
|
40
|
+
storeOptions,
|
|
41
|
+
);
|
|
21
42
|
openBrowser(port);
|
|
22
43
|
|
|
23
44
|
process.once("SIGINT", () => {
|
package/src/lib/api-interceptor-comment.js
CHANGED
|
@@ -12,35 +12,61 @@ import { delay } from "./delay.js";
|
|
|
12
12
|
* @param {function} options.onCaptcha - 验证码检测回调 (page) => Promise<{detected: boolean}>
|
|
13
13
|
* @returns {Promise<{comments: Array, total: number, captchaDetected: boolean, error: string|null}>}
|
|
14
14
|
*/
|
|
15
|
-
async function fetchUserCommentsAPI(
|
|
15
|
+
async function fetchUserCommentsAPI(
|
|
16
|
+
page,
|
|
17
|
+
{ maxComments = 100, log = console.log, onCaptcha } = {},
|
|
18
|
+
) {
|
|
16
19
|
// 先注册 API 拦截器,再点 tab(顺序不能反)
|
|
17
20
|
let apiResolve = null;
|
|
18
21
|
let apiRequestUrl = null;
|
|
19
|
-
const apiPromise = new Promise(r => {
|
|
22
|
+
const apiPromise = new Promise((r) => {
|
|
23
|
+
apiResolve = r;
|
|
24
|
+
});
|
|
20
25
|
|
|
21
26
|
const handler = async (response) => {
|
|
22
27
|
const url = response.url();
|
|
23
|
-
if (
|
|
28
|
+
if (
|
|
29
|
+
response.status() === 200 &&
|
|
30
|
+
url.includes("/api/comment/list/") &&
|
|
31
|
+
!apiRequestUrl
|
|
32
|
+
) {
|
|
24
33
|
apiRequestUrl = url;
|
|
25
34
|
try {
|
|
26
|
-
|
|
35
|
+
// 超时保护:response.json() 内部调用 CDP Network.getResponseBody,
|
|
36
|
+
// 当页面刷新/验证码导致响应资源丢失时会挂起,需独立超时控制
|
|
37
|
+
apiResolve(
|
|
38
|
+
await Promise.race([
|
|
39
|
+
response.json(),
|
|
40
|
+
new Promise((_, reject) =>
|
|
41
|
+
setTimeout(
|
|
42
|
+
() => reject(new Error("Response body fetch timeout (60s)")),
|
|
43
|
+
60000,
|
|
44
|
+
),
|
|
45
|
+
),
|
|
46
|
+
]),
|
|
47
|
+
);
|
|
27
48
|
} catch (e) {
|
|
28
49
|
apiResolve(null);
|
|
29
50
|
}
|
|
30
51
|
}
|
|
31
52
|
};
|
|
32
53
|
|
|
33
|
-
page.on(
|
|
54
|
+
page.on("response", handler);
|
|
34
55
|
|
|
35
56
|
try {
|
|
36
57
|
// 点击评论 tab 触发 API
|
|
37
|
-
log(
|
|
58
|
+
log(" [API拦截] 点击评论 tab...");
|
|
38
59
|
const tabs = page.locator('[class*="tabbar-item"]');
|
|
39
60
|
const commentTab = tabs.filter({ hasText: /评论|Comment/ });
|
|
40
61
|
const count = await commentTab.count();
|
|
41
62
|
|
|
42
63
|
if (count === 0) {
|
|
43
|
-
return {
|
|
64
|
+
return {
|
|
65
|
+
comments: [],
|
|
66
|
+
total: 0,
|
|
67
|
+
captchaDetected: false,
|
|
68
|
+
error: "未找到评论 tab",
|
|
69
|
+
};
|
|
44
70
|
}
|
|
45
71
|
|
|
46
72
|
await commentTab.first().click({ force: true });
|
|
@@ -55,7 +81,12 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
55
81
|
|
|
56
82
|
if (!data || !apiRequestUrl) {
|
|
57
83
|
log(` [API拦截] 点击评论 tab 后 ${elapsed}ms 未拿到 API 响应`);
|
|
58
|
-
return {
|
|
84
|
+
return {
|
|
85
|
+
comments: [],
|
|
86
|
+
total: 0,
|
|
87
|
+
captchaDetected: false,
|
|
88
|
+
error: "API 超时或未响应",
|
|
89
|
+
};
|
|
59
90
|
}
|
|
60
91
|
|
|
61
92
|
// 验证码检测(API 拿完后检测)
|
|
@@ -65,7 +96,7 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
65
96
|
const captchaResult = await onCaptcha(page);
|
|
66
97
|
captchaDetected = !!captchaResult.detected;
|
|
67
98
|
if (captchaDetected) {
|
|
68
|
-
log(
|
|
99
|
+
log(" [API拦截] 检测到验证码");
|
|
69
100
|
}
|
|
70
101
|
} catch (e) {
|
|
71
102
|
log(` [API拦截] 验证码检测异常: ${e.message}`);
|
|
@@ -73,10 +104,16 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
73
104
|
}
|
|
74
105
|
|
|
75
106
|
const items = data.comments || [];
|
|
76
|
-
log(
|
|
107
|
+
log(
|
|
108
|
+
` [API拦截] ${elapsed}ms 后拿到 ${items.length} 条评论 (total: ${data.total || "?"})`,
|
|
109
|
+
);
|
|
77
110
|
|
|
78
111
|
if (items.length >= maxComments) {
|
|
79
|
-
return {
|
|
112
|
+
return {
|
|
113
|
+
comments: items.slice(0, maxComments),
|
|
114
|
+
total: data.total || 0,
|
|
115
|
+
captchaDetected,
|
|
116
|
+
};
|
|
80
117
|
}
|
|
81
118
|
|
|
82
119
|
// 翻页
|
|
@@ -86,7 +123,10 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
86
123
|
|
|
87
124
|
while (hasMore && cursor && items.length < maxComments) {
|
|
88
125
|
pageNum++;
|
|
89
|
-
const pageUrl = apiRequestUrl.replace(
|
|
126
|
+
const pageUrl = apiRequestUrl.replace(
|
|
127
|
+
/cursor=([^&]+)/,
|
|
128
|
+
`cursor=${cursor}`,
|
|
129
|
+
);
|
|
90
130
|
|
|
91
131
|
const pageData = await page.evaluate(async (u) => {
|
|
92
132
|
try {
|
|
@@ -103,7 +143,9 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
103
143
|
}
|
|
104
144
|
|
|
105
145
|
const pageComments = pageData.comments || [];
|
|
106
|
-
log(
|
|
146
|
+
log(
|
|
147
|
+
` [API拦截] 翻页 ${pageNum}: ${pageComments.length} 条 (累计: ${items.length + pageComments.length})`,
|
|
148
|
+
);
|
|
107
149
|
|
|
108
150
|
items.push(...pageComments);
|
|
109
151
|
cursor = pageData.cursor;
|
|
@@ -119,7 +161,7 @@ async function fetchUserCommentsAPI(page, { maxComments = 100, log = console.log
|
|
|
119
161
|
|
|
120
162
|
return { comments: result, total: data.total || 0, captchaDetected };
|
|
121
163
|
} finally {
|
|
122
|
-
page.off(
|
|
164
|
+
page.off("response", handler);
|
|
123
165
|
}
|
|
124
166
|
}
|
|
125
167
|
|
package/src/lib/api-interceptor.js
CHANGED
|
@@ -18,7 +18,12 @@ async function processAPIResponse(
|
|
|
18
18
|
for (const item of firstPageItems) {
|
|
19
19
|
if (items.length >= maxVideos) break;
|
|
20
20
|
const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
|
|
21
|
-
items.push({
|
|
21
|
+
items.push({
|
|
22
|
+
id: item.id,
|
|
23
|
+
href,
|
|
24
|
+
createTime: item.createTime || null,
|
|
25
|
+
playCount: item.stats?.playCount || 0,
|
|
26
|
+
});
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
let cursor = data.cursor;
|
|
@@ -66,6 +71,7 @@ async function processAPIResponse(
|
|
|
66
71
|
id: item.id,
|
|
67
72
|
href,
|
|
68
73
|
createTime: item.createTime || null,
|
|
74
|
+
playCount: item.stats?.playCount || 0,
|
|
69
75
|
});
|
|
70
76
|
}
|
|
71
77
|
}
|
|
@@ -128,7 +134,17 @@ async function fetchUserVideosAPI(page, username, maxVideos, log) {
|
|
|
128
134
|
{ timeout: 30000 },
|
|
129
135
|
);
|
|
130
136
|
|
|
131
|
-
|
|
137
|
+
// 超时保护:response.json() 内部调用 CDP Network.getResponseBody,
|
|
138
|
+
// 当页面刷新/验证码导致响应资源丢失时会挂起,需独立超时控制
|
|
139
|
+
data = await Promise.race([
|
|
140
|
+
response.json(),
|
|
141
|
+
new Promise((_, reject) =>
|
|
142
|
+
setTimeout(
|
|
143
|
+
() => reject(new Error("Response body fetch timeout (60s)")),
|
|
144
|
+
60000,
|
|
145
|
+
),
|
|
146
|
+
),
|
|
147
|
+
]);
|
|
132
148
|
} catch (e) {
|
|
133
149
|
interceptionError = e.message;
|
|
134
150
|
} finally {
|
package/src/lib/args.js
CHANGED
|
@@ -347,6 +347,9 @@ function parseInfoArgs(args) {
|
|
|
347
347
|
function parseWatchArgs(args) {
|
|
348
348
|
let dataAnchor = "./result.db";
|
|
349
349
|
let watchPort = 3001;
|
|
350
|
+
let llmRefill = true;
|
|
351
|
+
let llmRefillMinScore = null;
|
|
352
|
+
let llmRefillSampleSize = null;
|
|
350
353
|
|
|
351
354
|
for (let i = 0; i < args.length; i++) {
|
|
352
355
|
const arg = args[i];
|
|
@@ -354,6 +357,14 @@ function parseWatchArgs(args) {
|
|
|
354
357
|
dataAnchor = args[++i];
|
|
355
358
|
} else if (arg === "-p") {
|
|
356
359
|
watchPort = parseInt(args[++i]) || 3001;
|
|
360
|
+
} else if (arg === "--llm-refill") {
|
|
361
|
+
llmRefill = true;
|
|
362
|
+
} else if (arg === "--no-llm-refill") {
|
|
363
|
+
llmRefill = false;
|
|
364
|
+
} else if (arg === "--llm-refill-min") {
|
|
365
|
+
llmRefillMinScore = parseInt(args[++i]) || 60;
|
|
366
|
+
} else if (arg === "--llm-refill-sample") {
|
|
367
|
+
llmRefillSampleSize = parseInt(args[++i]) || 100;
|
|
357
368
|
}
|
|
358
369
|
}
|
|
359
370
|
|
|
@@ -362,6 +373,9 @@ function parseWatchArgs(args) {
|
|
|
362
373
|
outputFile: dataAnchor,
|
|
363
374
|
dataAnchor,
|
|
364
375
|
watchPort,
|
|
376
|
+
llmRefill,
|
|
377
|
+
llmRefillMinScore,
|
|
378
|
+
llmRefillSampleSize,
|
|
365
379
|
urls: [],
|
|
366
380
|
outputFormat: "json",
|
|
367
381
|
exploreCount: 0,
|
package/src/scraper/explore-core.js
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
DEFAULT_TARGET_LOCATIONS_CSV,
|
|
9
9
|
findFirstMatchingLocation,
|
|
10
10
|
isLocationInList,
|
|
11
|
+
normalizeLocation,
|
|
11
12
|
normalizeLocationList,
|
|
12
13
|
} from "../lib/target-locations.js";
|
|
13
14
|
import {
|
|
@@ -40,6 +41,7 @@ async function processExplore(page, username, options, log) {
|
|
|
40
41
|
keepFollow: false,
|
|
41
42
|
locationCreated: null,
|
|
42
43
|
latestVideoTime: null,
|
|
44
|
+
topRecentVideo: null,
|
|
43
45
|
noVideo: false,
|
|
44
46
|
restricted: false,
|
|
45
47
|
error: null,
|
|
@@ -84,6 +86,27 @@ async function processExplore(page, username, options, log) {
|
|
|
84
86
|
if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
|
|
85
87
|
}
|
|
86
88
|
|
|
89
|
+
// 找出 7 天内发布且播放量最大的视频
|
|
90
|
+
const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
|
|
91
|
+
const nowSeconds = Math.floor(Date.now() / 1000);
|
|
92
|
+
const recentVideos = videoArray.filter(
|
|
93
|
+
(v) => v.createTime && nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
|
|
94
|
+
);
|
|
95
|
+
if (recentVideos.length > 0) {
|
|
96
|
+
const topVideo = recentVideos.reduce((max, v) =>
|
|
97
|
+
v.playCount > max.playCount ? v : max,
|
|
98
|
+
);
|
|
99
|
+
result.topRecentVideo = {
|
|
100
|
+
id: topVideo.id,
|
|
101
|
+
href: topVideo.href,
|
|
102
|
+
playCount: topVideo.playCount,
|
|
103
|
+
createTime: topVideo.createTime,
|
|
104
|
+
};
|
|
105
|
+
log(
|
|
106
|
+
` 7天内最高播放视频: ${topVideo.playCount} 次播放 (${recentVideos.length} 个候选)`,
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
|
|
87
110
|
if (videoArray.length <= 0) {
|
|
88
111
|
// 视频为空:可能是页面受限或用户真的没有视频
|
|
89
112
|
result.processed = true;
|
|
@@ -110,7 +133,10 @@ async function processExplore(page, username, options, log) {
|
|
|
110
133
|
log(
|
|
111
134
|
` 国家采样(${locations.length}个): [${locations.filter(Boolean).join(", ") || "无数据"}]`,
|
|
112
135
|
);
|
|
113
|
-
|
|
136
|
+
// 直接标准化,不去重(保留重复值用于频率统计)
|
|
137
|
+
const normalizedLocations = locations
|
|
138
|
+
.map((loc) => normalizeLocation(loc))
|
|
139
|
+
.filter(Boolean);
|
|
114
140
|
|
|
115
141
|
// 统计频率
|
|
116
142
|
const freq = {};
|