tt-help-cli-ycl 1.3.92 → 1.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -94
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +4 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +537 -2298
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +24 -0
- package/src/watch/tag-service.js +142 -11
package/package.json
CHANGED
package/src/cli/comments.js
CHANGED
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
2
|
import { fetchUserCommentsAPI } from "../lib/api-interceptor-comment.js";
|
|
3
3
|
import { closeCommentPanel } from "../lib/browser/page.js";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
server as defaultServer,
|
|
6
|
+
proxy as configuredProxy,
|
|
7
|
+
} from "../lib/constants.js";
|
|
5
8
|
import {
|
|
6
9
|
DEFAULT_TARGET_LOCATIONS,
|
|
7
10
|
isLocationInList,
|
|
8
11
|
normalizeLocation,
|
|
9
12
|
} from "../lib/target-locations.js";
|
|
10
13
|
import { createApiClient } from "../lib/api-client.js";
|
|
14
|
+
import { ensureBrowserReady as ensureBrowserReadyCDP } from "../lib/browser/cdp.js";
|
|
15
|
+
import { delay } from "../lib/delay.js";
|
|
11
16
|
|
|
12
17
|
async function waitForPageReady(page, timeout = 30000) {
|
|
13
18
|
const startTime = Date.now();
|
|
@@ -33,8 +38,6 @@ async function safeEvaluate(page, fn) {
|
|
|
33
38
|
}
|
|
34
39
|
}
|
|
35
40
|
|
|
36
|
-
|
|
37
|
-
|
|
38
41
|
function isBrowserClosedError(err) {
|
|
39
42
|
if (!err) return false;
|
|
40
43
|
const msg = err.message || err.toString() || "";
|
|
@@ -53,20 +56,31 @@ function isBrowserClosedError(err) {
|
|
|
53
56
|
* 自动模式:循环从服务端取视频任务,抓评论,提交
|
|
54
57
|
*/
|
|
55
58
|
async function runAutoMode(options) {
|
|
56
|
-
const {
|
|
59
|
+
const {
|
|
60
|
+
serverUrl,
|
|
61
|
+
parallel,
|
|
62
|
+
interval,
|
|
63
|
+
maxComments,
|
|
64
|
+
basePort,
|
|
65
|
+
proxy: proxyOption,
|
|
66
|
+
} = options;
|
|
67
|
+
const actualParallel = Math.max(1, parallel || 1);
|
|
68
|
+
const actualInterval = interval || 10;
|
|
69
|
+
const actualMaxComments = maxComments || 200;
|
|
70
|
+
const actualBasePort = basePort || 9222;
|
|
71
|
+
const effectiveProxy = proxyOption || configuredProxy;
|
|
72
|
+
let shuttingDown = false;
|
|
73
|
+
|
|
57
74
|
const { apiGet, apiPost, apiPut } = createApiClient({
|
|
58
75
|
checkStatus: true,
|
|
59
76
|
maxRetries: 2,
|
|
60
77
|
backoff: 2000,
|
|
61
78
|
log: true,
|
|
79
|
+
meta: { port: actualBasePort },
|
|
62
80
|
});
|
|
63
|
-
const actualParallel = Math.max(1, parallel || 1);
|
|
64
|
-
const actualInterval = interval || 10;
|
|
65
|
-
const actualMaxComments = maxComments || 200;
|
|
66
|
-
let shuttingDown = false;
|
|
67
81
|
|
|
68
82
|
console.error(
|
|
69
|
-
`\n[Comments Auto] 并行: ${actualParallel}, 间隔: ${actualInterval}s, 评论数: ${actualMaxComments}`,
|
|
83
|
+
`\n[Comments Auto] 并行: ${actualParallel}, 间隔: ${actualInterval}s, 评论数: ${actualMaxComments}, 端口: ${actualBasePort}`,
|
|
70
84
|
);
|
|
71
85
|
console.error(`服务器: ${serverUrl}`);
|
|
72
86
|
console.error(`目标国家: ${DEFAULT_TARGET_LOCATIONS.join(", ")}`);
|
|
@@ -110,7 +124,9 @@ async function runAutoMode(options) {
|
|
|
110
124
|
browser = null;
|
|
111
125
|
}
|
|
112
126
|
}
|
|
113
|
-
|
|
127
|
+
const cdpOpts = { port: actualBasePort };
|
|
128
|
+
if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
|
|
129
|
+
browser = await ensureBrowserReadyCDP(cdpOpts);
|
|
114
130
|
return browser;
|
|
115
131
|
}
|
|
116
132
|
|
|
@@ -131,7 +147,7 @@ async function runAutoMode(options) {
|
|
|
131
147
|
page = await getPage(browser);
|
|
132
148
|
if (!page) {
|
|
133
149
|
console.error("[Comments Auto] 未找到可用页面,等待中...");
|
|
134
|
-
await
|
|
150
|
+
await delay(1000, actualInterval * 1000);
|
|
135
151
|
continue;
|
|
136
152
|
}
|
|
137
153
|
|
|
@@ -149,16 +165,16 @@ async function runAutoMode(options) {
|
|
|
149
165
|
console.error("[Comments Auto] 连续获取失败超过10次,请检查服务端");
|
|
150
166
|
process.exit(1);
|
|
151
167
|
}
|
|
152
|
-
await
|
|
168
|
+
await delay(1000, actualInterval * 1000);
|
|
153
169
|
continue;
|
|
154
170
|
}
|
|
155
171
|
consecutiveErrors = 0;
|
|
156
172
|
|
|
157
173
|
if (tasks.length === 0) {
|
|
158
174
|
console.error(
|
|
159
|
-
`[Comments Auto] 暂无任务,${actualInterval}s
|
|
175
|
+
`[Comments Auto] 暂无任务,${actualInterval}s 内随机等待后重试...`,
|
|
160
176
|
);
|
|
161
|
-
await
|
|
177
|
+
await delay(1000, actualInterval * 1000);
|
|
162
178
|
continue;
|
|
163
179
|
}
|
|
164
180
|
|
|
@@ -298,8 +314,8 @@ async function runAutoMode(options) {
|
|
|
298
314
|
}
|
|
299
315
|
}
|
|
300
316
|
|
|
301
|
-
//
|
|
302
|
-
await
|
|
317
|
+
// 等待间隔(随机 1~N 秒)
|
|
318
|
+
await delay(1000, actualInterval * 1000);
|
|
303
319
|
}
|
|
304
320
|
} finally {
|
|
305
321
|
process.removeListener("SIGINT", onSigint);
|
|
@@ -316,35 +332,44 @@ export async function handleComments(options) {
|
|
|
316
332
|
commentsParallel,
|
|
317
333
|
commentsInterval,
|
|
318
334
|
commentsServer,
|
|
335
|
+
commentsBasePort,
|
|
336
|
+
commentsProxy,
|
|
319
337
|
} = options;
|
|
320
338
|
|
|
321
|
-
|
|
322
|
-
|
|
339
|
+
const effectiveProxy = commentsProxy || configuredProxy;
|
|
340
|
+
|
|
341
|
+
// 自动模式:无URL就进入自动模式
|
|
342
|
+
if (!commentsUrl) {
|
|
323
343
|
return runAutoMode({
|
|
324
|
-
serverUrl: commentsServer,
|
|
344
|
+
serverUrl: commentsServer || defaultServer,
|
|
325
345
|
parallel: commentsParallel,
|
|
326
346
|
interval: commentsInterval,
|
|
327
347
|
maxComments: commentsMax || 200,
|
|
348
|
+
basePort: commentsBasePort || 9222,
|
|
349
|
+
proxy: effectiveProxy,
|
|
328
350
|
});
|
|
329
351
|
}
|
|
330
352
|
|
|
331
|
-
//
|
|
332
|
-
if (
|
|
353
|
+
// 手动模式(有 URL)
|
|
354
|
+
if (commentsUrl) {
|
|
333
355
|
console.error("用法: tt-help comments <视频URL> [最大评论数] [--save]");
|
|
334
356
|
console.error(
|
|
335
357
|
" tt-help comments [-p N] [-i N] [-s server] [-m maxComments]",
|
|
336
358
|
);
|
|
337
359
|
console.error("");
|
|
338
360
|
console.error("手动模式: tt-help comments <URL> [N] [--save]");
|
|
339
|
-
console.error("自动模式: tt-help comments
|
|
361
|
+
console.error("自动模式: tt-help comments (全部默认值)");
|
|
340
362
|
console.error("");
|
|
341
363
|
console.error(
|
|
342
364
|
"选项: --save 去重后保存到服务端,来源标记为 comment",
|
|
343
365
|
);
|
|
344
366
|
console.error(" -p, --parallel 并行数 (默认 1)");
|
|
345
|
-
console.error(
|
|
346
|
-
|
|
367
|
+
console.error(
|
|
368
|
+
" -i, --interval 空闲间隔上限秒 (默认 10,实际 1~N 随机)",
|
|
369
|
+
);
|
|
370
|
+
console.error(` -s, --server 服务端地址 (默认 ${defaultServer})`);
|
|
347
371
|
console.error(" -m, --max-comments 每视频最大评论数 (默认 200)");
|
|
372
|
+
console.error(" --base-port 浏览器CDP端口 (默认 9222)");
|
|
348
373
|
process.exit(1);
|
|
349
374
|
}
|
|
350
375
|
|
package/src/cli/tag.js
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
import { writeFileSync } from "fs";
|
|
2
2
|
import { randomUUID } from "crypto";
|
|
3
|
+
import readline from "readline";
|
|
3
4
|
import { fetchTagData, enrichVideosWithLocation } from "../lib/tag-fetcher.js";
|
|
5
|
+
import { killEdgeProcesses, ensureBrowserReady } from "../lib/browser/cdp.js";
|
|
6
|
+
import { getOrCreatePage } from "../lib/browser/page.js";
|
|
4
7
|
import { TikTokScraper } from "../lib/tiktok-scraper.mjs";
|
|
5
8
|
import {
|
|
6
9
|
DEFAULT_TARGET_LOCATIONS,
|
|
7
10
|
isLocationInList,
|
|
8
11
|
} from "../lib/target-locations.js";
|
|
9
12
|
import { discoverTags } from "../lib/tag-discover.js";
|
|
10
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
server as cfgServer,
|
|
15
|
+
proxy as configuredProxy,
|
|
16
|
+
} from "../lib/constants.js";
|
|
11
17
|
|
|
12
18
|
const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
|
|
13
19
|
const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
|
|
@@ -105,13 +111,14 @@ async function processTag(
|
|
|
105
111
|
tag,
|
|
106
112
|
index,
|
|
107
113
|
total,
|
|
108
|
-
{ enrich, targetLocations, noFilter, serverUrl, recordTags },
|
|
114
|
+
{ enrich, targetLocations, noFilter, serverUrl, recordTags, port },
|
|
109
115
|
) {
|
|
110
116
|
const prefix = total > 1 ? `[${index + 1}/${total}]` : "";
|
|
111
117
|
process.stderr.write(`${prefix} 正在获取 #${tag} ... `);
|
|
112
118
|
|
|
113
119
|
try {
|
|
114
120
|
const result = await fetchTagData(tag, {
|
|
121
|
+
port: port || 9222,
|
|
115
122
|
onProgress: ({ videos, authors }) => {
|
|
116
123
|
process.stderr.write(
|
|
117
124
|
`\r${prefix} #${tag}: ${videos} 视频, ${authors} 作者`,
|
|
@@ -270,56 +277,31 @@ export async function handleDiscover(parsed) {
|
|
|
270
277
|
}
|
|
271
278
|
}
|
|
272
279
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
);
|
|
286
|
-
console.error(
|
|
287
|
-
" tt-help tag score ventas --countries ES,FR # 指定目标国家",
|
|
288
|
-
);
|
|
289
|
-
console.error(
|
|
290
|
-
" tt-help tag score ventas -s http://127.0.0.1:3001 # 指定服务端",
|
|
291
|
-
);
|
|
292
|
-
process.exit(1);
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
296
|
-
const targetCountries = countries || [
|
|
297
|
-
"ES",
|
|
298
|
-
"FR",
|
|
299
|
-
"DE",
|
|
300
|
-
"PT",
|
|
301
|
-
"IT",
|
|
302
|
-
"NL",
|
|
303
|
-
"BE",
|
|
304
|
-
"AT",
|
|
305
|
-
"IE",
|
|
306
|
-
"PL",
|
|
307
|
-
"CZ",
|
|
308
|
-
"GR",
|
|
309
|
-
"HU",
|
|
310
|
-
];
|
|
280
|
+
/**
 * Prompt the user once and resolve with whatever they typed.
 *
 * The prompt text is written to stderr (stdout stays free for data output)
 * and the reply is read from stdin. A fresh readline interface is created
 * for every call and closed as soon as the reply arrives.
 *
 * @param {string} question - Prompt text shown to the user.
 * @returns {Promise<string>} The raw line entered by the user.
 */
function askTagQuestion(question) {
  return new Promise((done) => {
    const streams = { input: process.stdin, output: process.stderr };
    const prompt = readline.createInterface(streams);

    // Close the interface before resolving so stdin is released first.
    const onReply = (reply) => {
      prompt.close();
      done(reply);
    };

    prompt.question(question, onReply);
  });
}
|
|
311
292
|
|
|
293
|
+
async function scoreSingleTag(
|
|
294
|
+
tag,
|
|
295
|
+
{ baseUrl, cdpPort, targetCountries, effectiveProxy },
|
|
296
|
+
) {
|
|
312
297
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
298
|
+
const progress = (msg) => process.stderr.write(`\r ${msg}`);
|
|
299
|
+
const clearLine = () => process.stderr.write("\r" + " ".repeat(80) + "\r");
|
|
300
|
+
|
|
313
301
|
const startTime = Date.now();
|
|
314
302
|
|
|
315
303
|
log("");
|
|
316
|
-
log(
|
|
317
|
-
log(` 标签打分: #${tag}`);
|
|
318
|
-
log(` 目标国家: ${targetCountries.join(", ")}`);
|
|
319
|
-
log(` 服务端: ${baseUrl}`);
|
|
320
|
-
log(" 模式: 客户端本地打分(Playwright → enrich → 算分 → 上报)");
|
|
321
|
-
log("========================================");
|
|
322
|
-
log("");
|
|
304
|
+
log(`▶ 开始打分: #${tag}`);
|
|
323
305
|
|
|
324
306
|
const result = {
|
|
325
307
|
tag,
|
|
@@ -334,46 +316,56 @@ export async function handleScore(parsed) {
|
|
|
334
316
|
};
|
|
335
317
|
|
|
336
318
|
try {
|
|
337
|
-
// Step 1:
|
|
338
|
-
log("
|
|
319
|
+
// Step 1: 抓取视频
|
|
320
|
+
log(" ⏳ 抓取标签页视频...");
|
|
339
321
|
const tagResult = await fetchTagData(tag, {
|
|
322
|
+
port: cdpPort,
|
|
323
|
+
proxyServer: effectiveProxy || undefined,
|
|
340
324
|
onProgress: ({ videos, authors }) => {
|
|
341
|
-
|
|
325
|
+
progress(`${videos} 视频, ${authors} 作者`);
|
|
342
326
|
},
|
|
343
327
|
});
|
|
328
|
+
clearLine();
|
|
344
329
|
log(
|
|
345
|
-
|
|
330
|
+
` ✓ 抓取完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者`,
|
|
346
331
|
);
|
|
347
332
|
result.totalPosts = tagResult.totalPosts || 0;
|
|
348
333
|
result.authorCount = tagResult.uniqueAuthorCount || 0;
|
|
349
334
|
|
|
350
335
|
let videos = tagResult.videos;
|
|
351
336
|
if (!videos || videos.length === 0) {
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
337
|
+
if (tagResult.error === "tag_not_found") {
|
|
338
|
+
log(` ✗ 标签不存在(TikTok 提示"找不到此话题标签"),标记为 dead`);
|
|
339
|
+
result.status = "dead";
|
|
340
|
+
result.error = "tag_not_found";
|
|
341
|
+
} else {
|
|
342
|
+
log(` ✗ 没有视频数据,标记为 dead(可能是网络问题或页面未加载)`);
|
|
343
|
+
result.status = "dead";
|
|
344
|
+
result.error = "no videos found";
|
|
345
|
+
}
|
|
355
346
|
await reportToServer(baseUrl, result);
|
|
356
|
-
return;
|
|
347
|
+
return result;
|
|
357
348
|
}
|
|
358
349
|
|
|
359
|
-
// Step 2
|
|
360
|
-
log(`
|
|
361
|
-
const
|
|
350
|
+
// Step 2: 补充国家信息
|
|
351
|
+
log(` ⏳ 补充国家信息 (${videos.length} 个视频)...`);
|
|
352
|
+
const enrichOpts = {
|
|
362
353
|
mode: "videos",
|
|
363
354
|
onProgress: ({ done, total, current, locationCreated }) => {
|
|
364
355
|
if (done % 10 === 0 || done === total) {
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
);
|
|
356
|
+
const name = current.split("/").pop().slice(0, 20);
|
|
357
|
+
progress(`[${done}/${total}] ${name} → ${locationCreated || "-"}`);
|
|
368
358
|
}
|
|
369
359
|
},
|
|
370
|
-
}
|
|
360
|
+
};
|
|
361
|
+
if (effectiveProxy) enrichOpts.proxyServer = effectiveProxy;
|
|
362
|
+
const enriched = await enrichVideosWithLocation(videos, enrichOpts);
|
|
363
|
+
clearLine();
|
|
371
364
|
videos = enriched.videos;
|
|
372
365
|
const withLoc = videos.filter((v) => v.locationCreated).length;
|
|
373
|
-
log(
|
|
366
|
+
log(` ✓ 国家信息: ${withLoc}/${videos.length} 个视频有国家`);
|
|
374
367
|
|
|
375
|
-
// Step 3
|
|
376
|
-
log("Step 3/4: 过滤目标国家 + 计算分数...");
|
|
368
|
+
// Step 3: 过滤 + 算分
|
|
377
369
|
const { matchedAuthorSet } = applyFilterAndScore(
|
|
378
370
|
videos,
|
|
379
371
|
targetCountries,
|
|
@@ -381,16 +373,15 @@ export async function handleScore(parsed) {
|
|
|
381
373
|
);
|
|
382
374
|
|
|
383
375
|
log(
|
|
384
|
-
` 算分: ${result.score}/100 → ${result.status} (匹配 ${result.matchedAuthors}/${result.authorCount} 作者)`,
|
|
376
|
+
` ✓ 算分: ${result.score}/100 → ${result.status} (匹配 ${result.matchedAuthors}/${result.authorCount} 作者)`,
|
|
385
377
|
);
|
|
386
378
|
if (result.matchedCountries.length > 0) {
|
|
387
379
|
log(
|
|
388
|
-
`
|
|
380
|
+
` 国家分布: ${result.matchedCountries.map((c) => `${c.c}:${c.n}`).join(", ")}`,
|
|
389
381
|
);
|
|
390
382
|
}
|
|
391
383
|
|
|
392
|
-
// Step 4
|
|
393
|
-
log("Step 4/4: 推送用户到服务端 + 上报打分结果...");
|
|
384
|
+
// Step 4: 推送 + 上报
|
|
394
385
|
if (result.matchedAuthors > 0) {
|
|
395
386
|
const pushResult = await pushToServer(
|
|
396
387
|
baseUrl,
|
|
@@ -398,15 +389,17 @@ export async function handleScore(parsed) {
|
|
|
398
389
|
videos,
|
|
399
390
|
);
|
|
400
391
|
result.pushedUsers = pushResult.added || 0;
|
|
392
|
+
log(` ✓ 推送用户: ${result.pushedUsers} 人`);
|
|
401
393
|
}
|
|
402
394
|
await reportToServer(baseUrl, result);
|
|
403
395
|
} catch (e) {
|
|
404
|
-
|
|
396
|
+
clearLine();
|
|
397
|
+
log(` ✗ 错误: ${e.message}`);
|
|
405
398
|
result.error = e.message;
|
|
406
399
|
try {
|
|
407
400
|
await reportToServer(baseUrl, result);
|
|
408
401
|
} catch {}
|
|
409
|
-
return;
|
|
402
|
+
return result;
|
|
410
403
|
}
|
|
411
404
|
|
|
412
405
|
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
@@ -418,16 +411,141 @@ export async function handleScore(parsed) {
|
|
|
418
411
|
: result.status === "dead"
|
|
419
412
|
? "🔴"
|
|
420
413
|
: "⚪";
|
|
421
|
-
log("");
|
|
422
|
-
log("----------------------------------------");
|
|
423
|
-
log(` ${icon} 打分完成 (${elapsed}s)`);
|
|
424
|
-
log(` 状态: ${result.status} 分数: ${result.score}/100`);
|
|
425
414
|
log(
|
|
426
|
-
`
|
|
415
|
+
` ${icon} #${tag} 完成 (${elapsed}s) — 分数: ${result.score}/100, 状态: ${result.status}`,
|
|
427
416
|
);
|
|
428
|
-
log("----------------------------------------");
|
|
429
417
|
|
|
430
|
-
|
|
418
|
+
return result;
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
/**
 * Interactive tag-scoring entry point.
 *
 * Scores every tag passed on the command line (comma/space separated), then
 * keeps prompting for more tags until the user quits. When the loop ends the
 * browser attached to the CDP port is shut down.
 *
 * Fixes over the previous implementation:
 *  - tags are processed in the order they were typed (they used to be
 *    enqueued in reverse, and the first tag of a multi-tag answer was scored
 *    twice);
 *  - input that contains no usable tag (e.g. "#" or ",") re-prompts instead
 *    of scoring `undefined`;
 *  - the browser is closed in a `finally`, so it is also released when
 *    scoring throws;
 *  - the full-width comma (U+FF0C) is accepted as a separator and every "#"
 *    in a token is stripped, not only the first one.
 *
 * @param {object} parsed - Parsed CLI arguments.
 * @param {object} [parsed.tagScore] - Options for the `tag score` command.
 * @param {string} [parsed.tagScore.tag] - Initial tag(s); prompts when absent.
 * @param {string[]} [parsed.tagScore.countries] - Target country codes.
 * @param {string} [parsed.tagScore.serverUrl] - Server base URL.
 * @param {number} [parsed.tagScore.port] - CDP port (default 9222).
 * @param {string} [parsed.tagScore.proxy] - Proxy overriding the configured one.
 * @returns {Promise<void>}
 */
export async function handleScore(parsed) {
  const { tagScore } = parsed;
  const { tag, countries, serverUrl, port, proxy: cliProxy } = tagScore || {};

  const baseUrl = serverUrl || DEFAULT_SERVER;
  const cdpPort = port || 9222;
  const effectiveProxy = cliProxy || configuredProxy;
  const targetCountries = countries || [
    "ES",
    "FR",
    "DE",
    "PT",
    "IT",
    "NL",
    "BE",
    "AT",
    "IE",
    "PL",
    "CZ",
    "GR",
    "HU",
  ];

  const log = (...args) => process.stderr.write(args.join(" ") + "\n");

  // Banner is only shown in pure interactive mode (no tag on the command line).
  if (!tag) {
    log("");
    log("╔══════════════════════════════════════════╗");
    log(
      "║ 标签打分模式 (CDP: " +
        cdpPort +
        " 国家: " +
        targetCountries.slice(0, 6).join(",") +
        "...)",
    );
    log("║ 多个 tag 用逗号/空格分隔 ║");
    log("║ quit/q 退出 | 回车重复上一个 ║");
    log("╚══════════════════════════════════════════╝");
    log("");
  }

  // Split on ASCII comma, full-width comma or whitespace; drop "#" and blanks.
  const parseTags = (input) =>
    input
      .split(/[,\uFF0C\s]+/)
      .map((t) => t.trim().replace(/#/g, "").toLowerCase())
      .filter(Boolean);

  const isQuit = (text) => {
    const word = text.trim().toLowerCase();
    return word === "quit" || word === "q";
  };

  // FIFO queue of tags still to score; `lastTag` enables "Enter = repeat".
  const tagQueue = tag ? parseTags(tag) : [];
  let lastTag = null;

  try {
    while (true) {
      if (tagQueue.length === 0) {
        const answer =
          (await askTagQuestion(
            lastTag === null
              ? `请输入标签名称 (多个用逗号/空格分隔, quit/q 退出): `
              : `\n请输入下一个标签 (直接回车重复上一个, quit/q 退出): `,
          )) || "";

        if (isQuit(answer)) {
          log("再见!");
          break;
        }

        if (answer.trim() === "") {
          // Nothing scored yet: an empty answer means "leave".
          if (lastTag === null) {
            log("再见!");
            break;
          }
          // Otherwise an empty answer repeats the previous tag.
          tagQueue.push(lastTag);
        } else {
          // Keep the user's order: first typed, first scored.
          tagQueue.push(...parseTags(answer));
        }

        // Input consisted only of separators / "#": ask again.
        if (tagQueue.length === 0) {
          continue;
        }
      }

      const currentTag = tagQueue.shift();

      const remaining = tagQueue.length;
      if (remaining > 0) {
        log(` [排队中: 还有 ${remaining} 个标签待处理]`);
      }

      await scoreSingleTag(currentTag, {
        baseUrl,
        cdpPort,
        targetCountries,
        effectiveProxy,
      });
      lastTag = currentTag;
    }
  } finally {
    // Always release the browser bound to this CDP port, even on failure.
    log("");
    log(`正在关闭 CDP 端口 ${cdpPort} 对应的浏览器...`);
    try {
      killEdgeProcesses(null, cdpPort);
      log("浏览器已关闭");
    } catch (e) {
      log(`关闭浏览器失败: ${e.message}`);
    }
    log("");
  }
}
|
|
432
550
|
|
|
433
551
|
async function reportToServer(baseUrl, result, clientId, meta) {
|
|
@@ -448,9 +566,17 @@ async function reportToServer(baseUrl, result, clientId, meta) {
|
|
|
448
566
|
|
|
449
567
|
export async function handleScoreAll(parsed) {
|
|
450
568
|
const { tagScoreAll } = parsed;
|
|
451
|
-
let {
|
|
569
|
+
let {
|
|
570
|
+
countries,
|
|
571
|
+
serverUrl,
|
|
572
|
+
autoDiscover,
|
|
573
|
+
port,
|
|
574
|
+
proxy: cliProxy,
|
|
575
|
+
} = tagScoreAll || {};
|
|
452
576
|
|
|
453
577
|
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
578
|
+
const cdpPort = port || 9222;
|
|
579
|
+
const effectiveProxy = cliProxy || configuredProxy;
|
|
454
580
|
const targetCountries = countries || [
|
|
455
581
|
"ES",
|
|
456
582
|
"FR",
|
|
@@ -470,16 +596,25 @@ export async function handleScoreAll(parsed) {
|
|
|
470
596
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
471
597
|
|
|
472
598
|
log("");
|
|
473
|
-
log("
|
|
474
|
-
log("
|
|
475
|
-
log(
|
|
476
|
-
log(
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
599
|
+
log("╔══════════════════════════════════════════╗");
|
|
600
|
+
log("║ 自动循环打分模式(CDP 连接已登录浏览器)");
|
|
601
|
+
log("║ 目标国家: " + targetCountries.slice(0, 6).join(",") + "...");
|
|
602
|
+
log("║ 服务端: " + baseUrl);
|
|
603
|
+
log(
|
|
604
|
+
"║ CDP 端口: " +
|
|
605
|
+
cdpPort +
|
|
606
|
+
(effectiveProxy ? " 代理: " + effectiveProxy : ""),
|
|
607
|
+
);
|
|
608
|
+
if (autoDiscover) log("║ 自动发现: 开启");
|
|
609
|
+
log("╚══════════════════════════════════════════╝");
|
|
481
610
|
log("");
|
|
482
611
|
|
|
612
|
+
// 连接 CDP 浏览器
|
|
613
|
+
const cdpOpts = { port: cdpPort };
|
|
614
|
+
if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
|
|
615
|
+
const browser = await ensureBrowserReady(cdpOpts);
|
|
616
|
+
const page = await getOrCreatePage(browser);
|
|
617
|
+
|
|
483
618
|
let totalScored = 0;
|
|
484
619
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
485
620
|
const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
|
|
@@ -489,7 +624,10 @@ export async function handleScoreAll(parsed) {
|
|
|
489
624
|
const clientMeta = { type: "scoring" };
|
|
490
625
|
|
|
491
626
|
// 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
|
|
492
|
-
const enrichScraper = new TikTokScraper({
|
|
627
|
+
const enrichScraper = new TikTokScraper({
|
|
628
|
+
poolSize: 3,
|
|
629
|
+
proxyServer: effectiveProxy || undefined,
|
|
630
|
+
});
|
|
493
631
|
await enrichScraper.init();
|
|
494
632
|
log("✅ TikTokScraper 已就绪 (enrich 复用)");
|
|
495
633
|
log(` 客户端 ID: ${clientId.substring(0, 8)}...`);
|
|
@@ -581,9 +719,10 @@ export async function handleScoreAll(parsed) {
|
|
|
581
719
|
continue;
|
|
582
720
|
}
|
|
583
721
|
|
|
584
|
-
//
|
|
722
|
+
// 抓取视频(CDP 连接已登录 Edge)
|
|
585
723
|
log(` 抓取 TikTok 标签页...`);
|
|
586
724
|
const tagResult = await fetchTagData(tag, {
|
|
725
|
+
port: cdpPort,
|
|
587
726
|
onProgress: ({ videos, authors }) => {
|
|
588
727
|
process.stderr.write(`\r 抓取中: ${videos} 视频, ${authors} 作者`);
|
|
589
728
|
},
|
|
@@ -606,7 +745,7 @@ export async function handleScoreAll(parsed) {
|
|
|
606
745
|
}
|
|
607
746
|
|
|
608
747
|
// enrich: 逐个视频查 view-source 获取国家
|
|
609
|
-
const
|
|
748
|
+
const enrichOpts = {
|
|
610
749
|
mode: "videos",
|
|
611
750
|
existingScraper: enrichScraper,
|
|
612
751
|
onProgress: ({ done, total, current, locationCreated }) => {
|
|
@@ -616,12 +755,11 @@ export async function handleScoreAll(parsed) {
|
|
|
616
755
|
);
|
|
617
756
|
}
|
|
618
757
|
},
|
|
619
|
-
}
|
|
758
|
+
};
|
|
759
|
+
if (effectiveProxy) enrichOpts.proxyServer = effectiveProxy;
|
|
760
|
+
const enriched = await enrichVideosWithLocation(videos, enrichOpts);
|
|
620
761
|
videos = enriched.videos;
|
|
621
762
|
|
|
622
|
-
// 更新 meta 中当前正在处理的标签
|
|
623
|
-
clientMeta.tag = tag;
|
|
624
|
-
|
|
625
763
|
// 过滤 + 算分 (共用函数)
|
|
626
764
|
const { matchedAuthorSet } = applyFilterAndScore(
|
|
627
765
|
videos,
|
|
@@ -696,6 +834,13 @@ export async function handleScoreAll(parsed) {
|
|
|
696
834
|
} finally {
|
|
697
835
|
await enrichScraper.close();
|
|
698
836
|
log("✅ TikTokScraper 已关闭");
|
|
837
|
+
log("正在关闭 CDP 浏览器...");
|
|
838
|
+
try {
|
|
839
|
+
killEdgeProcesses(null, cdpPort);
|
|
840
|
+
log("浏览器已关闭");
|
|
841
|
+
} catch (e) {
|
|
842
|
+
log(`关闭浏览器失败: ${e.message}`);
|
|
843
|
+
}
|
|
699
844
|
}
|
|
700
845
|
}
|
|
701
846
|
|