tt-help-cli-ycl 1.3.88 → 1.3.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/auto.js +7 -0
- package/src/cli/explore.js +12 -2
- package/src/cli/refresh.js +10 -1
- package/src/cli/tag.js +88 -52
- package/src/lib/tag-discover.js +97 -134
- package/src/scraper/explore-core.js +6 -6
- package/src/scraper/modules/follow-extractor.js +47 -2
- package/src/watch/data-store.js +220 -16
- package/src/watch/server.js +49 -1
- package/src/watch/tag-service.js +37 -19
package/package.json
CHANGED
package/src/cli/auto.js
CHANGED
|
@@ -232,6 +232,13 @@ export async function handleAuto(options) {
|
|
|
232
232
|
displayName: Array.isArray(f) ? f[1] : null,
|
|
233
233
|
guessedLocation,
|
|
234
234
|
})),
|
|
235
|
+
discoveredRecommended: (result.discoveredRecommended || []).map(
|
|
236
|
+
(f) => ({
|
|
237
|
+
handle: Array.isArray(f) ? f[0] : f,
|
|
238
|
+
displayName: Array.isArray(f) ? f[1] : null,
|
|
239
|
+
guessedLocation,
|
|
240
|
+
}),
|
|
241
|
+
),
|
|
235
242
|
};
|
|
236
243
|
await apiPost(`${serverUrl}/api/job/${username}`, payload);
|
|
237
244
|
console.error(" 已提交");
|
package/src/cli/explore.js
CHANGED
|
@@ -143,7 +143,9 @@ export async function handleExplore(options) {
|
|
|
143
143
|
console.error(`CDP 端口: ${cdpOptions.port}, 用户编号: ${userId}`);
|
|
144
144
|
console.error(`浏览器配置: ${path.basename(cdpOptions.userDataDir)}`);
|
|
145
145
|
|
|
146
|
-
const { apiGet, apiPost } = createApiClient({
|
|
146
|
+
const { apiGet, apiPost } = createApiClient({
|
|
147
|
+
meta: { port: cdpOptions.port },
|
|
148
|
+
});
|
|
147
149
|
|
|
148
150
|
await apiGet(`${serverUrl}/api/stats`);
|
|
149
151
|
|
|
@@ -508,7 +510,8 @@ export async function handleExplore(options) {
|
|
|
508
510
|
if (result.hasFollowData && result.keepFollow) {
|
|
509
511
|
const totalFollows =
|
|
510
512
|
(result.discoveredFollowing || []).length +
|
|
511
|
-
(result.discoveredFollowers || []).length
|
|
513
|
+
(result.discoveredFollowers || []).length +
|
|
514
|
+
(result.discoveredRecommended || []).length;
|
|
512
515
|
if (totalFollows > 0) {
|
|
513
516
|
lastFollowSuccessTime = Date.now();
|
|
514
517
|
}
|
|
@@ -528,6 +531,13 @@ export async function handleExplore(options) {
|
|
|
528
531
|
displayName: Array.isArray(f) ? f[1] : null,
|
|
529
532
|
guessedLocation,
|
|
530
533
|
})),
|
|
534
|
+
discoveredRecommended: (result.discoveredRecommended || []).map(
|
|
535
|
+
(f) => ({
|
|
536
|
+
handle: Array.isArray(f) ? f[0] : f,
|
|
537
|
+
displayName: Array.isArray(f) ? f[1] : null,
|
|
538
|
+
guessedLocation,
|
|
539
|
+
}),
|
|
540
|
+
),
|
|
531
541
|
processed: result.processed,
|
|
532
542
|
hasFollowData: result.hasFollowData,
|
|
533
543
|
keepFollow: result.keepFollow,
|
package/src/cli/refresh.js
CHANGED
|
@@ -155,7 +155,9 @@ export async function handleRefresh(options) {
|
|
|
155
155
|
);
|
|
156
156
|
}
|
|
157
157
|
|
|
158
|
-
const { apiGet, apiPost } = createApiClient({
|
|
158
|
+
const { apiGet, apiPost } = createApiClient({
|
|
159
|
+
meta: { port: cdpOptions.port },
|
|
160
|
+
});
|
|
159
161
|
|
|
160
162
|
// 连接服务器验证
|
|
161
163
|
await apiGet(`${serverUrl}/api/stats`);
|
|
@@ -545,6 +547,13 @@ export async function handleRefresh(options) {
|
|
|
545
547
|
displayName: Array.isArray(f) ? f[1] : null,
|
|
546
548
|
guessedLocation,
|
|
547
549
|
})),
|
|
550
|
+
discoveredRecommended: (result.discoveredRecommended || []).map(
|
|
551
|
+
(f) => ({
|
|
552
|
+
handle: Array.isArray(f) ? f[0] : f,
|
|
553
|
+
displayName: Array.isArray(f) ? f[1] : null,
|
|
554
|
+
guessedLocation,
|
|
555
|
+
}),
|
|
556
|
+
),
|
|
548
557
|
processed: result.processed,
|
|
549
558
|
hasFollowData: result.hasFollowData,
|
|
550
559
|
keepFollow: result.keepFollow,
|
package/src/cli/tag.js
CHANGED
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
DEFAULT_TARGET_LOCATIONS,
|
|
6
6
|
isLocationInList,
|
|
7
7
|
} from "../lib/target-locations.js";
|
|
8
|
-
import { discoverTags
|
|
8
|
+
import { discoverTags } from "../lib/tag-discover.js";
|
|
9
9
|
import { server as cfgServer } from "../lib/constants.js";
|
|
10
10
|
|
|
11
11
|
const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
|
|
@@ -157,9 +157,18 @@ async function processTag(
|
|
|
157
157
|
const countries = [
|
|
158
158
|
...new Set(videos.map((v) => v.locationCreated).filter(Boolean)),
|
|
159
159
|
];
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
160
|
+
// 通过 API 上报到服务端,由服务端写入数据库
|
|
161
|
+
try {
|
|
162
|
+
await fetch(`${serverUrl}/api/tags/productive`, {
|
|
163
|
+
method: "POST",
|
|
164
|
+
headers: { "Content-Type": "application/json" },
|
|
165
|
+
body: JSON.stringify({
|
|
166
|
+
tag,
|
|
167
|
+
countries,
|
|
168
|
+
pushedUsers: pushResult.added,
|
|
169
|
+
}),
|
|
170
|
+
});
|
|
171
|
+
} catch {}
|
|
163
172
|
process.stderr.write(
|
|
164
173
|
` 已记录标签 #${tag} (${countries.join(",")}, ${pushResult.added} 用户)\n`,
|
|
165
174
|
);
|
|
@@ -221,7 +230,7 @@ export async function handleDiscover(parsed) {
|
|
|
221
230
|
process.exit(1);
|
|
222
231
|
}
|
|
223
232
|
|
|
224
|
-
const baseUrl = serverUrl ||
|
|
233
|
+
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
225
234
|
|
|
226
235
|
for (const country of countries) {
|
|
227
236
|
const params = new URLSearchParams({ country, count: String(count) });
|
|
@@ -452,8 +461,8 @@ export async function handleScoreAll(parsed) {
|
|
|
452
461
|
log("");
|
|
453
462
|
|
|
454
463
|
let totalScored = 0;
|
|
455
|
-
let
|
|
456
|
-
const
|
|
464
|
+
let emptyRounds = 0; // 连续无任务的轮数
|
|
465
|
+
const DISCOVER_AFTER_EMPTY = 3; // 连续 3 轮无任务时触发 discover
|
|
457
466
|
|
|
458
467
|
// 复用 TikTokScraper 实例,避免每次 enrich 都启动/关闭 headless 浏览器
|
|
459
468
|
const enrichScraper = new TikTokScraper({ poolSize: 3 });
|
|
@@ -463,56 +472,64 @@ export async function handleScoreAll(parsed) {
|
|
|
463
472
|
|
|
464
473
|
try {
|
|
465
474
|
while (true) {
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
)
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
475
|
+
try {
|
|
476
|
+
// 从服务端取下一个 new 标签
|
|
477
|
+
const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
|
|
478
|
+
const tagsData = await tagsRes.json();
|
|
479
|
+
if (!tagsData.tags || tagsData.tags.length === 0) {
|
|
480
|
+
emptyRounds++;
|
|
481
|
+
|
|
482
|
+
// 自动发现:连续 N 轮无任务时自动生成标签
|
|
483
|
+
if (autoDiscover && emptyRounds >= DISCOVER_AFTER_EMPTY) {
|
|
484
|
+
log(
|
|
485
|
+
`🔍 连续 ${emptyRounds} 轮无待打分标签,自动为 ${targetCountries.length} 个国家生成标签...`,
|
|
486
|
+
);
|
|
487
|
+
for (const country of targetCountries) {
|
|
488
|
+
try {
|
|
489
|
+
const discRes = await fetch(
|
|
490
|
+
`${baseUrl}/api/tags/discover?country=${country}&count=5`,
|
|
491
|
+
);
|
|
492
|
+
const discData = await discRes.json();
|
|
493
|
+
if (discData.inserted) {
|
|
494
|
+
log(` ${country}: 新增 ${discData.inserted} 个`);
|
|
495
|
+
}
|
|
496
|
+
} catch (e) {
|
|
497
|
+
log(` ${country}: 请求失败 (${e.message})`);
|
|
483
498
|
}
|
|
484
|
-
} catch (e) {
|
|
485
|
-
log(` ${country}: 请求失败 (${e.message})`);
|
|
486
499
|
}
|
|
500
|
+
emptyRounds = 0; // 重置计数器
|
|
501
|
+
// 等 3 秒让服务端处理完
|
|
502
|
+
await new Promise((r) => setTimeout(r, 3000));
|
|
503
|
+
continue;
|
|
487
504
|
}
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
await new Promise((r) => setTimeout(r, 3000));
|
|
505
|
+
log(`⏳ 暂无待打分标签(连续 ${emptyRounds} 轮),10 秒后重试...`);
|
|
506
|
+
await new Promise((r) => setTimeout(r, 10000));
|
|
491
507
|
continue;
|
|
492
508
|
}
|
|
493
|
-
log(`⏳ 暂无待打分标签,10 秒后重试...`);
|
|
494
|
-
await new Promise((r) => setTimeout(r, 10000));
|
|
495
|
-
continue;
|
|
496
|
-
}
|
|
497
|
-
|
|
498
|
-
const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
|
|
499
|
-
const startTime = Date.now();
|
|
500
509
|
|
|
501
|
-
|
|
510
|
+
// 有任务了,重置计数器
|
|
511
|
+
emptyRounds = 0;
|
|
512
|
+
|
|
513
|
+
const tag = tagsData.tags[0].tag
|
|
514
|
+
.replace(/^#+/, "")
|
|
515
|
+
.trim()
|
|
516
|
+
.toLowerCase();
|
|
517
|
+
const startTime = Date.now();
|
|
518
|
+
|
|
519
|
+
log(`[${totalScored + 1}] 正在打分 #${tag} ...`);
|
|
520
|
+
|
|
521
|
+
const result = {
|
|
522
|
+
tag,
|
|
523
|
+
status: "error",
|
|
524
|
+
score: 0,
|
|
525
|
+
totalPosts: 0,
|
|
526
|
+
authorCount: 0,
|
|
527
|
+
matchedAuthors: 0,
|
|
528
|
+
matchedCountries: [],
|
|
529
|
+
pushedUsers: 0,
|
|
530
|
+
error: null,
|
|
531
|
+
};
|
|
502
532
|
|
|
503
|
-
const result = {
|
|
504
|
-
tag,
|
|
505
|
-
status: "error",
|
|
506
|
-
score: 0,
|
|
507
|
-
totalPosts: 0,
|
|
508
|
-
authorCount: 0,
|
|
509
|
-
matchedAuthors: 0,
|
|
510
|
-
matchedCountries: [],
|
|
511
|
-
pushedUsers: 0,
|
|
512
|
-
error: null,
|
|
513
|
-
};
|
|
514
|
-
|
|
515
|
-
try {
|
|
516
533
|
// 锁定 tag
|
|
517
534
|
const claimRes = await fetch(`${baseUrl}/api/tags/claim`, {
|
|
518
535
|
method: "POST",
|
|
@@ -610,10 +627,28 @@ export async function handleScoreAll(parsed) {
|
|
|
610
627
|
);
|
|
611
628
|
log("");
|
|
612
629
|
} catch (e) {
|
|
630
|
+
// 区分网络错误和业务错误
|
|
631
|
+
const isNetworkError =
|
|
632
|
+
e.code === "ECONNREFUSED" ||
|
|
633
|
+
e.code === "ENOTFOUND" ||
|
|
634
|
+
e.code === "ECONNRESET" ||
|
|
635
|
+
(e.message &&
|
|
636
|
+
(e.message.includes("ECONNREFUSED") ||
|
|
637
|
+
e.message.includes("fetch failed") ||
|
|
638
|
+
e.message.includes("network")));
|
|
639
|
+
if (isNetworkError) {
|
|
640
|
+
log(` ⚠️ 服务端连接失败 (${e.message}),15 秒后重试...`);
|
|
641
|
+
await new Promise((r) => setTimeout(r, 15000));
|
|
642
|
+
continue;
|
|
643
|
+
}
|
|
613
644
|
log(` ❌ 失败: ${e.message}`);
|
|
614
|
-
result.error = e.message;
|
|
615
645
|
try {
|
|
616
|
-
await reportToServer(baseUrl,
|
|
646
|
+
await reportToServer(baseUrl, {
|
|
647
|
+
tag: "",
|
|
648
|
+
status: "error",
|
|
649
|
+
score: 0,
|
|
650
|
+
error: e.message,
|
|
651
|
+
});
|
|
617
652
|
} catch {}
|
|
618
653
|
totalScored++;
|
|
619
654
|
}
|
|
@@ -684,6 +719,7 @@ export async function handleTag(parsed) {
|
|
|
684
719
|
const discoverCount = typeof discover === "number" ? discover : 10;
|
|
685
720
|
const generatedTags = await discoverTags(targetLocations, {
|
|
686
721
|
count: discoverCount,
|
|
722
|
+
serverUrl,
|
|
687
723
|
});
|
|
688
724
|
finalTags = [...new Set([...finalTags, ...generatedTags])];
|
|
689
725
|
process.stderr.write(` 共 ${finalTags.length} 个标签待处理\n\n`);
|
package/src/lib/tag-discover.js
CHANGED
|
@@ -1,150 +1,113 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Tag 发现(CLI 模式)
|
|
3
|
+
*
|
|
4
|
+
* 使用 tag-service 的公共函数(LLM 调用、prompt 组装、解析)。
|
|
5
|
+
* 历史 tag 数据通过 API 从服务端获取,不再读写 productive-tags.json。
|
|
6
|
+
*/
|
|
7
|
+
import {
|
|
8
|
+
COUNTRY_LANG,
|
|
9
|
+
getLang,
|
|
10
|
+
callLLM,
|
|
11
|
+
normalizeTag,
|
|
12
|
+
parseTagsFromResponse,
|
|
13
|
+
buildDiscoverPrompt,
|
|
14
|
+
} from "../watch/tag-service.js";
|
|
15
|
+
|
|
16
|
+
const DEFAULT_SERVER = "http://127.0.0.1:3000";
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* 从服务端获取某国的历史 tag(正样本 + 负样本 + 全部已存在)
|
|
20
|
+
*/
|
|
21
|
+
async function fetchTagHistory(serverUrl, country) {
|
|
22
|
+
const baseUrl = serverUrl || DEFAULT_SERVER;
|
|
23
|
+
|
|
24
|
+
const productivePromise = fetch(
|
|
25
|
+
`${baseUrl}/api/tags/history?country=${country}&type=productive`,
|
|
26
|
+
)
|
|
27
|
+
.then((r) => r.json())
|
|
28
|
+
.then((data) => data.tags || [])
|
|
29
|
+
.catch(() => []);
|
|
30
|
+
|
|
31
|
+
const deadPromise = fetch(
|
|
32
|
+
`${baseUrl}/api/tags/history?country=${country}&type=dead`,
|
|
33
|
+
)
|
|
34
|
+
.then((r) => r.json())
|
|
35
|
+
.then((data) => data.tags || [])
|
|
36
|
+
.catch(() => []);
|
|
37
|
+
|
|
38
|
+
// 获取所有已存在的 tag(防止重复生成)
|
|
39
|
+
const allPromise = fetch(
|
|
40
|
+
`${baseUrl}/api/tags/history?country=${country}&type=all`,
|
|
41
|
+
)
|
|
42
|
+
.then((r) => r.json())
|
|
43
|
+
.then((data) => data.tags || [])
|
|
44
|
+
.catch(() => []);
|
|
45
|
+
|
|
46
|
+
const [productive, dead, allExisting] = await Promise.all([
|
|
47
|
+
productivePromise,
|
|
48
|
+
deadPromise,
|
|
49
|
+
allPromise,
|
|
50
|
+
]);
|
|
51
|
+
return { productive, dead, allExisting: allExisting.map((t) => t.tag) };
|
|
21
52
|
}
|
|
22
53
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
54
|
+
/**
|
|
55
|
+
* 为单个国家生成 tag(CLI 模式,通过 API 获取历史数据)
|
|
56
|
+
*/
|
|
57
|
+
async function discoverTagsForCountryCli(
|
|
58
|
+
country,
|
|
59
|
+
count = 4,
|
|
60
|
+
userPrompt = null,
|
|
61
|
+
serverUrl = null,
|
|
62
|
+
) {
|
|
63
|
+
if (!COUNTRY_LANG[country]) {
|
|
64
|
+
return { country, error: `不支持的国家代码: ${country}` };
|
|
28
65
|
}
|
|
29
|
-
writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
export function getProductiveTags() {
|
|
33
|
-
return loadTags().tags;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
export function recordProductiveTag(tag, country, userCount) {
|
|
37
|
-
const data = loadTags();
|
|
38
|
-
const existing = data.tags.find((t) => t.tag === tag);
|
|
39
|
-
if (existing) {
|
|
40
|
-
if (!existing.countries.includes(country)) {
|
|
41
|
-
existing.countries.push(country);
|
|
42
|
-
}
|
|
43
|
-
existing.userCount += userCount;
|
|
44
|
-
existing.lastUsed = new Date().toISOString();
|
|
45
|
-
} else {
|
|
46
|
-
data.tags.push({
|
|
47
|
-
tag,
|
|
48
|
-
countries: [country],
|
|
49
|
-
userCount,
|
|
50
|
-
firstSeen: new Date().toISOString(),
|
|
51
|
-
lastUsed: new Date().toISOString(),
|
|
52
|
-
});
|
|
53
|
-
}
|
|
54
|
-
data.lastUpdated = new Date().toISOString();
|
|
55
|
-
saveTags(data);
|
|
56
|
-
}
|
|
57
66
|
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
const { fetch } = await import("undici");
|
|
61
|
-
|
|
62
|
-
const response = await fetch(
|
|
63
|
-
"http://82.156.52.214:18000/v1/chat/completions",
|
|
64
|
-
{
|
|
65
|
-
method: "POST",
|
|
66
|
-
headers: {
|
|
67
|
-
"Content-Type": "application/json",
|
|
68
|
-
Authorization: `Bearer ${apiKey}`,
|
|
69
|
-
},
|
|
70
|
-
body: JSON.stringify({
|
|
71
|
-
model: "zc-fast",
|
|
72
|
-
messages: [{ role: "user", content: prompt }],
|
|
73
|
-
max_tokens: 1024,
|
|
74
|
-
temperature: 0.7,
|
|
75
|
-
}),
|
|
76
|
-
},
|
|
77
|
-
);
|
|
78
|
-
|
|
79
|
-
const result = await response.json();
|
|
80
|
-
const content = result.choices?.[0]?.message?.content || "";
|
|
81
|
-
return content;
|
|
82
|
-
}
|
|
67
|
+
// 从服务端获取历史 tag
|
|
68
|
+
const history = await fetchTagHistory(serverUrl, country);
|
|
83
69
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
function parseTagsFromResponse(content) {
|
|
89
|
-
try {
|
|
90
|
-
const parsed = JSON.parse(content);
|
|
91
|
-
if (Array.isArray(parsed)) {
|
|
92
|
-
return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
|
|
93
|
-
}
|
|
94
|
-
if (Array.isArray(parsed.tags)) {
|
|
95
|
-
return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
|
|
96
|
-
}
|
|
97
|
-
} catch {}
|
|
98
|
-
|
|
99
|
-
const lines = content.split(/[\n,]+/);
|
|
100
|
-
const tags = [];
|
|
101
|
-
for (const line of lines) {
|
|
102
|
-
const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
|
|
103
|
-
if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
|
|
104
|
-
tags.push(cleaned);
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
return tags;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
export async function discoverTags(countries, options = {}) {
|
|
111
|
-
const { language = "auto", count = 10 } = options;
|
|
112
|
-
|
|
113
|
-
const productiveTags = getProductiveTags();
|
|
114
|
-
const countryStr = Array.isArray(countries)
|
|
115
|
-
? countries.join(", ")
|
|
116
|
-
: countries;
|
|
117
|
-
const langHint =
|
|
118
|
-
language === "auto" ? "" : `Tags should be in ${language} language.`;
|
|
119
|
-
|
|
120
|
-
const historyHint =
|
|
121
|
-
productiveTags.length > 0
|
|
122
|
-
? `Previously productive tags for these countries: ${productiveTags
|
|
123
|
-
.filter((t) => t.countries.some((c) => countries.includes(c)))
|
|
124
|
-
.map((t) => `#${t.tag}`)
|
|
125
|
-
.join(", ")}. Generate new ones, don't repeat these.`
|
|
126
|
-
: "";
|
|
127
|
-
|
|
128
|
-
const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
|
|
129
|
-
|
|
130
|
-
Requirements:
|
|
131
|
-
- Focus on tags that sellers/merchants actually use to promote their products
|
|
132
|
-
- Include local language commerce tags (sell, shop, store, online, vendor, etc. in the local language)
|
|
133
|
-
- Mix broad commerce tags with country-specific tags
|
|
134
|
-
${langHint}
|
|
135
|
-
${historyHint}
|
|
136
|
-
|
|
137
|
-
Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
|
|
70
|
+
// 使用统一的 prompt 组装
|
|
71
|
+
const prompt = buildDiscoverPrompt(country, count, history, userPrompt);
|
|
138
72
|
|
|
139
73
|
process.stderr.write(
|
|
140
|
-
` [LLM] 正在生成 ${count} 个标签 (
|
|
74
|
+
` [LLM] 正在生成 ${count} 个标签 (国家: ${country}, 语言: ${getLang(country)})...\n`,
|
|
141
75
|
);
|
|
142
76
|
const content = await callLLM(prompt);
|
|
143
77
|
const tags = parseTagsFromResponse(content);
|
|
144
|
-
|
|
145
78
|
const unique = [...new Set(tags)].slice(0, count);
|
|
79
|
+
|
|
146
80
|
process.stderr.write(
|
|
147
81
|
` [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
|
|
148
82
|
);
|
|
149
83
|
return unique;
|
|
150
84
|
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* 批量为多个国家生成 tag(兼容旧接口)
|
|
88
|
+
* @param {string|string[]} countries - 国家代码或数组
|
|
89
|
+
* @param {object} options
|
|
90
|
+
* @param {number} [options.count=10] - 每个国家生成的 tag 数量
|
|
91
|
+
* @param {string} [options.serverUrl] - 服务端地址
|
|
92
|
+
* @param {string} [options.prompt] - 用户自定义提示
|
|
93
|
+
*/
|
|
94
|
+
export async function discoverTags(countries, options = {}) {
|
|
95
|
+
const { count = 10, serverUrl, prompt: userPrompt } = options;
|
|
96
|
+
|
|
97
|
+
const countryList = Array.isArray(countries) ? countries : [countries];
|
|
98
|
+
const allTags = [];
|
|
99
|
+
|
|
100
|
+
for (const country of countryList) {
|
|
101
|
+
const tags = await discoverTagsForCountryCli(
|
|
102
|
+
country,
|
|
103
|
+
count,
|
|
104
|
+
userPrompt,
|
|
105
|
+
serverUrl,
|
|
106
|
+
);
|
|
107
|
+
allTags.push(...tags);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return allTags;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export { discoverTagsForCountryCli };
|
|
@@ -35,6 +35,7 @@ async function processExplore(page, username, options, log) {
|
|
|
35
35
|
discoveredGuessAuthors: [],
|
|
36
36
|
discoveredFollowing: [],
|
|
37
37
|
discoveredFollowers: [],
|
|
38
|
+
discoveredRecommended: [],
|
|
38
39
|
collectedVideos: 0,
|
|
39
40
|
processed: false,
|
|
40
41
|
hasFollowData: false,
|
|
@@ -205,19 +206,18 @@ async function processExplore(page, username, options, log) {
|
|
|
205
206
|
log(
|
|
206
207
|
` 商家用户,关注采集: ${effectiveMaxFollowing}, 粉丝采集: ${effectiveMaxFollowers}`,
|
|
207
208
|
);
|
|
208
|
-
const { following, followers } =
|
|
209
|
-
page,
|
|
210
|
-
{
|
|
209
|
+
const { following, followers, recommended } =
|
|
210
|
+
await extractFollowAndFollowers(page, {
|
|
211
211
|
maxFollowing: effectiveMaxFollowing,
|
|
212
212
|
maxFollowers: effectiveMaxFollowers,
|
|
213
213
|
log,
|
|
214
|
-
}
|
|
215
|
-
);
|
|
214
|
+
});
|
|
216
215
|
result.discoveredFollowing = following || [];
|
|
217
216
|
result.discoveredFollowers = followers || [];
|
|
217
|
+
result.discoveredRecommended = recommended || [];
|
|
218
218
|
result.hasFollowData = true;
|
|
219
219
|
log(
|
|
220
|
-
` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}`,
|
|
220
|
+
` 关注: ${result.discoveredFollowing.length}, 粉丝: ${result.discoveredFollowers.length}, 推荐: ${result.discoveredRecommended.length}`,
|
|
221
221
|
);
|
|
222
222
|
} catch (e) {
|
|
223
223
|
log(` 关注/粉丝提取失败: ${e.message}`);
|
|
@@ -2,7 +2,7 @@ import { delay, getDelayConfig } from "./page-helpers.js";
|
|
|
2
2
|
import { scrollAndCollect } from "./scroll-collector.js";
|
|
3
3
|
import { extractUniqueId, toProfileUrl } from "../../lib/url.js";
|
|
4
4
|
|
|
5
|
-
const FILTER_WORDS = ["主页", "已关注", "粉丝"
|
|
5
|
+
const FILTER_WORDS = ["主页", "已关注", "粉丝"];
|
|
6
6
|
|
|
7
7
|
const FOLLOW_TRIGGER_SELECTORS = [
|
|
8
8
|
"[data-e2e=following]",
|
|
@@ -11,6 +11,8 @@ const FOLLOW_TRIGGER_SELECTORS = [
|
|
|
11
11
|
'[data-e2e*="following"]',
|
|
12
12
|
];
|
|
13
13
|
|
|
14
|
+
const RECOMMEND_TAB_TEXTS = ["推荐", "Suggested", "Recommended"];
|
|
15
|
+
|
|
14
16
|
async function waitForFollowTrigger(page, timeout = 15000) {
|
|
15
17
|
await page
|
|
16
18
|
.waitForFunction(
|
|
@@ -187,7 +189,7 @@ async function closeFollowModal(page) {
|
|
|
187
189
|
|
|
188
190
|
function createUserCollectFn() {
|
|
189
191
|
return (container) => {
|
|
190
|
-
const FILTER_WORDS = ["主页", "已关注", "粉丝"
|
|
192
|
+
const FILTER_WORDS = ["主页", "已关注", "粉丝"];
|
|
191
193
|
const modal = document.querySelector("[class*=eyhy6180]");
|
|
192
194
|
const root = modal || document;
|
|
193
195
|
const users = [];
|
|
@@ -239,12 +241,55 @@ async function extractFollowAndFollowers(page, options = {}) {
|
|
|
239
241
|
const followers = await extractUsersFromModal(page, maxFollowers);
|
|
240
242
|
log(` 粉丝: ${followers.length}`);
|
|
241
243
|
|
|
244
|
+
// ===== 3. 采集推荐 =====
|
|
245
|
+
let recommended = [];
|
|
246
|
+
if (following.length > 0 || followers.length > 0) {
|
|
247
|
+
try {
|
|
248
|
+
await delay(500, 1500);
|
|
249
|
+
await clickRecommendTab(page);
|
|
250
|
+
await delay(500, 1500);
|
|
251
|
+
recommended = await scrollAndCollect(page, {
|
|
252
|
+
container: "[class*=DivUserListContainer]",
|
|
253
|
+
findScrollable: false,
|
|
254
|
+
collectFn: createUserCollectFn(),
|
|
255
|
+
uniqueKey: (u) => u.handle,
|
|
256
|
+
maxItems: 50,
|
|
257
|
+
staleThreshold: 2,
|
|
258
|
+
});
|
|
259
|
+
if (log) log(` 推荐: ${recommended.length}`);
|
|
260
|
+
} catch (e) {
|
|
261
|
+
if (log) log(` 推荐采集失败: ${e.message}`);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
242
265
|
await closeFollowModal(page);
|
|
243
266
|
|
|
244
267
|
return {
|
|
245
268
|
following: following.map((u) => [u.handle, u.displayName]),
|
|
246
269
|
followers: followers.map((u) => [u.handle, u.displayName]),
|
|
270
|
+
recommended: recommended.map((u) => [u.handle, u.displayName]),
|
|
247
271
|
};
|
|
248
272
|
}
|
|
249
273
|
|
|
274
|
+
async function clickRecommendTab(page) {
|
|
275
|
+
await page.evaluate(() => {
|
|
276
|
+
const tabs = document.querySelectorAll("[class*=DivTabItem]");
|
|
277
|
+
for (const tab of tabs) {
|
|
278
|
+
const text = (tab.textContent || "").trim();
|
|
279
|
+
if (
|
|
280
|
+
text.includes("推荐") ||
|
|
281
|
+
text.includes("Suggested") ||
|
|
282
|
+
text.includes("Recommended")
|
|
283
|
+
) {
|
|
284
|
+
tab.click();
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
throw new Error("未找到推荐 Tab");
|
|
289
|
+
});
|
|
290
|
+
await page.waitForSelector("[class*=DivUserListContainer]", {
|
|
291
|
+
timeout: 30000,
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
|
|
250
295
|
export { extractFollowAndFollowers };
|
package/src/watch/data-store.js
CHANGED
|
@@ -2223,6 +2223,58 @@ export function createStore(filePath, options = {}) {
|
|
|
2223
2223
|
if (filePath) {
|
|
2224
2224
|
// 初始化 SQLite 用户表(用于判重)
|
|
2225
2225
|
initUserDb(filePath);
|
|
2226
|
+
// 从数据库恢复偏移量
|
|
2227
|
+
loadLlmSampleOffsets();
|
|
2228
|
+
}
|
|
2229
|
+
|
|
2230
|
+
/**
|
|
2231
|
+
* 从数据库加载 LLM 采样偏移量
|
|
2232
|
+
*/
|
|
2233
|
+
function loadLlmSampleOffsets() {
|
|
2234
|
+
try {
|
|
2235
|
+
const row = db
|
|
2236
|
+
.prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
|
|
2237
|
+
.get();
|
|
2238
|
+
if (row && row.offsets) {
|
|
2239
|
+
const parsed = JSON.parse(row.offsets);
|
|
2240
|
+
if (parsed && typeof parsed === "object") {
|
|
2241
|
+
Object.entries(parsed).forEach(([k, v]) => {
|
|
2242
|
+
llmSampleOffsets.set(k, v);
|
|
2243
|
+
});
|
|
2244
|
+
console.error(
|
|
2245
|
+
`[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
|
|
2246
|
+
llmSampleOffsets.entries(),
|
|
2247
|
+
)
|
|
2248
|
+
.map(([k, v]) => `${k}:${v}`)
|
|
2249
|
+
.join(", ")}`,
|
|
2250
|
+
);
|
|
2251
|
+
}
|
|
2252
|
+
}
|
|
2253
|
+
} catch (e) {
|
|
2254
|
+
// 表不存在或解析失败,使用空偏移量
|
|
2255
|
+
console.error(
|
|
2256
|
+
`[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
|
|
2257
|
+
);
|
|
2258
|
+
}
|
|
2259
|
+
}
|
|
2260
|
+
|
|
2261
|
+
/**
|
|
2262
|
+
* 将 LLM 采样偏移量持久化到数据库
|
|
2263
|
+
*/
|
|
2264
|
+
function saveLlmSampleOffsets() {
|
|
2265
|
+
try {
|
|
2266
|
+
const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
|
|
2267
|
+
// 表不存在则创建
|
|
2268
|
+
db.prepare(
|
|
2269
|
+
`CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
|
|
2270
|
+
).run();
|
|
2271
|
+
// 插入或更新
|
|
2272
|
+
db.prepare(
|
|
2273
|
+
`INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
|
|
2274
|
+
).run(offsetsJson);
|
|
2275
|
+
} catch (e) {
|
|
2276
|
+
console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
|
|
2277
|
+
}
|
|
2226
2278
|
}
|
|
2227
2279
|
|
|
2228
2280
|
// stats 缓存
|
|
@@ -2383,15 +2435,97 @@ export function createStore(filePath, options = {}) {
|
|
|
2383
2435
|
}
|
|
2384
2436
|
|
|
2385
2437
|
function flushSave() {
|
|
2438
|
+
// 数据库模式:先保存 LLM 偏移量,再备份数据库
|
|
2439
|
+
if (db && dbPath) {
|
|
2440
|
+
try {
|
|
2441
|
+
saveLlmSampleOffsets();
|
|
2442
|
+
} catch (e) {
|
|
2443
|
+
console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
|
|
2444
|
+
}
|
|
2445
|
+
}
|
|
2386
2446
|
return Promise.resolve();
|
|
2387
2447
|
}
|
|
2388
2448
|
|
|
2389
|
-
|
|
2390
|
-
|
|
2449
|
+
/**
|
|
2450
|
+
* 数据库备份:使用 SQLite BACKUP 命令,保留最新 maxBackups 个备份
|
|
2451
|
+
* @param {number} maxBackups - 保留的备份数量,默认 3
|
|
2452
|
+
* @returns {string|null} 备份文件路径,失败返回 null
|
|
2453
|
+
*/
|
|
2454
|
+
function backupDatabase(maxBackups = 3) {
|
|
2455
|
+
if (!db || !dbPath) {
|
|
2456
|
+
console.error("[data-store] 数据库未初始化,跳过备份");
|
|
2457
|
+
return null;
|
|
2458
|
+
}
|
|
2459
|
+
|
|
2460
|
+
try {
|
|
2461
|
+
// 生成备份文件名:result-20260627T094400.db
|
|
2462
|
+
const now = new Date();
|
|
2463
|
+
const timestamp = now
|
|
2464
|
+
.toISOString()
|
|
2465
|
+
.replace(/[-:T.]/g, "")
|
|
2466
|
+
.slice(0, 15); // YYYYMMDDHHmmss
|
|
2467
|
+
const baseName = path.basename(dbPath, ".db");
|
|
2468
|
+
const backupName = `${baseName}-${timestamp}.db`;
|
|
2469
|
+
const backupDir = path.dirname(dbPath);
|
|
2470
|
+
const backupPath = path.join(backupDir, backupName);
|
|
2471
|
+
|
|
2472
|
+
console.error(`[data-store] 正在备份数据库: ${backupName}`);
|
|
2473
|
+
|
|
2474
|
+
// 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
|
|
2475
|
+
const backupDb = new Database(backupPath);
|
|
2476
|
+
db.backup("main", backupDb, "main");
|
|
2477
|
+
backupDb.close();
|
|
2478
|
+
|
|
2479
|
+
// 验证备份文件大小
|
|
2480
|
+
const stat = fs.statSync(backupPath);
|
|
2481
|
+
const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
|
|
2482
|
+
console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);
|
|
2483
|
+
|
|
2484
|
+
// 清理旧备份:保留最新 maxBackups 个
|
|
2485
|
+
cleanupOldBackups(backupDir, baseName, maxBackups);
|
|
2486
|
+
|
|
2487
|
+
return backupPath;
|
|
2488
|
+
} catch (e) {
|
|
2489
|
+
console.error(`[data-store] 备份失败: ${e.message}`);
|
|
2490
|
+
return null;
|
|
2491
|
+
}
|
|
2492
|
+
}
|
|
2493
|
+
|
|
2494
|
+
/**
|
|
2495
|
+
* 清理旧备份文件,保留最新 maxBackups 个
|
|
2496
|
+
*/
|
|
2497
|
+
function cleanupOldBackups(backupDir, baseName, maxBackups) {
|
|
2498
|
+
try {
|
|
2499
|
+
// 查找所有备份文件:baseName-YYYYMMDDHHmmss.db
|
|
2500
|
+
const pattern = new RegExp(`^${baseName}-\\d{15}\\.db$`);
|
|
2501
|
+
const backups = fs
|
|
2502
|
+
.readdirSync(backupDir)
|
|
2503
|
+
.filter((f) => pattern.test(f))
|
|
2504
|
+
.sort() // 按时间戳排序(ASCII 排序 = 时间排序)
|
|
2505
|
+
.reverse(); // 最新的在前
|
|
2506
|
+
|
|
2507
|
+
if (backups.length > maxBackups) {
|
|
2508
|
+
const toDelete = backups.slice(maxBackups);
|
|
2509
|
+
for (const file of toDelete) {
|
|
2510
|
+
const filePath = path.join(backupDir, file);
|
|
2511
|
+
fs.unlinkSync(filePath);
|
|
2512
|
+
console.error(`[data-store] 已清理旧备份: ${file}`);
|
|
2513
|
+
}
|
|
2514
|
+
}
|
|
2515
|
+
|
|
2516
|
+
console.error(
|
|
2517
|
+
`[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
|
|
2518
|
+
);
|
|
2519
|
+
} catch (e) {
|
|
2520
|
+
console.error(`[data-store] 清理旧备份失败: ${e.message}`);
|
|
2521
|
+
}
|
|
2391
2522
|
}
|
|
2392
2523
|
|
|
2393
2524
|
function stopBackup() {
|
|
2394
|
-
|
|
2525
|
+
// 退出时执行备份
|
|
2526
|
+
if (db && dbPath) {
|
|
2527
|
+
backupDatabase();
|
|
2528
|
+
}
|
|
2395
2529
|
}
|
|
2396
2530
|
|
|
2397
2531
|
function getUser(uid) {
|
|
@@ -2795,7 +2929,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2795
2929
|
.prepare(
|
|
2796
2930
|
`
|
|
2797
2931
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2798
|
-
ORDER BY
|
|
2932
|
+
ORDER BY
|
|
2933
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
2934
|
+
COALESCE(video_count, 0) DESC, created_at DESC
|
|
2799
2935
|
LIMIT ? OFFSET ?
|
|
2800
2936
|
`,
|
|
2801
2937
|
)
|
|
@@ -2803,11 +2939,32 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2803
2939
|
|
|
2804
2940
|
if (samples.length === 0) break;
|
|
2805
2941
|
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2942
|
+
// 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
|
|
2943
|
+
const tagSamples = samples.filter((s) =>
|
|
2944
|
+
(s.sources || "").includes("tag"),
|
|
2809
2945
|
);
|
|
2810
|
-
const
|
|
2946
|
+
const nonTagSamples = samples.filter(
|
|
2947
|
+
(s) => !(s.sources || "").includes("tag"),
|
|
2948
|
+
);
|
|
2949
|
+
|
|
2950
|
+
// tag 来源直接加入合格列表
|
|
2951
|
+
if (tagSamples.length > 0) {
|
|
2952
|
+
allQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2953
|
+
console.error(
|
|
2954
|
+
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2955
|
+
);
|
|
2956
|
+
}
|
|
2957
|
+
|
|
2958
|
+
// 非 tag 来源走 LLM 打分
|
|
2959
|
+
let batchQualified = [];
|
|
2960
|
+
let scores = [];
|
|
2961
|
+
if (nonTagSamples.length > 0) {
|
|
2962
|
+
scores = await scoreJobsBatch(
|
|
2963
|
+
nonTagSamples,
|
|
2964
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
2965
|
+
);
|
|
2966
|
+
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2967
|
+
}
|
|
2811
2968
|
|
|
2812
2969
|
allScores.push(...scores);
|
|
2813
2970
|
allQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
@@ -2829,12 +2986,27 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2829
2986
|
if (allQualified.length >= llmMinReturn) break;
|
|
2830
2987
|
}
|
|
2831
2988
|
|
|
2832
|
-
//
|
|
2833
|
-
|
|
2989
|
+
// 分离 tag 合格和非 tag 合格
|
|
2990
|
+
// tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
|
|
2991
|
+
const tagQualified = allQualified.filter(
|
|
2992
|
+
(uid) => !allScores.find((s) => s.uniqueId === uid),
|
|
2993
|
+
);
|
|
2994
|
+
const nonTagQualifiedScores = allScores
|
|
2834
2995
|
.filter((s) => s.score >= llmMinScore)
|
|
2835
|
-
.sort((a, b) => b.score - a.score)
|
|
2836
|
-
|
|
2837
|
-
|
|
2996
|
+
.sort((a, b) => b.score - a.score);
|
|
2997
|
+
const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
|
|
2998
|
+
|
|
2999
|
+
// 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
|
|
3000
|
+
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3001
|
+
const tagCount = Math.min(tagQualified.length, tagMaxCount);
|
|
3002
|
+
const nonTagMaxCount = safeLimit - tagCount;
|
|
3003
|
+
const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
|
|
3004
|
+
|
|
3005
|
+
// 最终合格列表:tag 优先 + 非 tag 按分数排序
|
|
3006
|
+
const qualified = [
|
|
3007
|
+
...tagQualified.slice(0, tagCount),
|
|
3008
|
+
...finalNonTagQualified,
|
|
3009
|
+
];
|
|
2838
3010
|
|
|
2839
3011
|
if (!qualified.length) {
|
|
2840
3012
|
console.error(
|
|
@@ -2881,6 +3053,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2881
3053
|
moveTxn();
|
|
2882
3054
|
markStatsDirty();
|
|
2883
3055
|
|
|
3056
|
+
// 持久化偏移量到数据库
|
|
3057
|
+
saveLlmSampleOffsets();
|
|
3058
|
+
|
|
2884
3059
|
// 打印最终偏移量状态
|
|
2885
3060
|
const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
|
|
2886
3061
|
.map(([k, v]) => `${k}:${v}`)
|
|
@@ -2920,7 +3095,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2920
3095
|
status_code, latest_video_time, user_create_time
|
|
2921
3096
|
FROM raw_jobs
|
|
2922
3097
|
WHERE ${whereSql}
|
|
2923
|
-
ORDER BY
|
|
3098
|
+
ORDER BY
|
|
3099
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
3100
|
+
COALESCE(video_count, 0) DESC, created_at DESC
|
|
2924
3101
|
LIMIT ?
|
|
2925
3102
|
`,
|
|
2926
3103
|
).run(...args, safeLimit);
|
|
@@ -2932,7 +3109,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2932
3109
|
WHERE unique_id IN (
|
|
2933
3110
|
SELECT unique_id FROM raw_jobs
|
|
2934
3111
|
WHERE ${whereSql}
|
|
2935
|
-
ORDER BY
|
|
3112
|
+
ORDER BY
|
|
3113
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
3114
|
+
COALESCE(video_count, 0) DESC, created_at DESC
|
|
2936
3115
|
LIMIT ?
|
|
2937
3116
|
)
|
|
2938
3117
|
`,
|
|
@@ -3786,6 +3965,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3786
3965
|
(typeof f === "object" && f.guessedLocation) || guessedLocation,
|
|
3787
3966
|
};
|
|
3788
3967
|
}),
|
|
3968
|
+
...(result.discoveredRecommended || []).map((f) => {
|
|
3969
|
+
const handle = Array.isArray(f) ? f[0] : f.handle || "";
|
|
3970
|
+
const name = Array.isArray(f) ? f[1] : f.displayName || null;
|
|
3971
|
+
return {
|
|
3972
|
+
uniqueId: handle.replace(/^@/, ""),
|
|
3973
|
+
nickname: name,
|
|
3974
|
+
sources: ["recommended"],
|
|
3975
|
+
guessedLocation:
|
|
3976
|
+
(typeof f === "object" && f.guessedLocation) || guessedLocation,
|
|
3977
|
+
};
|
|
3978
|
+
}),
|
|
3789
3979
|
].filter((u) => u.uniqueId);
|
|
3790
3980
|
|
|
3791
3981
|
// 先对 discovered 内部去重,再用 uidIndex 批量判断
|
|
@@ -3880,6 +4070,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3880
4070
|
"discoveredGuessAuthors",
|
|
3881
4071
|
"discoveredFollowing",
|
|
3882
4072
|
"discoveredFollowers",
|
|
4073
|
+
"discoveredRecommended",
|
|
3883
4074
|
"uniqueId",
|
|
3884
4075
|
"sources",
|
|
3885
4076
|
"topRecentVideo", // 单独处理,不进入通用循环
|
|
@@ -4262,7 +4453,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4262
4453
|
sqlParams.push(...targetCountries);
|
|
4263
4454
|
}
|
|
4264
4455
|
|
|
4265
|
-
|
|
4456
|
+
// 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
|
|
4457
|
+
sql += ` ORDER BY
|
|
4458
|
+
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
4459
|
+
created_at ASC,
|
|
4460
|
+
unique_id ASC
|
|
4461
|
+
LIMIT ?`;
|
|
4266
4462
|
sqlParams.push(l);
|
|
4267
4463
|
|
|
4268
4464
|
const rows = db.prepare(sql).all(...sqlParams);
|
|
@@ -4310,6 +4506,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4310
4506
|
}
|
|
4311
4507
|
return false;
|
|
4312
4508
|
})
|
|
4509
|
+
.sort((a, b) => {
|
|
4510
|
+
// 优先级:sources 包含 "tag" 的任务优先
|
|
4511
|
+
const aIsTag = (a.sources || "").includes("tag");
|
|
4512
|
+
const bIsTag = (b.sources || "").includes("tag");
|
|
4513
|
+
if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
|
|
4514
|
+
return (a.createdAt || 0) - (b.createdAt || 0);
|
|
4515
|
+
})
|
|
4313
4516
|
.slice(0, l);
|
|
4314
4517
|
// 接受任务时 userUpdateCount + 1
|
|
4315
4518
|
pending.forEach((u) => {
|
|
@@ -4823,6 +5026,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4823
5026
|
commitCommentTask,
|
|
4824
5027
|
debugClaimNextJob,
|
|
4825
5028
|
stopBackup,
|
|
5029
|
+
backupDatabase, // 手动备份数据库
|
|
4826
5030
|
rawQuery,
|
|
4827
5031
|
getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
|
|
4828
5032
|
// Tag 发现与打分
|
package/src/watch/server.js
CHANGED
|
@@ -1125,6 +1125,51 @@ export function startWatchServer(
|
|
|
1125
1125
|
return;
|
|
1126
1126
|
}
|
|
1127
1127
|
|
|
1128
|
+
// GET /api/tags/history?country=ES&type=productive|dead — CLI 模式获取历史 tag
|
|
1129
|
+
if (req.method === "GET" && routePath === "/api/tags/history") {
|
|
1130
|
+
const country = params.country || null;
|
|
1131
|
+
const type = params.type || "productive";
|
|
1132
|
+
|
|
1133
|
+
if (!country) {
|
|
1134
|
+
sendJSON(res, 400, { error: "缺少 country 参数" });
|
|
1135
|
+
return;
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
let tags;
|
|
1139
|
+
if (type === "dead") {
|
|
1140
|
+
tags = store.getDeadTags(country);
|
|
1141
|
+
} else if (type === "all") {
|
|
1142
|
+
tags = store.getTagsByCountry(country, 0);
|
|
1143
|
+
} else {
|
|
1144
|
+
tags = store.getTagsByCountry(country, 50);
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
sendJSON(res, 200, { tags, total: tags.length });
|
|
1148
|
+
return;
|
|
1149
|
+
}
|
|
1150
|
+
|
|
1151
|
+
// POST /api/tags/productive — CLI 模式上报 productive tag
|
|
1152
|
+
if (req.method === "POST" && routePath === "/api/tags/productive") {
|
|
1153
|
+
try {
|
|
1154
|
+
const body = await readBody(req);
|
|
1155
|
+
const { tag, countries, pushedUsers } = body || {};
|
|
1156
|
+
|
|
1157
|
+
if (!tag || !countries || countries.length === 0) {
|
|
1158
|
+
sendJSON(res, 400, { error: "tag 和 countries 不能为空" });
|
|
1159
|
+
return;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
// 将 productive 信息写入数据库(更新已有 tag 或插入新 tag)
|
|
1163
|
+
for (const c of countries) {
|
|
1164
|
+
store.insertTag(tag, [c], "cli-productive");
|
|
1165
|
+
}
|
|
1166
|
+
sendJSON(res, 200, { ok: true });
|
|
1167
|
+
} catch (e) {
|
|
1168
|
+
sendJSON(res, 500, { error: e.message });
|
|
1169
|
+
}
|
|
1170
|
+
return;
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1128
1173
|
if (
|
|
1129
1174
|
req.method === "GET" &&
|
|
1130
1175
|
(routePath === "/" || routePath === "/index.html")
|
|
@@ -1205,7 +1250,10 @@ export function startWatchServer(
|
|
|
1205
1250
|
console.error("[server] HTTP 服务已关闭");
|
|
1206
1251
|
});
|
|
1207
1252
|
await store.flushSave();
|
|
1208
|
-
console.error("[server]
|
|
1253
|
+
console.error("[server] 数据已保存");
|
|
1254
|
+
// 备份数据库
|
|
1255
|
+
store.stopBackup();
|
|
1256
|
+
console.error("[server] 退出");
|
|
1209
1257
|
process.exit(0);
|
|
1210
1258
|
}
|
|
1211
1259
|
|
package/src/watch/tag-service.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
// 国家 → 语言映射
|
|
9
|
-
const COUNTRY_LANG = {
|
|
9
|
+
export const COUNTRY_LANG = {
|
|
10
10
|
CZ: "cs",
|
|
11
11
|
GR: "el",
|
|
12
12
|
HU: "hu",
|
|
@@ -22,16 +22,16 @@ const COUNTRY_LANG = {
|
|
|
22
22
|
AT: "de",
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
-
const LLM_URL = "http://82.156.52.214:18000/v1/chat/completions";
|
|
26
|
-
const LLM_MODEL = "zc-fast";
|
|
25
|
+
export const LLM_URL = "http://82.156.52.214:18000/v1/chat/completions";
|
|
26
|
+
export const LLM_MODEL = "zc-fast";
|
|
27
27
|
|
|
28
|
-
function getLang(country) {
|
|
28
|
+
/**
 * Resolve the language code for a country code using the COUNTRY_LANG map.
 *
 * @param {string} country - Country code key such as "CZ" or "AT".
 * @returns {string} The mapped language code, or "en" when the country is not in the map.
 */
export function getLang(country) {
  // Own-property check: a bare COUNTRY_LANG[country] lookup would also resolve
  // inherited Object.prototype members (e.g. "constructor") to non-string values.
  if (Object.prototype.hasOwnProperty.call(COUNTRY_LANG, country)) {
    return COUNTRY_LANG[country] || "en";
  }
  return "en";
}
|
|
31
31
|
|
|
32
32
|
// ====== LLM 调用 ======
|
|
33
33
|
|
|
34
|
-
async function callLLM(prompt) {
|
|
34
|
+
export async function callLLM(prompt) {
|
|
35
35
|
const apiKey = process.env.APIKEY || "";
|
|
36
36
|
const { fetch } = await import("undici");
|
|
37
37
|
|
|
@@ -53,11 +53,11 @@ async function callLLM(prompt) {
|
|
|
53
53
|
return result.choices?.[0]?.message?.content || "";
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
-
function normalizeTag(t) {
|
|
56
|
+
/**
 * Normalize a hashtag string: strip surrounding whitespace and leading "#"
 * characters, then lowercase the result.
 *
 * @param {string} t - Raw tag text, e.g. " #Ventas".
 * @returns {string} The normalized tag; "" for null/undefined or blank input.
 */
export function normalizeTag(t) {
  // Trim before stripping "#": the regex is anchored at the start, so leading
  // whitespace would otherwise leave the "#" in place. Trim again afterwards
  // to handle input like "# tag".
  return String(t ?? "")
    .trim()
    .replace(/^#+/, "")
    .trim()
    .toLowerCase();
}
|
|
59
59
|
|
|
60
|
-
function parseTagsFromResponse(content) {
|
|
60
|
+
export function parseTagsFromResponse(content) {
|
|
61
61
|
try {
|
|
62
62
|
const parsed = JSON.parse(content);
|
|
63
63
|
if (Array.isArray(parsed)) {
|
|
@@ -81,7 +81,7 @@ function parseTagsFromResponse(content) {
|
|
|
81
81
|
|
|
82
82
|
// ====== Prompt 组装 ======
|
|
83
83
|
|
|
84
|
-
function buildDiscoverPrompt(country, count, history, userPrompt) {
|
|
84
|
+
export function buildDiscoverPrompt(country, count, history, userPrompt) {
|
|
85
85
|
const lang = getLang(country);
|
|
86
86
|
const langNames = {
|
|
87
87
|
cs: "Czech",
|
|
@@ -98,18 +98,18 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
|
|
|
98
98
|
};
|
|
99
99
|
const langName = langNames[lang] || lang;
|
|
100
100
|
|
|
101
|
-
// 正样本:该国高分 tag
|
|
101
|
+
// 正样本:该国高分 tag(只给 LLM 看效果,不给模板)
|
|
102
102
|
const productive = history.productive || [];
|
|
103
103
|
const productiveHint =
|
|
104
104
|
productive.length > 0
|
|
105
|
-
? `\
|
|
105
|
+
? `\nTags that already worked well for ${country}: ${productive.map((t) => t.tag).join(", ")}. These are examples of what works — explore DIFFERENT directions, not variations of these.`
|
|
106
106
|
: "";
|
|
107
107
|
|
|
108
108
|
// 负样本:该国 dead tag
|
|
109
109
|
const dead = history.dead || [];
|
|
110
110
|
const deadHint =
|
|
111
111
|
dead.length > 0
|
|
112
|
-
? `\
|
|
112
|
+
? `\nTags that failed for ${country} (found no matching users): ${dead.map((t) => t.tag).join(", ")}. Avoid these and similar patterns.`
|
|
113
113
|
: "";
|
|
114
114
|
|
|
115
115
|
// 死因分析
|
|
@@ -118,20 +118,35 @@ function buildDiscoverPrompt(country, count, history, userPrompt) {
|
|
|
118
118
|
];
|
|
119
119
|
const errorHint =
|
|
120
120
|
errorPatterns.length > 0
|
|
121
|
-
? `\
|
|
121
|
+
? `\nWhy previous tags failed: ${errorPatterns.join("; ")}. Avoid tags likely to have same issues.`
|
|
122
|
+
: "";
|
|
123
|
+
|
|
124
|
+
// 已存在的所有 tag(防止重复生成)
|
|
125
|
+
const allExisting = history.allExisting || [];
|
|
126
|
+
const existingHint =
|
|
127
|
+
allExisting.length > 0
|
|
128
|
+
? `\nTags already in database (DO NOT generate these again): ${allExisting.slice(-50).join(", ")}.`
|
|
122
129
|
: "";
|
|
123
130
|
|
|
124
131
|
const userHint = userPrompt
|
|
125
132
|
? `\nAdditional focus: ${userPrompt}. Generate tags specifically for this niche.`
|
|
126
133
|
: "";
|
|
127
134
|
|
|
128
|
-
return `
|
|
135
|
+
return `You are discovering TikTok hashtags used by people who sell things in ${country}.
|
|
136
|
+
|
|
137
|
+
Your goal: Find hashtags that real sellers in ${country} actually use — any kind of tag they might use. Think broadly:
|
|
138
|
+
- Who they are (seller, shop owner, entrepreneur, artisan...)
|
|
139
|
+
- What they sell (shoes, clothes, jewelry, food, pets, furniture...)
|
|
140
|
+
- How they sell (online, handmade, second-hand, local pickup...)
|
|
141
|
+
- Product-specific tags (sneakers, dresses, cakes, necklaces...)
|
|
142
|
+
|
|
143
|
+
All tags must be in ${langName} language (or widely used in ${country}).
|
|
144
|
+
Generate ${count} tags that are ALL DIFFERENT from each other and from any existing tags.
|
|
129
145
|
|
|
130
|
-
|
|
131
|
-
-
|
|
132
|
-
-
|
|
133
|
-
-
|
|
134
|
-
- Prefer specific/niche tags over generic ones (e.g., "vendozapatos" not "vender")${productiveHint}${deadHint}${errorHint}${userHint}
|
|
146
|
+
Rules:
|
|
147
|
+
- Each tag should explore a DIFFERENT angle — don't just swap country suffixes
|
|
148
|
+
- Prefer specific and niche tags over generic ones (e.g., "vendozapatos" beats "vender")
|
|
149
|
+
- Do NOT generate tags that already exist${productiveHint}${deadHint}${errorHint}${existingHint}${userHint}
|
|
135
150
|
|
|
136
151
|
Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
|
|
137
152
|
}
|
|
@@ -151,7 +166,10 @@ export async function discoverTagsForCountry(
|
|
|
151
166
|
// 读取历史打分记录
|
|
152
167
|
const productive = store.getTagsByCountry(country, 50);
|
|
153
168
|
const dead = store.getDeadTags(country);
|
|
154
|
-
|
|
169
|
+
// 获取该国所有已存在的 tag 名(防止重复生成)
|
|
170
|
+
const allTags = store.getTagsByCountry(country, 0);
|
|
171
|
+
const allExisting = allTags.map((t) => t.tag);
|
|
172
|
+
const history = { productive, dead, allExisting };
|
|
155
173
|
|
|
156
174
|
// 组装 prompt 并调用 LLM
|
|
157
175
|
const prompt = buildDiscoverPrompt(country, count, history, userPrompt);
|