tt-help-cli-ycl 1.3.81 → 1.3.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/refresh.js +1 -0
- package/src/cli/watch.js +25 -4
- package/src/lib/api-interceptor.js +7 -1
- package/src/lib/args.js +14 -0
- package/src/scraper/explore-core.js +27 -1
- package/src/watch/data-store.js +586 -68
- package/src/watch/public/app.js +59 -4
- package/src/watch/public/index.html +2 -1
- package/src/watch/public/style.css +25 -0
- package/src/watch/server.js +66 -3
package/package.json
CHANGED
package/src/cli/refresh.js
CHANGED
|
@@ -549,6 +549,7 @@ export async function handleRefresh(options) {
|
|
|
549
549
|
|
|
550
550
|
const payload = {
|
|
551
551
|
userInfo: refreshUserInfo,
|
|
552
|
+
topRecentVideo: result.topRecentVideo || null,
|
|
552
553
|
discoveredFollowing: (result.discoveredFollowing || []).map((f) => ({
|
|
553
554
|
handle: Array.isArray(f) ? f[0] : f,
|
|
554
555
|
displayName: Array.isArray(f) ? f[1] : null,
|
package/src/cli/watch.js
CHANGED
|
@@ -5,19 +5,40 @@ import { startWatchServer, openBrowser } from "../watch/server.js";
|
|
|
5
5
|
|
|
6
6
|
export async function handleWatch(options) {
|
|
7
7
|
const dataAnchor = options.dataAnchor || options.outputFile;
|
|
8
|
-
const { watchPort } =
|
|
8
|
+
const { watchPort, llmRefill, llmRefillMinScore, llmRefillSampleSize } =
|
|
9
|
+
options;
|
|
9
10
|
|
|
10
11
|
if (!dataAnchor) {
|
|
11
|
-
console.error("用法: tt-help watch -o <db路径> [-p 端口]");
|
|
12
|
+
console.error("用法: tt-help watch -o <db路径> [-p 端口] [--llm-refill]");
|
|
12
13
|
console.error(" tt-help watch -o data/result.db");
|
|
13
14
|
console.error(" tt-help watch -o data/result.db -p 8080");
|
|
15
|
+
console.error(
|
|
16
|
+
" tt-help watch -o data/result.db --llm-refill --llm-refill-min 60",
|
|
17
|
+
);
|
|
14
18
|
process.exit(1);
|
|
15
19
|
}
|
|
16
20
|
|
|
17
21
|
mkdirSync(path.dirname(path.resolve(dataAnchor)), { recursive: true });
|
|
18
22
|
|
|
19
|
-
const
|
|
20
|
-
|
|
23
|
+
const storeOptions = {};
|
|
24
|
+
if (llmRefill) {
|
|
25
|
+
storeOptions.refillLlm = {
|
|
26
|
+
llmScore: true,
|
|
27
|
+
llmMinScore: llmRefillMinScore ?? 60,
|
|
28
|
+
llmSampleSize: llmRefillSampleSize ?? 100,
|
|
29
|
+
};
|
|
30
|
+
console.error(
|
|
31
|
+
`[watch] LLM 自动打分已启用: 最低分 ${storeOptions.refillLlm.llmMinScore}, 采样 ${storeOptions.refillLlm.llmSampleSize}`,
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const store = createStore(dataAnchor, storeOptions);
|
|
36
|
+
const { server, port } = await startWatchServer(
|
|
37
|
+
dataAnchor,
|
|
38
|
+
watchPort,
|
|
39
|
+
store,
|
|
40
|
+
storeOptions,
|
|
41
|
+
);
|
|
21
42
|
openBrowser(port);
|
|
22
43
|
|
|
23
44
|
process.once("SIGINT", () => {
|
|
@@ -18,7 +18,12 @@ async function processAPIResponse(
|
|
|
18
18
|
for (const item of firstPageItems) {
|
|
19
19
|
if (items.length >= maxVideos) break;
|
|
20
20
|
const href = `https://www.tiktok.com/@${username}/video/${item.id}`;
|
|
21
|
-
items.push({
|
|
21
|
+
items.push({
|
|
22
|
+
id: item.id,
|
|
23
|
+
href,
|
|
24
|
+
createTime: item.createTime || null,
|
|
25
|
+
playCount: item.stats?.playCount || 0,
|
|
26
|
+
});
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
let cursor = data.cursor;
|
|
@@ -66,6 +71,7 @@ async function processAPIResponse(
|
|
|
66
71
|
id: item.id,
|
|
67
72
|
href,
|
|
68
73
|
createTime: item.createTime || null,
|
|
74
|
+
playCount: item.stats?.playCount || 0,
|
|
69
75
|
});
|
|
70
76
|
}
|
|
71
77
|
}
|
package/src/lib/args.js
CHANGED
|
@@ -347,6 +347,9 @@ function parseInfoArgs(args) {
|
|
|
347
347
|
function parseWatchArgs(args) {
|
|
348
348
|
let dataAnchor = "./result.db";
|
|
349
349
|
let watchPort = 3001;
|
|
350
|
+
let llmRefill = true;
|
|
351
|
+
let llmRefillMinScore = null;
|
|
352
|
+
let llmRefillSampleSize = null;
|
|
350
353
|
|
|
351
354
|
for (let i = 0; i < args.length; i++) {
|
|
352
355
|
const arg = args[i];
|
|
@@ -354,6 +357,14 @@ function parseWatchArgs(args) {
|
|
|
354
357
|
dataAnchor = args[++i];
|
|
355
358
|
} else if (arg === "-p") {
|
|
356
359
|
watchPort = parseInt(args[++i]) || 3001;
|
|
360
|
+
} else if (arg === "--llm-refill") {
|
|
361
|
+
llmRefill = true;
|
|
362
|
+
} else if (arg === "--no-llm-refill") {
|
|
363
|
+
llmRefill = false;
|
|
364
|
+
} else if (arg === "--llm-refill-min") {
|
|
365
|
+
llmRefillMinScore = parseInt(args[++i]) || 60;
|
|
366
|
+
} else if (arg === "--llm-refill-sample") {
|
|
367
|
+
llmRefillSampleSize = parseInt(args[++i]) || 100;
|
|
357
368
|
}
|
|
358
369
|
}
|
|
359
370
|
|
|
@@ -362,6 +373,9 @@ function parseWatchArgs(args) {
|
|
|
362
373
|
outputFile: dataAnchor,
|
|
363
374
|
dataAnchor,
|
|
364
375
|
watchPort,
|
|
376
|
+
llmRefill,
|
|
377
|
+
llmRefillMinScore,
|
|
378
|
+
llmRefillSampleSize,
|
|
365
379
|
urls: [],
|
|
366
380
|
outputFormat: "json",
|
|
367
381
|
exploreCount: 0,
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
DEFAULT_TARGET_LOCATIONS_CSV,
|
|
9
9
|
findFirstMatchingLocation,
|
|
10
10
|
isLocationInList,
|
|
11
|
+
normalizeLocation,
|
|
11
12
|
normalizeLocationList,
|
|
12
13
|
} from "../lib/target-locations.js";
|
|
13
14
|
import {
|
|
@@ -40,6 +41,7 @@ async function processExplore(page, username, options, log) {
|
|
|
40
41
|
keepFollow: false,
|
|
41
42
|
locationCreated: null,
|
|
42
43
|
latestVideoTime: null,
|
|
44
|
+
topRecentVideo: null,
|
|
43
45
|
noVideo: false,
|
|
44
46
|
restricted: false,
|
|
45
47
|
error: null,
|
|
@@ -84,6 +86,27 @@ async function processExplore(page, username, options, log) {
|
|
|
84
86
|
if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
|
|
85
87
|
}
|
|
86
88
|
|
|
89
|
+
// 找出 7 天内发布且播放量最大的视频
|
|
90
|
+
const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
|
|
91
|
+
const nowSeconds = Math.floor(Date.now() / 1000);
|
|
92
|
+
const recentVideos = videoArray.filter(
|
|
93
|
+
(v) => v.createTime && nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
|
|
94
|
+
);
|
|
95
|
+
if (recentVideos.length > 0) {
|
|
96
|
+
const topVideo = recentVideos.reduce((max, v) =>
|
|
97
|
+
v.playCount > max.playCount ? v : max,
|
|
98
|
+
);
|
|
99
|
+
result.topRecentVideo = {
|
|
100
|
+
id: topVideo.id,
|
|
101
|
+
href: topVideo.href,
|
|
102
|
+
playCount: topVideo.playCount,
|
|
103
|
+
createTime: topVideo.createTime,
|
|
104
|
+
};
|
|
105
|
+
log(
|
|
106
|
+
` 7天内最高播放视频: ${topVideo.playCount} 次播放 (${recentVideos.length} 个候选)`,
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
|
|
87
110
|
if (videoArray.length <= 0) {
|
|
88
111
|
// 视频为空:可能是页面受限或用户真的没有视频
|
|
89
112
|
result.processed = true;
|
|
@@ -110,7 +133,10 @@ async function processExplore(page, username, options, log) {
|
|
|
110
133
|
log(
|
|
111
134
|
` 国家采样(${locations.length}个): [${locations.filter(Boolean).join(", ") || "无数据"}]`,
|
|
112
135
|
);
|
|
113
|
-
|
|
136
|
+
// 直接标准化,不去重(保留重复值用于频率统计)
|
|
137
|
+
const normalizedLocations = locations
|
|
138
|
+
.map((loc) => normalizeLocation(loc))
|
|
139
|
+
.filter(Boolean);
|
|
114
140
|
|
|
115
141
|
// 统计频率
|
|
116
142
|
const freq = {};
|
package/src/watch/data-store.js
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import Database from "better-sqlite3";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
isLocationInList,
|
|
6
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
7
|
+
} from "../lib/target-locations.js";
|
|
5
8
|
|
|
6
9
|
// SQLite 用户表(用于判重)
|
|
7
10
|
let db = null;
|
|
@@ -143,6 +146,12 @@ function initUserDb(filePath) {
|
|
|
143
146
|
if (!existingJobColumns.has("bio_link")) {
|
|
144
147
|
db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
|
|
145
148
|
}
|
|
149
|
+
if (!existingJobColumns.has("top_video_play_count")) {
|
|
150
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
|
|
151
|
+
}
|
|
152
|
+
if (!existingJobColumns.has("top_video_href")) {
|
|
153
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
|
+
}
|
|
146
155
|
db.exec(`
|
|
147
156
|
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
148
157
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -773,7 +782,7 @@ function getUserUpdateByCountryFromDb() {
|
|
|
773
782
|
COALESCE(guessed_location, '未知') as country,
|
|
774
783
|
COUNT(*) as count
|
|
775
784
|
FROM jobs_base
|
|
776
|
-
WHERE
|
|
785
|
+
WHERE tt_seller IS NULL
|
|
777
786
|
AND COALESCE(user_update_count, 0) <= 0
|
|
778
787
|
GROUP BY COALESCE(guessed_location, '未知')
|
|
779
788
|
ORDER BY count DESC
|
|
@@ -794,7 +803,7 @@ function getAttachStuckByCountryFromDb() {
|
|
|
794
803
|
COALESCE(guessed_location, '未知') as country,
|
|
795
804
|
COUNT(*) as count
|
|
796
805
|
FROM jobs_base
|
|
797
|
-
WHERE
|
|
806
|
+
WHERE tt_seller IS NULL
|
|
798
807
|
AND COALESCE(user_update_count, 0) = 1
|
|
799
808
|
GROUP BY COALESCE(guessed_location, '未知')
|
|
800
809
|
ORDER BY count DESC
|
|
@@ -931,11 +940,36 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
931
940
|
};
|
|
932
941
|
}
|
|
933
942
|
|
|
943
|
+
// pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
|
|
944
|
+
// userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
|
|
945
|
+
let sourceTable = "";
|
|
934
946
|
let scopeWhere = "";
|
|
947
|
+
let columns = "";
|
|
948
|
+
|
|
935
949
|
if (normalizedScope === "pending") {
|
|
936
|
-
|
|
950
|
+
sourceTable = "jobs";
|
|
951
|
+
scopeWhere = `status = 'pending'`;
|
|
952
|
+
columns = `
|
|
953
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
954
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
955
|
+
tt_seller, verified, video_count, comment_count,
|
|
956
|
+
guessed_location, location_created, follower_count,
|
|
957
|
+
following_count, heart_count, refresh_time, processed,
|
|
958
|
+
processed_at, created_at, updated_at, region, signature,
|
|
959
|
+
sec_uid, latest_video_time
|
|
960
|
+
`;
|
|
937
961
|
} else if (normalizedScope === "userUpdate") {
|
|
938
|
-
|
|
962
|
+
sourceTable = "jobs_base";
|
|
963
|
+
scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
|
|
964
|
+
columns = `
|
|
965
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
966
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
967
|
+
tt_seller, verified, video_count, comment_count,
|
|
968
|
+
guessed_location, location_created, follower_count,
|
|
969
|
+
following_count, heart_count, refresh_time, processed,
|
|
970
|
+
processed_at, created_at, updated_at, region, signature,
|
|
971
|
+
sec_uid, latest_video_time
|
|
972
|
+
`;
|
|
939
973
|
} else {
|
|
940
974
|
return {
|
|
941
975
|
moved: 0,
|
|
@@ -954,7 +988,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
954
988
|
.prepare(
|
|
955
989
|
`
|
|
956
990
|
SELECT COUNT(*) as c
|
|
957
|
-
FROM
|
|
991
|
+
FROM ${sourceTable}
|
|
958
992
|
WHERE ${whereSql}
|
|
959
993
|
`,
|
|
960
994
|
)
|
|
@@ -968,74 +1002,18 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
968
1002
|
db.prepare(
|
|
969
1003
|
`
|
|
970
1004
|
INSERT OR REPLACE INTO raw_jobs (
|
|
971
|
-
|
|
972
|
-
nickname,
|
|
973
|
-
status,
|
|
974
|
-
sources,
|
|
975
|
-
claimed_by,
|
|
976
|
-
claimed_at,
|
|
977
|
-
error,
|
|
978
|
-
pinned,
|
|
979
|
-
no_video,
|
|
980
|
-
restricted,
|
|
981
|
-
user_update_count,
|
|
982
|
-
tt_seller,
|
|
983
|
-
verified,
|
|
984
|
-
video_count,
|
|
985
|
-
comment_count,
|
|
986
|
-
guessed_location,
|
|
987
|
-
location_created,
|
|
988
|
-
follower_count,
|
|
989
|
-
following_count,
|
|
990
|
-
heart_count,
|
|
991
|
-
refresh_time,
|
|
992
|
-
processed,
|
|
993
|
-
processed_at,
|
|
994
|
-
created_at,
|
|
995
|
-
updated_at,
|
|
996
|
-
region,
|
|
997
|
-
signature,
|
|
998
|
-
sec_uid,
|
|
999
|
-
latest_video_time
|
|
1005
|
+
${columns}
|
|
1000
1006
|
)
|
|
1001
1007
|
SELECT
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
status,
|
|
1005
|
-
sources,
|
|
1006
|
-
claimed_by,
|
|
1007
|
-
claimed_at,
|
|
1008
|
-
error,
|
|
1009
|
-
pinned,
|
|
1010
|
-
no_video,
|
|
1011
|
-
restricted,
|
|
1012
|
-
user_update_count,
|
|
1013
|
-
tt_seller,
|
|
1014
|
-
verified,
|
|
1015
|
-
video_count,
|
|
1016
|
-
comment_count,
|
|
1017
|
-
guessed_location,
|
|
1018
|
-
location_created,
|
|
1019
|
-
follower_count,
|
|
1020
|
-
following_count,
|
|
1021
|
-
heart_count,
|
|
1022
|
-
refresh_time,
|
|
1023
|
-
processed,
|
|
1024
|
-
processed_at,
|
|
1025
|
-
created_at,
|
|
1026
|
-
updated_at,
|
|
1027
|
-
region,
|
|
1028
|
-
signature,
|
|
1029
|
-
sec_uid,
|
|
1030
|
-
latest_video_time
|
|
1031
|
-
FROM jobs_base
|
|
1008
|
+
${columns}
|
|
1009
|
+
FROM ${sourceTable}
|
|
1032
1010
|
WHERE ${whereSql}
|
|
1033
1011
|
`,
|
|
1034
1012
|
).run(targetCountry);
|
|
1035
1013
|
|
|
1036
1014
|
db.prepare(
|
|
1037
1015
|
`
|
|
1038
|
-
DELETE FROM
|
|
1016
|
+
DELETE FROM ${sourceTable}
|
|
1039
1017
|
WHERE ${whereSql}
|
|
1040
1018
|
`,
|
|
1041
1019
|
).run(targetCountry);
|
|
@@ -1538,6 +1516,8 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1538
1516
|
modified_at,
|
|
1539
1517
|
latest_video_time,
|
|
1540
1518
|
refresh_time,
|
|
1519
|
+
top_video_play_count,
|
|
1520
|
+
top_video_href,
|
|
1541
1521
|
status,
|
|
1542
1522
|
sources
|
|
1543
1523
|
FROM jobs
|
|
@@ -1686,6 +1666,8 @@ const writableJobColumns = new Set([
|
|
|
1686
1666
|
"sec_uid",
|
|
1687
1667
|
"status_code",
|
|
1688
1668
|
"latest_video_time",
|
|
1669
|
+
"top_video_play_count",
|
|
1670
|
+
"top_video_href",
|
|
1689
1671
|
]);
|
|
1690
1672
|
|
|
1691
1673
|
function normalizeJobValue(column, value) {
|
|
@@ -1950,16 +1932,30 @@ function addJob(user) {
|
|
|
1950
1932
|
writeTxn(user);
|
|
1951
1933
|
}
|
|
1952
1934
|
|
|
1953
|
-
export function createStore(filePath) {
|
|
1935
|
+
export function createStore(filePath, options = {}) {
|
|
1954
1936
|
if (!filePath) {
|
|
1955
1937
|
throw new Error("createStore requires an explicit .db path");
|
|
1956
1938
|
}
|
|
1939
|
+
|
|
1940
|
+
// refillJobsFromRaw 的 LLM 打分配置(自动补充任务时使用)
|
|
1941
|
+
const refillLlmConfig = {
|
|
1942
|
+
llmScore: false,
|
|
1943
|
+
llmMinScore: 60,
|
|
1944
|
+
llmSampleSize: 100,
|
|
1945
|
+
...options.refillLlm,
|
|
1946
|
+
};
|
|
1947
|
+
|
|
1957
1948
|
let data = [];
|
|
1958
1949
|
// uniqueId → index 内存索引,O(1) 查找
|
|
1959
1950
|
let uidIndex = new Map();
|
|
1960
1951
|
let clientErrors = new Map();
|
|
1961
1952
|
// 客户端登录状态:userId → boolean
|
|
1962
1953
|
let clientLoginStatus = new Map();
|
|
1954
|
+
// refill 锁:防止多个 claimNextJob 同时触发 LLM refill
|
|
1955
|
+
let refillLock = null; // Promise | null
|
|
1956
|
+
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
1957
|
+
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
1958
|
+
let llmSampleOffsets = new Map();
|
|
1963
1959
|
if (filePath) {
|
|
1964
1960
|
// 初始化 SQLite 用户表(用于判重)
|
|
1965
1961
|
initUserDb(filePath);
|
|
@@ -2217,7 +2213,439 @@ export function createStore(filePath) {
|
|
|
2217
2213
|
return data;
|
|
2218
2214
|
}
|
|
2219
2215
|
|
|
2220
|
-
|
|
2216
|
+
/**
 * Normalize a score value returned by the LLM.
 *
 * Accepts finite numbers and purely numeric strings (e.g. "85"), and clamps
 * the result into the 0-100 range.
 *
 * @param {unknown} value - Raw `score` field from the parsed LLM answer.
 * @returns {number|null} Clamped score, or null when the value is not numeric.
 */
function coerceLlmScore(value) {
  let numeric = value;
  if (typeof value === "string" && /^\s*\d+(\.\d+)?\s*$/.test(value)) {
    numeric = Number(value);
  }
  if (typeof numeric !== "number" || !Number.isFinite(numeric)) {
    return null;
  }
  return Math.max(0, Math.min(100, numeric));
}

/**
 * Extract `{ score, reason }` from the text content of an LLM answer.
 *
 * Strategies, in order:
 *   1. parse the whole text as JSON;
 *   2. parse the outermost `{...}` span;
 *   3. parse that span after replacing curly quotes and collapsing whitespace;
 *   4. (braces present) regex out the score, take the reason up to the last `}`;
 *   5. (no braces) regex out a quoted "score" and a quoted "reason".
 *
 * @param {string} content - Message content returned by the model.
 * @returns {{ score: number, reason: string }|null} Parsed result, or null
 *   when no score could be recovered.
 */
function parseLlmScoreContent(content) {
  const text = typeof content === "string" ? content : String(content ?? "");
  const braced = text.match(/\{[\s\S]*\}/)?.[0] ?? null;

  const candidates = [text];
  if (braced !== null) {
    candidates.push(
      braced,
      braced
        .replace(/[\u201C\u201D]/g, '"') // curly double quotes -> straight quotes
        .replace(/\s+/g, " ") // collapse redundant whitespace
        .trim(),
    );
  }

  for (const candidate of candidates) {
    let value;
    try {
      value = JSON.parse(candidate);
    } catch {
      // Not valid JSON in this form; fall through to the next strategy.
      continue;
    }
    if (value !== null && typeof value === "object") {
      const score = coerceLlmScore(value.score);
      if (score !== null) {
        return { score, reason: value.reason ? String(value.reason) : "" };
      }
    }
  }

  if (braced !== null) {
    // Strategy 4: the reason may contain quotes or other characters that broke
    // JSON parsing, so take everything between its opening quote and the last `}`.
    const scoreMatch = text.match(/"?score"?\s*:\s*(\d+)/i);
    if (!scoreMatch) {
      return null;
    }
    let reason = "解析降级";
    const reasonKeyPos = text.search(/"?reason"?\s*:\s*"/i);
    if (reasonKeyPos !== -1) {
      const afterKey = text.substring(reasonKeyPos);
      const valueStart = afterKey.indexOf('"', afterKey.indexOf(":") + 1) + 1;
      const reasonLength = text.lastIndexOf("}") - reasonKeyPos - valueStart;
      if (reasonLength > 0) {
        reason = afterKey.substring(valueStart, valueStart + reasonLength).trim();
        // Strip one leading / trailing quote left over from the JSON string.
        if (reason.startsWith('"')) reason = reason.substring(1);
        if (reason.endsWith('"')) reason = reason.substring(0, reason.length - 1);
      }
    }
    // \d+ always parses, so a genuine score of 0 is kept as 0.
    return { score: Math.min(100, Number.parseInt(scoreMatch[1], 10)), reason };
  }

  // Strategy 5: no braces at all, look for bare quoted key/value pairs.
  const looseScore = text.match(/"score"\s*:\s*(\d+)/);
  if (!looseScore) {
    return null;
  }
  const looseReason = text.match(/"reason"\s*:\s*"([^"]*)"/);
  return {
    score: Math.min(100, Number.parseInt(looseScore[1], 10)),
    reason: looseReason ? looseReason[1] : "解析降级 - 宽松模式",
  };
}

/**
 * Ask the LLM how well a single job matches the target countries (0-100).
 *
 * HTTP, network and parsing problems do not reject: they are logged and
 * reported as the neutral score 50. Only a failure to load "undici" itself
 * rejects.
 *
 * @param {Object} job - One raw_jobs row; unique_id, nickname, signature,
 *   region, guessed_location, location_created and bio_link are read.
 * @param {string[]} targetLocations - Target country list put into the prompt.
 * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
 */
async function scoreJobLocation(job, targetLocations) {
  const { fetch: undiciFetch } = await import("undici");

  const prompt = `
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。

目标国家列表: ${targetLocations.join(", ")}

重要:
- 用户只要来自上述**任意一个**国家就算匹配。
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
- 请综合用户名、昵称、签名、位置等信息做判断。

用户信息:
- 用户名: ${job.unique_id || "未知"}
- 昵称: ${job.nickname || "未知"}
- 签名: ${job.signature || "未知"}
- 地区: ${job.region || "未知"}
- 猜测国家(参考): ${job.guessed_location || "未知"}
- 位置信息: ${job.location_created || "未知"}
- 主页链接: ${job.bio_link || "未知"}

返回 JSON(仅返回 JSON,无其他内容):
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}

Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
`;

  try {
    const apiKey = process.env.APIKEY || "";
    // NOTE(review): the endpoint is a hard-coded plain-HTTP address, so the
    // bearer token and user data travel unencrypted — confirm this is intended.
    const response = await undiciFetch(
      "http://82.156.52.214:18000/v1/chat/completions",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
          model: "zc-fast",
          messages: [{ role: "user", content: prompt }],
          max_tokens: 512,
          temperature: 0.1,
        }),
      },
    );

    // Surface HTTP failures explicitly instead of as a confusing parse error.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const result = await response.json();
    const content = result.choices?.[0]?.message?.content || "";

    const parsed = parseLlmScoreContent(content);
    if (parsed) {
      return {
        uniqueId: job.unique_id,
        score: parsed.score,
        reason: parsed.reason,
      };
    }

    // Every parsing strategy failed: fall back to the neutral score.
    console.error(
      `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${String(content).substring(0, 100)}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: "LLM 响应解析失败,使用默认分",
    };
  } catch (e) {
    console.error(
      `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: `LLM 调用异常: ${e.message}`,
    };
  }
}
|
|
2365
|
+
|
|
2366
|
+
/**
 * Score many jobs with the LLM, a limited number at a time.
 *
 * Jobs are processed in consecutive chunks; the calls inside one chunk run
 * concurrently and the next chunk starts only after the previous one settled.
 * Results keep the order of `jobs`.
 *
 * @param {Object[]} jobs - raw_jobs rows to score.
 * @param {string[]} targetLocations - Target country list passed to each call.
 * @param {number} batchSize - Concurrent calls per chunk (default 10). Values
 *   that are not a positive number fall back to 10 / are raised to 1, so the
 *   loop always advances.
 * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
 */
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
  // A step of 0 would never advance the loop; a negative one would walk backwards.
  const step = Math.max(1, Math.floor(Number(batchSize)) || 10);
  const results = [];
  for (let start = 0; start < jobs.length; start += step) {
    const chunk = jobs.slice(start, start + step);
    const chunkScores = await Promise.all(
      chunk.map((job) => scoreJobLocation(job, targetLocations)),
    );
    results.push(...chunkScores);
  }
  return results;
}
|
|
2384
|
+
|
|
2385
|
+
/**
 * Move a batch of eligible rows from raw_jobs into jobs as 'pending' jobs.
 *
 * Eligible rows have video_count, follower_count and following_count all > 0
 * and, when `locations` is given, a guessed_location in that list
 * (case-insensitive).
 *
 * Two modes:
 *  - Plain mode (default): synchronously moves the newest `limit` eligible
 *    rows and returns `{ moved }`.
 *  - LLM mode (`options.llmScore` true AND a non-empty `locations`): returns a
 *    Promise. Rows are sampled country by country, scored by the LLM, and only
 *    rows scoring at least `llmMinScore` are moved. The per-country sampling
 *    position is remembered in `llmSampleOffsets` across calls.
 *
 * In both modes every selected row is deleted from raw_jobs, including rows
 * whose INSERT was ignored because the job already exists.
 *
 * @param {string[]|null} locations - Target countries (null = no restriction).
 * @param {number} limit - Max rows moved per call; clamped to 1..2000, default 500.
 * @param {Object} options - Optional settings.
 * @param {boolean} options.llmScore - Enable LLM score filtering (default false).
 * @param {number} options.llmMinScore - Minimum qualifying score (default 60).
 * @param {number} options.llmSampleSize - Rows sampled per LLM batch (default 100).
 * @param {number} options.llmMinReturn - Stop sampling once this many rows
 *   qualified (default 60).
 * @param {number} options.llmMaxBatches - Max sampling rounds per country (default 10).
 * @returns {{ moved: number, error?: string } | Promise<{ moved: number,
 *   scored: number, qualified: number, scores: Object[] }>} `moved` is the
 *   number of rows actually inserted into jobs.
 */
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
  if (!db) {
    return { moved: 0, error: "db not ready" };
  }

  const safeLimit = Math.max(
    1,
    Math.min(2000, Number.parseInt(limit, 10) || 500),
  );
  // De-duplicated so one country is not sampled twice in a single call.
  const normalizedLocations = locations
    ? [
        ...new Set(
          locations
            .map((loc) => String(loc).trim().toUpperCase())
            .filter(Boolean),
        ),
      ]
    : null;
  const hasLocations =
    normalizedLocations !== null && normalizedLocations.length > 0;

  const useLlm = !!options.llmScore;
  const llmMinScore = options.llmMinScore ?? 60;
  const llmSampleSize = options.llmSampleSize ?? 100;

  // Build the shared WHERE clause.
  const conditions = [
    "COALESCE(video_count, 0) > 0",
    "COALESCE(follower_count, 0) > 0",
    "COALESCE(following_count, 0) > 0",
  ];
  const args = [];

  if (hasLocations) {
    conditions.push(
      `UPPER(COALESCE(guessed_location, '')) IN (${normalizedLocations.map(() => "?").join(", ")})`,
    );
    args.push(...normalizedLocations);
  }

  const whereSql = conditions.join(" AND ");

  // Single definition of the raw_jobs -> jobs copy; `tail` selects the rows.
  const insertFromRawSql = (tail) => `
    INSERT OR IGNORE INTO jobs (
      unique_id, nickname, status, sources, pinned,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location,
      follower_count, following_count, heart_count,
      created_at, updated_at, region, signature, bio_link, sec_uid,
      status_code, latest_video_time
    )
    SELECT
      unique_id, nickname, 'pending', sources, pinned,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location,
      follower_count, following_count, heart_count,
      created_at, updated_at, region, signature, bio_link, sec_uid,
      status_code, latest_video_time
    FROM raw_jobs
    ${tail}
  `;

  // Count eligible rows.
  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;

  if (!count) {
    return { moved: 0 };
  }

  // LLM mode: sample per guessed country, resuming from the remembered offset.
  if (useLlm && hasLocations) {
    const llmMinReturn = options.llmMinReturn ?? 60; // stop once this many qualified
    const maxBatches = options.llmMaxBatches ?? 10; // cap on sampling rounds per country

    const offsetSummary = Array.from(llmSampleOffsets.entries())
      .map(([k, v]) => `${k}:${v}`)
      .join(", ");
    console.error(
      `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},最少返回 ${llmMinReturn} 条`,
    );
    if (offsetSummary) {
      console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
    }

    // Returns a Promise; the caller must await it.
    return (async () => {
      const allScores = [];
      // unique_id -> country it was sampled under, used to fix offsets after the move.
      const sampledUnder = new Map();
      let qualifiedCount = 0;

      // Same case-insensitive comparison as the base filter, so rows counted
      // by the base filter are also reachable per country.
      const locationWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ?`;

      for (const location of normalizedLocations) {
        let offset = llmSampleOffsets.get(location) || 0;

        const locationArgs = [...args, location];
        const locationCount =
          db
            .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${locationWhere}`)
            .get(...locationArgs)?.c || 0;

        if (locationCount === 0) {
          console.error(
            `[data-store] 国家 ${location}: raw_jobs 中无数据,跳过`,
          );
          continue;
        }

        // Offset past the end means a full pass finished: start over.
        if (offset >= locationCount) {
          offset = 0;
          llmSampleOffsets.set(location, 0);
        }

        console.error(
          `[data-store] 国家 ${location}: 共 ${locationCount} 条,从偏移量 ${offset} 开始`,
        );

        for (let batch = 0; batch < maxBatches; batch++) {
          const remaining = locationCount - offset;
          if (remaining <= 0) break;

          const sampleLimit = Math.min(llmSampleSize, remaining);
          // unique_id breaks created_at ties so OFFSET paging is stable.
          const samples = db
            .prepare(
              `
              SELECT * FROM raw_jobs WHERE ${locationWhere}
              ORDER BY created_at DESC, unique_id
              LIMIT ? OFFSET ?
            `,
            )
            .all(...locationArgs, sampleLimit, offset);

          if (samples.length === 0) break;

          for (const sample of samples) {
            sampledUnder.set(sample.unique_id, location);
          }

          // NOTE(review): scoring uses DEFAULT_TARGET_LOCATIONS, not the
          // requested `locations` — confirm this is intended.
          const scores = await scoreJobsBatch(samples, DEFAULT_TARGET_LOCATIONS);
          const batchQualified = scores.filter((s) => s.score >= llmMinScore);

          allScores.push(...scores);
          qualifiedCount += batchQualified.length;

          console.error(
            `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${batchQualified.length} 条,累计合格 ${qualifiedCount} 条`,
          );

          // Remember how far this country has been sampled.
          offset += samples.length;
          llmSampleOffsets.set(location, offset);

          // Enough qualified rows: stop sampling this country.
          if (qualifiedCount >= llmMinReturn) break;
        }

        // Enough qualified rows: stop sampling the remaining countries too.
        if (qualifiedCount >= llmMinReturn) break;
      }

      // Best scores first, at most safeLimit rows.
      const qualified = allScores
        .filter((s) => s.score >= llmMinScore)
        .sort((a, b) => b.score - a.score)
        .slice(0, safeLimit)
        .map((s) => s.uniqueId);

      if (!qualified.length) {
        console.error(
          `[data-store] LLM 打分后无符合条件的任务(阈值: ${llmMinScore},共采样 ${allScores.length} 条)`,
        );
        return {
          moved: 0,
          scored: allScores.length,
          qualified: 0,
          scores: allScores,
        };
      }

      // Move the qualified rows in one transaction.
      const placeholders = qualified.map(() => "?").join(", ");
      const moveTxn = db.transaction(() => {
        const info = db
          .prepare(insertFromRawSql(`WHERE unique_id IN (${placeholders})`))
          .run(...qualified);
        db.prepare(
          `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
        ).run(...qualified);
        return info.changes;
      });

      const inserted = moveTxn();
      markStatsDirty();

      // Each deleted row sat before its country's offset, so the remaining rows
      // shifted up by one. Pull the offset back or the next call skips
      // unscored rows.
      for (const uniqueId of qualified) {
        const sampledLocation = sampledUnder.get(uniqueId);
        const current = llmSampleOffsets.get(sampledLocation) || 0;
        if (current > 0) {
          llmSampleOffsets.set(sampledLocation, current - 1);
        }
      }

      const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
        .map(([k, v]) => `${k}:${v}`)
        .join(", ");
      console.error(
        `[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
      );
      console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
      return {
        moved: inserted,
        scored: allScores.length,
        qualified: qualified.length,
        scores: allScores,
      };
    })();
  }

  // Plain mode: INSERT then DELETE the same newest rows in one transaction.
  const newestTail = `
    WHERE ${whereSql}
    ORDER BY created_at DESC, unique_id
    LIMIT ?
  `;
  const moveTxn = db.transaction(() => {
    const info = db.prepare(insertFromRawSql(newestTail)).run(...args, safeLimit);

    // Delete the rows just copied: the subquery repeats the INSERT's selection.
    db.prepare(
      `
      DELETE FROM raw_jobs
      WHERE unique_id IN (
        SELECT unique_id FROM raw_jobs
        ${newestTail}
      )
    `,
    ).run(...args, safeLimit);
    return info.changes;
  });

  const inserted = moveTxn();
  markStatsDirty();

  // Report rows actually inserted; INSERT OR IGNORE may skip existing jobs.
  return { moved: inserted };
}
|
|
2647
|
+
|
|
2648
|
+
async function claimNextJob(
|
|
2221
2649
|
userId,
|
|
2222
2650
|
expireMs = 5 * 60 * 1000,
|
|
2223
2651
|
locations = null,
|
|
@@ -2493,6 +2921,71 @@ export function createStore(filePath) {
|
|
|
2493
2921
|
return claimRow(ranked);
|
|
2494
2922
|
}
|
|
2495
2923
|
}
|
|
2924
|
+
// 尝试从 raw_jobs 毛料库补充任务(使用 createStore 时配置的 LLM 打分)
|
|
2925
|
+
// 使用锁防止多个请求同时触发 LLM refill
|
|
2926
|
+
if (refillLock) {
|
|
2927
|
+
// 已有 refill 在进行中,等待完成后重新尝试领取
|
|
2928
|
+
await refillLock;
|
|
2929
|
+
for (const requireVideo of [true, false]) {
|
|
2930
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2931
|
+
if (pinned) {
|
|
2932
|
+
return claimRow(pinned);
|
|
2933
|
+
}
|
|
2934
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2935
|
+
if (ranked) {
|
|
2936
|
+
return claimRow(ranked);
|
|
2937
|
+
}
|
|
2938
|
+
}
|
|
2939
|
+
return null;
|
|
2940
|
+
}
|
|
2941
|
+
const refillResult = (async () => {
|
|
2942
|
+
refillLock = Promise.resolve(); // 占位
|
|
2943
|
+
const result = refillJobsFromRaw(
|
|
2944
|
+
normalizedLocations.length ? normalizedLocations : null,
|
|
2945
|
+
500,
|
|
2946
|
+
refillLlmConfig,
|
|
2947
|
+
);
|
|
2948
|
+
// refillJobsFromRaw 在 LLM 模式下返回 Promise
|
|
2949
|
+
if (result && typeof result.then === "function") {
|
|
2950
|
+
return result.finally(() => {
|
|
2951
|
+
refillLock = null;
|
|
2952
|
+
});
|
|
2953
|
+
}
|
|
2954
|
+
return result;
|
|
2955
|
+
})();
|
|
2956
|
+
if (refillResult && typeof refillResult.then === "function") {
|
|
2957
|
+
const awaited = await refillResult;
|
|
2958
|
+
if (awaited.moved > 0) {
|
|
2959
|
+
console.error(
|
|
2960
|
+
`[data-store] 从 raw_jobs 补充了 ${awaited.moved} 条任务到 jobs`,
|
|
2961
|
+
);
|
|
2962
|
+
for (const requireVideo of [true, false]) {
|
|
2963
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2964
|
+
if (pinned) {
|
|
2965
|
+
return claimRow(pinned);
|
|
2966
|
+
}
|
|
2967
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2968
|
+
if (ranked) {
|
|
2969
|
+
return claimRow(ranked);
|
|
2970
|
+
}
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
2973
|
+
} else if (refillResult.moved > 0) {
|
|
2974
|
+
console.error(
|
|
2975
|
+
`[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
|
|
2976
|
+
);
|
|
2977
|
+
for (const requireVideo of [true, false]) {
|
|
2978
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2979
|
+
if (pinned) {
|
|
2980
|
+
return claimRow(pinned);
|
|
2981
|
+
}
|
|
2982
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2983
|
+
if (ranked) {
|
|
2984
|
+
return claimRow(ranked);
|
|
2985
|
+
}
|
|
2986
|
+
}
|
|
2987
|
+
}
|
|
2988
|
+
|
|
2496
2989
|
return null;
|
|
2497
2990
|
}
|
|
2498
2991
|
|
|
@@ -3088,6 +3581,7 @@ export function createStore(filePath) {
|
|
|
3088
3581
|
"discoveredFollowers",
|
|
3089
3582
|
"uniqueId",
|
|
3090
3583
|
"sources",
|
|
3584
|
+
"topRecentVideo", // 单独处理,不进入通用循环
|
|
3091
3585
|
];
|
|
3092
3586
|
for (const key of Object.keys(result)) {
|
|
3093
3587
|
if (extraFields.includes(key)) continue;
|
|
@@ -3099,6 +3593,11 @@ export function createStore(filePath) {
|
|
|
3099
3593
|
user[key] = result[key];
|
|
3100
3594
|
}
|
|
3101
3595
|
}
|
|
3596
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3597
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3598
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3599
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3600
|
+
}
|
|
3102
3601
|
user.sources = [...new Set([...(user.sources || []), "processed"])];
|
|
3103
3602
|
}
|
|
3104
3603
|
if (user.status !== oldStatus) markStatsDirty();
|
|
@@ -3340,6 +3839,11 @@ export function createStore(filePath) {
|
|
|
3340
3839
|
}
|
|
3341
3840
|
}
|
|
3342
3841
|
}
|
|
3842
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3843
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3844
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3845
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3846
|
+
}
|
|
3343
3847
|
const newUsers = processDiscoveredUsers(result);
|
|
3344
3848
|
const ret = updateJobInfo(uniqueId, user, false);
|
|
3345
3849
|
if (ret.error) return { saved: false, error: ret.error };
|
|
@@ -3360,6 +3864,11 @@ export function createStore(filePath) {
|
|
|
3360
3864
|
}
|
|
3361
3865
|
}
|
|
3362
3866
|
}
|
|
3867
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3868
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3869
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3870
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3871
|
+
}
|
|
3363
3872
|
const newUsers = processDiscoveredUsers(result);
|
|
3364
3873
|
return { saved: true, newUsers };
|
|
3365
3874
|
}
|
|
@@ -3966,6 +4475,9 @@ export function createStore(filePath) {
|
|
|
3966
4475
|
getStats,
|
|
3967
4476
|
getStatusGroups,
|
|
3968
4477
|
markGroupsDirty,
|
|
4478
|
+
refillJobsFromRaw,
|
|
4479
|
+
scoreJobLocation,
|
|
4480
|
+
scoreJobsBatch,
|
|
3969
4481
|
claimNextJob,
|
|
3970
4482
|
commitJob,
|
|
3971
4483
|
commitNewExplore,
|
|
@@ -3991,6 +4503,12 @@ export function createStore(filePath) {
|
|
|
3991
4503
|
debugClaimNextJob,
|
|
3992
4504
|
stopBackup,
|
|
3993
4505
|
rawQuery,
|
|
4506
|
+
getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
|
|
3994
4507
|
data,
|
|
3995
4508
|
};
|
|
4509
|
+
|
|
4510
|
+
// 辅助函数:获取 LLM 采样偏移量
|
|
4511
|
+
/**
 * Returns a snapshot of the LLM sampling offsets as a plain object.
 *
 * `llmSampleOffsets` is converted with `Object.fromEntries`, so it must be an
 * iterable of `[key, value]` entries (presumably a Map — confirm at its
 * declaration). A new object is built on every call.
 *
 * @returns {Object} Plain object with one property per entry.
 */
function getLlmSampleOffsets() {
  const offsetsSnapshot = Object.fromEntries(llmSampleOffsets);
  return offsetsSnapshot;
}
|
|
3996
4514
|
}
|
package/src/watch/public/app.js
CHANGED
|
@@ -961,6 +961,7 @@ let currentTargetTotal = 0;
|
|
|
961
961
|
let currentTargetLoading = false;
|
|
962
962
|
let currentTargetAllLoaded = false;
|
|
963
963
|
let currentTargetSeq = 0;
|
|
964
|
+
let currentTargetSort = { key: null, asc: true };
|
|
964
965
|
const TARGET_PAGE_SIZE = 200;
|
|
965
966
|
|
|
966
967
|
async function fetchTargetByCountry() {
|
|
@@ -1087,16 +1088,36 @@ function renderTargetTable() {
|
|
|
1087
1088
|
const el = document.getElementById("targetTable");
|
|
1088
1089
|
const moreHint = document.getElementById("targetMoreHint");
|
|
1089
1090
|
|
|
1090
|
-
|
|
1091
|
+
// 应用本地排序
|
|
1092
|
+
let displayUsers = currentTargetUsers;
|
|
1093
|
+
if (currentTargetSort.key) {
|
|
1094
|
+
displayUsers = [...currentTargetUsers].sort((a, b) => {
|
|
1095
|
+
let va = a[currentTargetSort.key];
|
|
1096
|
+
let vb = b[currentTargetSort.key];
|
|
1097
|
+
if (va == null && vb == null) return 0;
|
|
1098
|
+
if (va == null) return 1;
|
|
1099
|
+
if (vb == null) return -1;
|
|
1100
|
+
if (typeof va === "number" && typeof vb === "number") {
|
|
1101
|
+
return currentTargetSort.asc ? va - vb : vb - va;
|
|
1102
|
+
}
|
|
1103
|
+
va = String(va).toLowerCase();
|
|
1104
|
+
vb = String(vb).toLowerCase();
|
|
1105
|
+
return currentTargetSort.asc
|
|
1106
|
+
? va.localeCompare(vb)
|
|
1107
|
+
: vb.localeCompare(va);
|
|
1108
|
+
});
|
|
1109
|
+
}
|
|
1110
|
+
|
|
1111
|
+
if (displayUsers.length === 0) {
|
|
1091
1112
|
el.innerHTML =
|
|
1092
|
-
'<tr><td colspan="
|
|
1113
|
+
'<tr><td colspan="10" style="text-align:center;color:#888;padding:24px">暂无数据</td></tr>';
|
|
1093
1114
|
if (moreHint) {
|
|
1094
1115
|
moreHint.style.display = "none";
|
|
1095
1116
|
}
|
|
1096
1117
|
return;
|
|
1097
1118
|
}
|
|
1098
1119
|
|
|
1099
|
-
el.innerHTML =
|
|
1120
|
+
el.innerHTML = displayUsers
|
|
1100
1121
|
.map((u, i) => {
|
|
1101
1122
|
const nick = (u.nickname || "")
|
|
1102
1123
|
.replace(/</g, "&lt;")
|
|
@@ -1113,6 +1134,13 @@ function renderTargetTable() {
|
|
|
1113
1134
|
? formatTime(u.latestVideoTime * 1000)
|
|
1114
1135
|
: "-";
|
|
1115
1136
|
const refreshTime = u.refreshTime ? formatTime(u.refreshTime) : "-";
|
|
1137
|
+
const topPlayCount =
|
|
1138
|
+
u.topVideoPlayCount != null && u.topVideoPlayCount > 0
|
|
1139
|
+
? formatNum(u.topVideoPlayCount)
|
|
1140
|
+
: "-";
|
|
1141
|
+
const topPlayCountCell = u.topVideoHref
|
|
1142
|
+
? `<td data-label="最大播放量" style="font-size:11px;color:#888"><a href="${u.topVideoHref}" target="_blank" style="color:#3b82f6;text-decoration:none" title="点击查看视频">${topPlayCount}</a></td>`
|
|
1143
|
+
: `<td data-label="最大播放量" style="font-size:11px;color:#888">${topPlayCount}</td>`;
|
|
1116
1144
|
|
|
1117
1145
|
const editIcon = ' <span style="font-size:10px;opacity:0.5">✏️</span>';
|
|
1118
1146
|
const locationCell = u.modifiedAt
|
|
@@ -1140,6 +1168,7 @@ function renderTargetTable() {
|
|
|
1140
1168
|
<td data-label="视频">${videos}</td>
|
|
1141
1169
|
${locationCell}
|
|
1142
1170
|
<td data-label="确认国家" style="font-size:11px">${confirmedLocation}</td>
|
|
1171
|
+
${topPlayCountCell}
|
|
1143
1172
|
<td data-label="最近发布" style="font-size:11px;color:#888">${latestVideo}</td>
|
|
1144
1173
|
<td data-label="最近刷新" style="font-size:11px;color:#888">${refreshTime}</td>
|
|
1145
1174
|
</tr>`;
|
|
@@ -1325,7 +1354,7 @@ function renderPendingCountryGrid(countries) {
|
|
|
1325
1354
|
<div class="pending-country-item${isUnknown ? "" : " has-target"}"
|
|
1326
1355
|
onclick="filterByPendingCountry('${safeCountry}')">
|
|
1327
1356
|
<div class="country-action-btns">
|
|
1328
|
-
<button class="country-action-btn restore" title="重置为需要预处理" onclick="event.stopPropagation(); resetPendingByCountry('${safeCountry}', ${c.count})">↺</button>
|
|
1357
|
+
<!-- <button class="country-action-btn restore" title="重置为需要预处理" onclick="event.stopPropagation(); resetPendingByCountry('${safeCountry}', ${c.count})">↺</button> -->
|
|
1329
1358
|
<button class="country-action-btn" title="移到毛料库,暂不处理" onclick="event.stopPropagation(); moveCountryJobsToRaw('pending', '${safeCountry}', ${c.count})">✕</button>
|
|
1330
1359
|
</div>
|
|
1331
1360
|
<div class="country-name">${isUnknown ? "🌍 " : ""}${c.country}</div>
|
|
@@ -1836,11 +1865,37 @@ function initTableSorting() {
|
|
|
1836
1865
|
});
|
|
1837
1866
|
}
|
|
1838
1867
|
|
|
1868
|
+
/**
 * Attaches click-to-sort handlers to every `th.sortable-target` header.
 *
 * Clicking a header whose `data-sort` key is already the active key flips the
 * sort direction; clicking a different header makes it the active key and
 * resets the direction to ascending. All sortable headers then have their
 * indicator reset, the clicked header is marked with `sort-asc`/`sort-desc`
 * and the matching arrow, and the target table is re-rendered.
 *
 * Headers without a `data-sort` attribute ignore clicks.
 */
function initTargetTableSorting() {
  const HEADER_SELECTOR = "th.sortable-target";

  for (const header of document.querySelectorAll(HEADER_SELECTOR)) {
    header.addEventListener("click", () => {
      const sortKey = header.dataset.sort;
      if (!sortKey) {
        return;
      }

      // Same column toggles direction; a new column starts ascending.
      const isActiveColumn = currentTargetSort.key === sortKey;
      if (isActiveColumn) {
        currentTargetSort.asc = !currentTargetSort.asc;
      } else {
        currentTargetSort.key = sortKey;
        currentTargetSort.asc = true;
      }

      // Reset the indicator on every sortable header before marking the
      // clicked one. Headers are re-queried on each click, as before.
      for (const other of document.querySelectorAll(HEADER_SELECTOR)) {
        other.classList.remove("sort-asc", "sort-desc");
        const otherIcon = other.querySelector(".sort-icon");
        if (otherIcon) {
          otherIcon.textContent = "↕";
        }
      }

      const ascending = currentTargetSort.asc;
      header.classList.add(ascending ? "sort-asc" : "sort-desc");

      const activeIcon = header.querySelector(".sort-icon");
      if (activeIcon) {
        activeIcon.textContent = ascending ? "↑" : "↓";
      }

      renderTargetTable();
    });
  }
}
|
|
1892
|
+
|
|
1839
1893
|
// 初始化
|
|
1840
1894
|
fetchStats();
|
|
1841
1895
|
fetchUsers();
|
|
1842
1896
|
fetchClientErrors();
|
|
1843
1897
|
initTableSorting();
|
|
1898
|
+
initTargetTableSorting();
|
|
1844
1899
|
|
|
1845
1900
|
setInterval(fetchStats, 10000);
|
|
1846
1901
|
setInterval(fetchUsers, 10000);
|
|
@@ -327,7 +327,8 @@
|
|
|
327
327
|
<th>视频</th>
|
|
328
328
|
<th>国家</th>
|
|
329
329
|
<th>确认国家</th>
|
|
330
|
-
<th
|
|
330
|
+
<th class="sortable-target" data-sort="topVideoPlayCount">最大播放量 <span class="sort-icon">↕</span></th>
|
|
331
|
+
<th class="sortable-target" data-sort="latestVideoTime">最近发布 <span class="sort-icon">↕</span></th>
|
|
331
332
|
<th>最近刷新</th>
|
|
332
333
|
</tr>
|
|
333
334
|
</thead>
|
|
@@ -619,6 +619,31 @@ th.sortable.sort-desc .sort-icon {
|
|
|
619
619
|
color: #fe2c55;
|
|
620
620
|
}
|
|
621
621
|
|
|
622
|
+
/* Sortable column headers: clickable, with text selection disabled. */
th.sortable-target {
  user-select: none;
  cursor: pointer;
}

th.sortable-target:hover {
  color: #fe2c55;
}

/* Sort indicator: dimmed by default. */
th.sortable-target .sort-icon {
  margin-left: 2px;
  font-size: 10px;
  opacity: 0.4;
}

/* Indicator at full opacity and highlighted when the header carries
   the sort-asc or sort-desc class. */
th.sortable-target.sort-asc .sort-icon,
th.sortable-target.sort-desc .sort-icon {
  color: #fe2c55;
  opacity: 1;
}
|
|
646
|
+
|
|
622
647
|
td {
|
|
623
648
|
padding: 6px 10px;
|
|
624
649
|
border-bottom: 1px solid #1f1f2a;
|
package/src/watch/server.js
CHANGED
|
@@ -89,9 +89,14 @@ function sendCSV(res, columns, rows) {
|
|
|
89
89
|
res.end(body);
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
export function startWatchServer(
|
|
92
|
+
export function startWatchServer(
|
|
93
|
+
dataAnchor,
|
|
94
|
+
port = 3000,
|
|
95
|
+
existingStore,
|
|
96
|
+
options = {},
|
|
97
|
+
) {
|
|
93
98
|
return new Promise((_resolve, reject) => {
|
|
94
|
-
const store = existingStore || createStore(dataAnchor);
|
|
99
|
+
const store = existingStore || createStore(dataAnchor, options);
|
|
95
100
|
|
|
96
101
|
function logJob(action, detail) {
|
|
97
102
|
const ts = new Date().toLocaleTimeString("zh-CN", { hour12: false });
|
|
@@ -172,7 +177,7 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
|
|
|
172
177
|
.filter(Boolean)
|
|
173
178
|
: null;
|
|
174
179
|
const loggedIn = params.loggedIn === "true";
|
|
175
|
-
const job = store.claimNextJob(
|
|
180
|
+
const job = await store.claimNextJob(
|
|
176
181
|
userId,
|
|
177
182
|
5 * 60 * 1000,
|
|
178
183
|
locations,
|
|
@@ -377,6 +382,7 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
|
|
|
377
382
|
const stats = computeStatsIncremental(store);
|
|
378
383
|
stats.targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
379
384
|
stats.clientLoginStatus = store.getClientLoginStatus();
|
|
385
|
+
stats.llmSampleOffsets = store.getLlmSampleOffsets(); // 添加偏移量状态
|
|
380
386
|
sendJSON(res, 200, stats);
|
|
381
387
|
return;
|
|
382
388
|
}
|
|
@@ -570,6 +576,8 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
|
|
|
570
576
|
uniqueId: u.uniqueId,
|
|
571
577
|
nickname: u.nickname || "",
|
|
572
578
|
followerCount: u.followerCount || 0,
|
|
579
|
+
topVideoPlayCount: u.topVideoPlayCount || null,
|
|
580
|
+
topVideoHref: u.topVideoHref || null,
|
|
573
581
|
}));
|
|
574
582
|
sendJSON(res, 200, { total: targets.length, users });
|
|
575
583
|
}
|
|
@@ -657,12 +665,32 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
|
|
|
657
665
|
offset: parseInt(params.offset || "0", 10),
|
|
658
666
|
},
|
|
659
667
|
);
|
|
668
|
+
// 确保每个用户对象包含 topVideoPlayCount 和 topVideoHref
|
|
669
|
+
if (result.users && Array.isArray(result.users)) {
|
|
670
|
+
result.users = result.users.map((u) => ({
|
|
671
|
+
...u,
|
|
672
|
+
topVideoPlayCount: u.topVideoPlayCount || null,
|
|
673
|
+
topVideoHref: u.topVideoHref || null,
|
|
674
|
+
}));
|
|
675
|
+
}
|
|
660
676
|
sendJSON(res, 200, result);
|
|
661
677
|
return;
|
|
662
678
|
}
|
|
663
679
|
|
|
664
680
|
// 默认:全量(兼容旧调用)
|
|
665
681
|
const result = store.getTargetUsersByCountry(DEFAULT_TARGET_LOCATIONS);
|
|
682
|
+
// 确保每个用户对象包含 topVideoPlayCount 和 topVideoHref
|
|
683
|
+
if (result.countries && Array.isArray(result.countries)) {
|
|
684
|
+
for (const country of result.countries) {
|
|
685
|
+
if (country.users && Array.isArray(country.users)) {
|
|
686
|
+
country.users = country.users.map((u) => ({
|
|
687
|
+
...u,
|
|
688
|
+
topVideoPlayCount: u.topVideoPlayCount || null,
|
|
689
|
+
topVideoHref: u.topVideoHref || null,
|
|
690
|
+
}));
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
}
|
|
666
694
|
sendJSON(res, 200, result);
|
|
667
695
|
return;
|
|
668
696
|
}
|
|
@@ -764,6 +792,41 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
|
|
|
764
792
|
return;
|
|
765
793
|
}
|
|
766
794
|
|
|
795
|
+
// 手动触发从 raw_jobs 补充任务到 jobs
|
|
796
|
+
if (req.method === "POST" && routePath === "/api/raw-jobs/refill") {
|
|
797
|
+
try {
|
|
798
|
+
const body = await readBody(req);
|
|
799
|
+
const locationsParam = body.locations || "";
|
|
800
|
+
const locations = locationsParam
|
|
801
|
+
? locationsParam
|
|
802
|
+
.split(",")
|
|
803
|
+
.map((s) => s.trim().toUpperCase())
|
|
804
|
+
.filter(Boolean)
|
|
805
|
+
: null;
|
|
806
|
+
const limit = body.limit || 500;
|
|
807
|
+
const options = {
|
|
808
|
+
llmScore: !!body.llmScore,
|
|
809
|
+
llmMinScore: body.llmMinScore ?? 60,
|
|
810
|
+
llmSampleSize: body.llmSampleSize ?? 100,
|
|
811
|
+
llmMinReturn: body.llmMinReturn ?? 60,
|
|
812
|
+
llmMaxBatches: body.llmMaxBatches ?? 10,
|
|
813
|
+
};
|
|
814
|
+
const result = await store.refillJobsFromRaw(
|
|
815
|
+
locations,
|
|
816
|
+
limit,
|
|
817
|
+
options,
|
|
818
|
+
);
|
|
819
|
+
if (result.error) {
|
|
820
|
+
sendJSON(res, 400, result);
|
|
821
|
+
return;
|
|
822
|
+
}
|
|
823
|
+
sendJSON(res, 200, result);
|
|
824
|
+
} catch (e) {
|
|
825
|
+
sendJSON(res, 400, { error: e.message });
|
|
826
|
+
}
|
|
827
|
+
return;
|
|
828
|
+
}
|
|
829
|
+
|
|
767
830
|
if (req.method === "POST" && routePath === "/api/attach-stuck/restore") {
|
|
768
831
|
try {
|
|
769
832
|
const body = await readBody(req);
|