tt-help-cli-ycl 1.3.80 → 1.3.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/refresh.js +1 -0
- package/src/cli/watch.js +25 -4
- package/src/lib/api-interceptor.js +21 -4
- package/src/lib/args.js +14 -0
- package/src/lib/page-error-detector.js +31 -14
- package/src/lib/scroll-collector.js +1 -1
- package/src/scraper/explore-core.js +27 -1
- package/src/videos/core.js +6 -2
- package/src/watch/data-store.js +701 -96
- package/src/watch/public/app.js +59 -4
- package/src/watch/public/index.html +2 -1
- package/src/watch/public/style.css +25 -0
- package/src/watch/server.js +66 -3
package/src/watch/data-store.js
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import Database from "better-sqlite3";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
isLocationInList,
|
|
6
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
7
|
+
} from "../lib/target-locations.js";
|
|
5
8
|
|
|
6
9
|
// SQLite 用户表(用于判重)
|
|
7
10
|
let db = null;
|
|
@@ -143,6 +146,12 @@ function initUserDb(filePath) {
|
|
|
143
146
|
if (!existingJobColumns.has("bio_link")) {
|
|
144
147
|
db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
|
|
145
148
|
}
|
|
149
|
+
if (!existingJobColumns.has("top_video_play_count")) {
|
|
150
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
|
|
151
|
+
}
|
|
152
|
+
if (!existingJobColumns.has("top_video_href")) {
|
|
153
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
|
+
}
|
|
146
155
|
db.exec(`
|
|
147
156
|
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
148
157
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -650,13 +659,24 @@ function getDashboardStatsFromDb(targetLocations = []) {
|
|
|
650
659
|
AND instr(COALESCE(sources, ''), '"guess"') = 0
|
|
651
660
|
AND instr(COALESCE(sources, ''), '"following"') = 0
|
|
652
661
|
AND instr(COALESCE(sources, ''), '"follower"') = 0
|
|
653
|
-
THEN 1 ELSE 0 END) as seed
|
|
654
|
-
SUM(CASE WHEN COALESCE(tt_seller, '') = '' AND COALESCE(user_update_count, 0) <= 0 THEN 1 ELSE 0 END) as userUpdateTasks
|
|
662
|
+
THEN 1 ELSE 0 END) as seed
|
|
655
663
|
FROM jobs
|
|
656
664
|
`,
|
|
657
665
|
)
|
|
658
666
|
.get(...targetParams);
|
|
659
667
|
|
|
668
|
+
// userUpdateTasks 单独从 jobs_base 统计
|
|
669
|
+
const userUpdateTasksRow = db
|
|
670
|
+
.prepare(
|
|
671
|
+
`
|
|
672
|
+
SELECT COUNT(*) as userUpdateTasks
|
|
673
|
+
FROM jobs_base
|
|
674
|
+
WHERE COALESCE(tt_seller, '') = ''
|
|
675
|
+
AND COALESCE(user_update_count, 0) <= 0
|
|
676
|
+
`,
|
|
677
|
+
)
|
|
678
|
+
.get();
|
|
679
|
+
|
|
660
680
|
// countryStats 和 targetCountryStats 需要 GROUP BY,保留为独立查询
|
|
661
681
|
const countryStats = db
|
|
662
682
|
.prepare(
|
|
@@ -712,7 +732,7 @@ function getDashboardStatsFromDb(targetLocations = []) {
|
|
|
712
732
|
restrictedUsers: aggregateRow.restricted,
|
|
713
733
|
errorUsers: aggregateRow.error,
|
|
714
734
|
targetUsers: aggregateRow.targetUsers,
|
|
715
|
-
userUpdateTasks:
|
|
735
|
+
userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
|
|
716
736
|
targetCountryStats,
|
|
717
737
|
countryStats,
|
|
718
738
|
sourceStats: {
|
|
@@ -761,8 +781,8 @@ function getUserUpdateByCountryFromDb() {
|
|
|
761
781
|
SELECT
|
|
762
782
|
COALESCE(guessed_location, '未知') as country,
|
|
763
783
|
COUNT(*) as count
|
|
764
|
-
FROM
|
|
765
|
-
WHERE
|
|
784
|
+
FROM jobs_base
|
|
785
|
+
WHERE tt_seller IS NULL
|
|
766
786
|
AND COALESCE(user_update_count, 0) <= 0
|
|
767
787
|
GROUP BY COALESCE(guessed_location, '未知')
|
|
768
788
|
ORDER BY count DESC
|
|
@@ -782,8 +802,8 @@ function getAttachStuckByCountryFromDb() {
|
|
|
782
802
|
SELECT
|
|
783
803
|
COALESCE(guessed_location, '未知') as country,
|
|
784
804
|
COUNT(*) as count
|
|
785
|
-
FROM
|
|
786
|
-
WHERE
|
|
805
|
+
FROM jobs_base
|
|
806
|
+
WHERE tt_seller IS NULL
|
|
787
807
|
AND COALESCE(user_update_count, 0) = 1
|
|
788
808
|
GROUP BY COALESCE(guessed_location, '未知')
|
|
789
809
|
ORDER BY count DESC
|
|
@@ -816,7 +836,7 @@ function restoreAttachStuckByCountry(country) {
|
|
|
816
836
|
.prepare(
|
|
817
837
|
`
|
|
818
838
|
SELECT COUNT(*) as c
|
|
819
|
-
FROM
|
|
839
|
+
FROM jobs_base
|
|
820
840
|
WHERE ${whereSql}
|
|
821
841
|
`,
|
|
822
842
|
)
|
|
@@ -828,7 +848,7 @@ function restoreAttachStuckByCountry(country) {
|
|
|
828
848
|
|
|
829
849
|
db.prepare(
|
|
830
850
|
`
|
|
831
|
-
UPDATE
|
|
851
|
+
UPDATE jobs_base
|
|
832
852
|
SET user_update_count = 0,
|
|
833
853
|
updated_at = ?,
|
|
834
854
|
claimed_by = NULL,
|
|
@@ -920,11 +940,36 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
920
940
|
};
|
|
921
941
|
}
|
|
922
942
|
|
|
943
|
+
// pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
|
|
944
|
+
// userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
|
|
945
|
+
let sourceTable = "";
|
|
923
946
|
let scopeWhere = "";
|
|
947
|
+
let columns = "";
|
|
948
|
+
|
|
924
949
|
if (normalizedScope === "pending") {
|
|
925
|
-
|
|
950
|
+
sourceTable = "jobs";
|
|
951
|
+
scopeWhere = `status = 'pending'`;
|
|
952
|
+
columns = `
|
|
953
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
954
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
955
|
+
tt_seller, verified, video_count, comment_count,
|
|
956
|
+
guessed_location, location_created, follower_count,
|
|
957
|
+
following_count, heart_count, refresh_time, processed,
|
|
958
|
+
processed_at, created_at, updated_at, region, signature,
|
|
959
|
+
sec_uid, latest_video_time
|
|
960
|
+
`;
|
|
926
961
|
} else if (normalizedScope === "userUpdate") {
|
|
927
|
-
|
|
962
|
+
sourceTable = "jobs_base";
|
|
963
|
+
scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
|
|
964
|
+
columns = `
|
|
965
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
966
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
967
|
+
tt_seller, verified, video_count, comment_count,
|
|
968
|
+
guessed_location, location_created, follower_count,
|
|
969
|
+
following_count, heart_count, refresh_time, processed,
|
|
970
|
+
processed_at, created_at, updated_at, region, signature,
|
|
971
|
+
sec_uid, latest_video_time
|
|
972
|
+
`;
|
|
928
973
|
} else {
|
|
929
974
|
return {
|
|
930
975
|
moved: 0,
|
|
@@ -943,7 +988,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
943
988
|
.prepare(
|
|
944
989
|
`
|
|
945
990
|
SELECT COUNT(*) as c
|
|
946
|
-
FROM
|
|
991
|
+
FROM ${sourceTable}
|
|
947
992
|
WHERE ${whereSql}
|
|
948
993
|
`,
|
|
949
994
|
)
|
|
@@ -957,74 +1002,18 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
957
1002
|
db.prepare(
|
|
958
1003
|
`
|
|
959
1004
|
INSERT OR REPLACE INTO raw_jobs (
|
|
960
|
-
|
|
961
|
-
nickname,
|
|
962
|
-
status,
|
|
963
|
-
sources,
|
|
964
|
-
claimed_by,
|
|
965
|
-
claimed_at,
|
|
966
|
-
error,
|
|
967
|
-
pinned,
|
|
968
|
-
no_video,
|
|
969
|
-
restricted,
|
|
970
|
-
user_update_count,
|
|
971
|
-
tt_seller,
|
|
972
|
-
verified,
|
|
973
|
-
video_count,
|
|
974
|
-
comment_count,
|
|
975
|
-
guessed_location,
|
|
976
|
-
location_created,
|
|
977
|
-
follower_count,
|
|
978
|
-
following_count,
|
|
979
|
-
heart_count,
|
|
980
|
-
refresh_time,
|
|
981
|
-
processed,
|
|
982
|
-
processed_at,
|
|
983
|
-
created_at,
|
|
984
|
-
updated_at,
|
|
985
|
-
region,
|
|
986
|
-
signature,
|
|
987
|
-
sec_uid,
|
|
988
|
-
latest_video_time
|
|
1005
|
+
${columns}
|
|
989
1006
|
)
|
|
990
1007
|
SELECT
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
status,
|
|
994
|
-
sources,
|
|
995
|
-
claimed_by,
|
|
996
|
-
claimed_at,
|
|
997
|
-
error,
|
|
998
|
-
pinned,
|
|
999
|
-
no_video,
|
|
1000
|
-
restricted,
|
|
1001
|
-
user_update_count,
|
|
1002
|
-
tt_seller,
|
|
1003
|
-
verified,
|
|
1004
|
-
video_count,
|
|
1005
|
-
comment_count,
|
|
1006
|
-
guessed_location,
|
|
1007
|
-
location_created,
|
|
1008
|
-
follower_count,
|
|
1009
|
-
following_count,
|
|
1010
|
-
heart_count,
|
|
1011
|
-
refresh_time,
|
|
1012
|
-
processed,
|
|
1013
|
-
processed_at,
|
|
1014
|
-
created_at,
|
|
1015
|
-
updated_at,
|
|
1016
|
-
region,
|
|
1017
|
-
signature,
|
|
1018
|
-
sec_uid,
|
|
1019
|
-
latest_video_time
|
|
1020
|
-
FROM jobs
|
|
1008
|
+
${columns}
|
|
1009
|
+
FROM ${sourceTable}
|
|
1021
1010
|
WHERE ${whereSql}
|
|
1022
1011
|
`,
|
|
1023
1012
|
).run(targetCountry);
|
|
1024
1013
|
|
|
1025
1014
|
db.prepare(
|
|
1026
1015
|
`
|
|
1027
|
-
DELETE FROM
|
|
1016
|
+
DELETE FROM ${sourceTable}
|
|
1028
1017
|
WHERE ${whereSql}
|
|
1029
1018
|
`,
|
|
1030
1019
|
).run(targetCountry);
|
|
@@ -1527,6 +1516,8 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1527
1516
|
modified_at,
|
|
1528
1517
|
latest_video_time,
|
|
1529
1518
|
refresh_time,
|
|
1519
|
+
top_video_play_count,
|
|
1520
|
+
top_video_href,
|
|
1530
1521
|
status,
|
|
1531
1522
|
sources
|
|
1532
1523
|
FROM jobs
|
|
@@ -1675,6 +1666,8 @@ const writableJobColumns = new Set([
|
|
|
1675
1666
|
"sec_uid",
|
|
1676
1667
|
"status_code",
|
|
1677
1668
|
"latest_video_time",
|
|
1669
|
+
"top_video_play_count",
|
|
1670
|
+
"top_video_href",
|
|
1678
1671
|
]);
|
|
1679
1672
|
|
|
1680
1673
|
function normalizeJobValue(column, value) {
|
|
@@ -1718,6 +1711,13 @@ function getJobRow(uniqueId) {
|
|
|
1718
1711
|
return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
|
|
1719
1712
|
}
|
|
1720
1713
|
|
|
1714
|
+
/**
 * Fetch a single row from the jobs_base table by its unique_id.
 *
 * @param {string} uniqueId - Value matched against jobs_base.unique_id.
 * @returns {Object|null|undefined} null when the module-level db handle is
 *   not set; otherwise whatever the prepared statement's get() yields.
 */
function getJobBaseRow(uniqueId) {
  if (db) {
    const lookup = db.prepare("SELECT * FROM jobs_base WHERE unique_id = ?");
    return lookup.get(uniqueId);
  }
  return null;
}
|
|
1720
|
+
|
|
1721
1721
|
function getJob(uniqueId) {
|
|
1722
1722
|
return mapJobRow(getJobRow(uniqueId));
|
|
1723
1723
|
}
|
|
@@ -1795,6 +1795,43 @@ function inferStatus(u) {
|
|
|
1795
1795
|
return "pending";
|
|
1796
1796
|
}
|
|
1797
1797
|
|
|
1798
|
+
/**
 * Apply a partial update to one jobs_base row.
 *
 * Keys of `info` are converted with camelToSnake ("bio" is stored as
 * "signature"); only columns listed in writableJobColumns are written, and
 * undefined / empty-string values are skipped. updated_at is always set to
 * the current time.
 *
 * @param {string} uniqueId - unique_id of the row to update.
 * @param {Object} info - Field/value pairs to write (camelCase or snake_case keys).
 * @param {boolean} [incrementCount=true] - When true, user_update_count is
 *   set to the stored value plus one.
 * @returns {{ok: true, userUpdateCount: number}|{error: string}}
 */
function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };

  const currentRow = getJobBaseRow(uniqueId);
  if (!currentRow) return { error: "user not found" };

  const pending = {};
  Object.entries(info || {}).forEach(([field, raw]) => {
    // The row key itself is never rewritten.
    if (field === "uniqueId" || field === "unique_id") return;
    if (raw === undefined || raw === "") return;

    const snake = camelToSnake(field);
    // Field alias: bio -> signature
    const target = snake === "bio" ? "signature" : snake;
    if (writableJobColumns.has(target)) {
      pending[target] = normalizeJobValue(target, raw);
    }
  });

  pending.updated_at = Date.now();
  if (incrementCount) {
    pending.user_update_count = (currentRow.user_update_count || 0) + 1;
  }

  const names = Object.keys(pending);
  if (names.length > 0) {
    const assignments = names.map((name) => `${name} = ?`).join(", ");
    const params = names.map((name) => pending[name]);
    db.prepare(
      `UPDATE jobs_base SET ${assignments} WHERE unique_id = ?`,
    ).run(...params, uniqueId);
  }

  const userUpdateCount =
    pending.user_update_count ?? currentRow.user_update_count ?? 0;
  return { ok: true, userUpdateCount };
}
|
|
1834
|
+
|
|
1798
1835
|
function addJobBaseToDb(user) {
|
|
1799
1836
|
if (!db) return;
|
|
1800
1837
|
const now = Date.now();
|
|
@@ -1895,16 +1932,30 @@ function addJob(user) {
|
|
|
1895
1932
|
writeTxn(user);
|
|
1896
1933
|
}
|
|
1897
1934
|
|
|
1898
|
-
export function createStore(filePath) {
|
|
1935
|
+
export function createStore(filePath, options = {}) {
|
|
1899
1936
|
if (!filePath) {
|
|
1900
1937
|
throw new Error("createStore requires an explicit .db path");
|
|
1901
1938
|
}
|
|
1939
|
+
|
|
1940
|
+
// refillJobsFromRaw 的 LLM 打分配置(自动补充任务时使用)
|
|
1941
|
+
const refillLlmConfig = {
|
|
1942
|
+
llmScore: false,
|
|
1943
|
+
llmMinScore: 60,
|
|
1944
|
+
llmSampleSize: 100,
|
|
1945
|
+
...options.refillLlm,
|
|
1946
|
+
};
|
|
1947
|
+
|
|
1902
1948
|
let data = [];
|
|
1903
1949
|
// uniqueId → index 内存索引,O(1) 查找
|
|
1904
1950
|
let uidIndex = new Map();
|
|
1905
1951
|
let clientErrors = new Map();
|
|
1906
1952
|
// 客户端登录状态:userId → boolean
|
|
1907
1953
|
let clientLoginStatus = new Map();
|
|
1954
|
+
// refill 锁:防止多个 claimNextJob 同时触发 LLM refill
|
|
1955
|
+
let refillLock = null; // Promise | null
|
|
1956
|
+
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
1957
|
+
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
1958
|
+
let llmSampleOffsets = new Map();
|
|
1908
1959
|
if (filePath) {
|
|
1909
1960
|
// 初始化 SQLite 用户表(用于判重)
|
|
1910
1961
|
initUserDb(filePath);
|
|
@@ -2162,7 +2213,439 @@ export function createStore(filePath) {
|
|
|
2162
2213
|
return data;
|
|
2163
2214
|
}
|
|
2164
2215
|
|
|
2165
|
-
|
|
2216
|
+
/**
 * Ask the LLM scoring service how likely one raw_jobs row is to belong to
 * any of the target countries, on a 0-100 scale.
 *
 * Transport failures (including a failed undici import), non-2xx responses,
 * timeouts and unparseable replies do not reject: they resolve to a neutral
 * score of 50 with an explanatory reason, so one bad row cannot abort a
 * whole Promise.all batch.
 *
 * @param {Object} job - A raw_jobs row (unique_id, nickname, signature, ...).
 * @param {string[]} targetLocations - Target country list shown to the model.
 * @param {Object} [options] - Optional overrides (all have defaults).
 * @param {Function} [options.fetchImpl] - fetch-compatible function; defaults
 *   to the `fetch` export of undici, loaded lazily.
 * @param {string} [options.endpoint] - Chat-completions URL.
 * @param {string} [options.model] - Model name sent in the request body.
 * @param {number} [options.timeoutMs=60000] - Abort the request after this long.
 * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
 */
async function scoreJobLocation(job, targetLocations, options = {}) {
  const {
    fetchImpl,
    // NOTE(review): plaintext HTTP to a raw IP; the bearer token and user
    // profile data travel unencrypted. Consider an HTTPS endpoint.
    endpoint = "http://82.156.52.214:18000/v1/chat/completions",
    model = "zc-fast",
    timeoutMs = 60000,
  } = options;

  // Runtime prompt text is kept exactly as before.
  const prompt = `
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。

目标国家列表: ${targetLocations.join(", ")}

重要:
- 用户只要来自上述**任意一个**国家就算匹配。
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
- 请综合用户名、昵称、签名、位置等信息做判断。

用户信息:
- 用户名: ${job.unique_id || "未知"}
- 昵称: ${job.nickname || "未知"}
- 签名: ${job.signature || "未知"}
- 地区: ${job.region || "未知"}
- 猜测国家(参考): ${job.guessed_location || "未知"}
- 位置信息: ${job.location_created || "未知"}
- 主页链接: ${job.bio_link || "未知"}

返回 JSON(仅返回 JSON,无其他内容):
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}

Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
`;

  try {
    // Imported inside the try so a missing/broken undici yields the neutral
    // fallback instead of an unhandled rejection.
    const doFetch = fetchImpl ?? (await import("undici")).fetch;
    const apiKey = process.env.APIKEY || "";

    const response = await doFetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        messages: [{ role: "user", content: prompt }],
        max_tokens: 512,
        temperature: 0.1,
      }),
      // Without a timeout a stalled server would block the caller forever.
      signal: AbortSignal.timeout(timeoutMs),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const result = await response.json();
    const rawContent = result?.choices?.[0]?.message?.content;
    const content = typeof rawContent === "string" ? rawContent : "";

    const parsed = parseLlmScoreReply(content);
    const score = parsed ? toLlmScore(parsed.score) : null;

    if (score !== null) {
      return {
        uniqueId: job.unique_id,
        score: Math.max(0, Math.min(100, score)),
        reason: parsed.reason || "",
      };
    }

    // Every parsing strategy failed: fall back to the neutral score.
    console.error(
      `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: "LLM 响应解析失败,使用默认分",
    };
  } catch (e) {
    console.error(
      `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: `LLM 调用异常: ${e.message}`,
    };
  }
}

/**
 * Coerce a parsed `score` value to a number.
 * Accepts numbers and plain numeric strings; returns null for anything else.
 */
function toLlmScore(value) {
  if (typeof value === "number") {
    return Number.isNaN(value) ? null : value;
  }
  if (typeof value === "string" && /^\s*-?\d+(\.\d+)?\s*$/.test(value)) {
    return Number(value);
  }
  return null;
}

/**
 * Extract `{ score, reason }` from the model's reply text.
 *
 * Strategies, in order: the text as JSON; the outermost {...} span as JSON;
 * that span with curly quotes straightened and whitespace collapsed; and
 * finally regex extraction of the score (and, best effort, the reason).
 * Returns null when no score can be found.
 */
function parseLlmScoreReply(content) {
  const braced = content.match(/\{[\s\S]*\}/);
  const candidates = [content];
  if (braced) {
    candidates.push(
      braced[0],
      braced[0]
        .replace(/[\u201C\u201D]/g, '"') // curly quotes -> straight quotes
        .replace(/\s+/g, " ") // collapse redundant whitespace
        .trim(),
    );
  }

  for (const candidate of candidates) {
    try {
      const value = JSON.parse(candidate);
      if (value !== null && typeof value === "object") return value;
    } catch {
      // Not valid JSON in this form; try the next, more lenient one.
    }
  }

  if (!braced) {
    // No braces at all: only accept strictly quoted keys.
    const strictScore = content.match(/"score"\s*:\s*(\d+)/);
    if (!strictScore) return null;
    const strictReason = content.match(/"reason"\s*:\s*"([^"]*)"/);
    return {
      // No `|| 50` here: a genuine 0 must stay 0.
      score: Number.parseInt(strictScore[1], 10),
      reason: strictReason ? strictReason[1] : "解析降级 - 宽松模式",
    };
  }

  const looseScore = content.match(/"?score"?\s*:\s*(\d+)/i);
  if (!looseScore) return null;

  // The reason may itself contain quotes, so take everything from the
  // opening quote of its value up to the last closing brace.
  let reason = "解析降级";
  const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
  if (reasonKeyPos !== -1) {
    const afterKey = content.substring(reasonKeyPos);
    const valueStart = afterKey.indexOf('"', afterKey.indexOf(":") + 1) + 1;
    const reasonLength = content.lastIndexOf("}") - reasonKeyPos - valueStart;
    if (reasonLength > 0) {
      reason = afterKey.substring(valueStart, valueStart + reasonLength).trim();
      if (reason.startsWith('"')) reason = reason.substring(1);
      if (reason.endsWith('"')) reason = reason.substring(0, reason.length - 1);
    }
  }
  return { score: Number.parseInt(looseScore[1], 10), reason };
}
|
|
2365
|
+
|
|
2366
|
+
/**
 * Score a list of raw_jobs rows, running the scorer in concurrent batches.
 *
 * Batches run one after another; the rows inside a batch are scored
 * concurrently with Promise.all. Results keep the order of `jobs`.
 *
 * @param {Object[]} jobs - raw_jobs rows to score.
 * @param {string[]} targetLocations - Target country list passed to the scorer.
 * @param {number} [batchSize=10] - Rows scored concurrently per batch. Values
 *   below 1 are raised to 1, and 0/NaN fall back to 10, so the loop always
 *   advances.
 * @param {Function} [scorer=scoreJobLocation] - Async (job, targetLocations)
 *   function producing one result per row.
 * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
 */
async function scoreJobsBatch(
  jobs,
  targetLocations,
  batchSize = 10,
  scorer = scoreJobLocation,
) {
  const rows = Array.isArray(jobs) ? jobs : [];
  // A step of 0 (or a negative one) would never terminate the loop below.
  const step = Math.max(1, Math.floor(Number(batchSize)) || 10);

  const results = [];
  for (let start = 0; start < rows.length; start += step) {
    const batch = rows.slice(start, start + step);
    const batchResults = await Promise.all(
      batch.map((job) => scorer(job, targetLocations)),
    );
    results.push(...batchResults);
  }
  return results;
}
|
|
2384
|
+
|
|
2385
|
+
/**
 * Move a batch of eligible rows from raw_jobs into jobs as 'pending'.
 *
 * Eligible rows have video_count, follower_count and following_count all
 * greater than 0 and, when `locations` is given, a guessed_location in that
 * list (compared case-insensitively). Newest rows (created_at DESC) go first.
 *
 * Plain mode selects up to `limit` ids and moves them synchronously.
 * LLM mode (options.llmScore with a non-empty `locations`) samples rows per
 * country, scores them with scoreJobsBatch, and moves only rows scoring at
 * least options.llmMinScore; it returns a Promise. A per-country offset is
 * remembered in llmSampleOffsets so later calls continue where this one
 * stopped.
 *
 * In both modes every selected row is deleted from raw_jobs, while `moved`
 * counts only rows actually inserted into jobs (INSERT OR IGNORE skips
 * unique_ids that already exist there).
 *
 * @param {string[]|null} [locations=null] - Target countries; null = no filter.
 * @param {number} [limit=500] - Max rows to move (clamped to 1..2000).
 * @param {Object} [options] - Optional settings.
 * @param {boolean} [options.llmScore=false] - Enable LLM filtering.
 * @param {number} [options.llmMinScore=60] - Minimum score to qualify.
 * @param {number} [options.llmSampleSize=100] - Rows scored per sampling round.
 * @param {number} [options.llmMinReturn=60] - Stop sampling once this many qualify.
 * @param {number} [options.llmMaxBatches=10] - Max sampling rounds per country.
 * @returns {{moved: number, error?: string}|Promise<{moved: number, scored: number, qualified: number, scores: Object[]}>}
 */
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
  if (!db) {
    return { moved: 0, error: "db not ready" };
  }

  const safeLimit = Math.max(
    1,
    Math.min(2000, Number.parseInt(limit, 10) || 500),
  );
  // De-duplicated so the same country is not sampled twice in LLM mode.
  const normalizedLocations = locations
    ? [
        ...new Set(
          locations
            .map((loc) => String(loc).trim().toUpperCase())
            .filter(Boolean),
        ),
      ]
    : null;
  const hasLocations = !!normalizedLocations && normalizedLocations.length > 0;

  const useLlm = !!options.llmScore;
  const llmMinScore = options.llmMinScore ?? 60;
  // Kept >= 1: a negative LIMIT means "no limit" in SQLite.
  const llmSampleSize = Math.max(
    1,
    Number.parseInt(options.llmSampleSize ?? 100, 10) || 100,
  );

  // Build the WHERE clause.
  const baseConditions = [
    "COALESCE(video_count, 0) > 0",
    "COALESCE(follower_count, 0) > 0",
    "COALESCE(following_count, 0) > 0",
  ];
  const conditions = [...baseConditions];
  const args = [];
  if (hasLocations) {
    conditions.push(
      `UPPER(COALESCE(guessed_location, '')) IN (${normalizedLocations.map(() => "?").join(", ")})`,
    );
    args.push(...normalizedLocations);
  }
  const whereSql = conditions.join(" AND ");

  // Count eligible rows.
  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;

  if (!count) {
    return { moved: 0 };
  }

  const copiedColumns = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "pinned",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "confirmed_location",
    "follower_count",
    "following_count",
    "heart_count",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "bio_link",
    "sec_uid",
    "status_code",
    "latest_video_time",
  ];
  const insertColumns = copiedColumns.join(", ");
  // Moved rows always start as 'pending', whatever raw_jobs.status held.
  const selectColumns = copiedColumns
    .map((column) => (column === "status" ? "'pending'" : column))
    .join(", ");

  // Copies the given raw_jobs rows into jobs, deletes them from raw_jobs and
  // returns how many were actually inserted. Must run inside a transaction.
  // Ids are bound in chunks to stay well under SQLite's bound-parameter limit.
  const moveRawJobsByIds = (ids) => {
    let inserted = 0;
    for (let start = 0; start < ids.length; start += 500) {
      const chunk = ids.slice(start, start + 500);
      const placeholders = chunk.map(() => "?").join(", ");
      inserted += db
        .prepare(
          `
          INSERT OR IGNORE INTO jobs (${insertColumns})
          SELECT ${selectColumns}
          FROM raw_jobs
          WHERE unique_id IN (${placeholders})
        `,
        )
        .run(...chunk).changes;
      db.prepare(
        `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
      ).run(...chunk);
    }
    return inserted;
  };

  if (!(useLlm && hasLocations)) {
    // Plain move: pick the ids once so INSERT and DELETE act on exactly the
    // same rows even when created_at values tie.
    const moveTxn = db.transaction(() => {
      const ids = db
        .prepare(
          `
          SELECT unique_id FROM raw_jobs
          WHERE ${whereSql}
          ORDER BY created_at DESC
          LIMIT ?
        `,
        )
        .all(...args, safeLimit)
        .map((row) => row.unique_id);
      return moveRawJobsByIds(ids);
    });

    const moved = moveTxn();
    markStatsDirty();
    return { moved };
  }

  // LLM mode: sample per guessed country, remembering an offset per country
  // so repeated calls do not re-score the same rows.
  const llmMinReturn = options.llmMinReturn ?? 60; // stop once this many qualify
  const maxBatches = options.llmMaxBatches ?? 10; // max sampling rounds, prevents endless loops

  // Log the current offset state.
  const offsetSummary = Array.from(llmSampleOffsets.entries())
    .map(([k, v]) => `${k}:${v}`)
    .join(", ");
  console.error(
    `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},最少返回 ${llmMinReturn} 条`,
  );
  if (offsetSummary) {
    console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
  }

  // Same case-insensitive comparison as the eligibility count above, so
  // mixed-case guessed_location values are actually sampled.
  const locationWhere = [
    ...baseConditions,
    "UPPER(COALESCE(guessed_location, '')) = ?",
  ].join(" AND ");

  // Returns a Promise; the caller must await it.
  return (async () => {
    const allScores = [];
    const sampledFrom = new Map(); // unique_id -> country it was sampled under
    let qualifiedSoFar = 0;

    for (const location of normalizedLocations) {
      // Offset where the previous call stopped for this country.
      let offset = llmSampleOffsets.get(location) || 0;

      const locationCount =
        db
          .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${locationWhere}`)
          .get(location)?.c || 0;

      if (locationCount === 0) {
        console.error(
          `[data-store] 国家 ${location}: raw_jobs 中无数据,跳过`,
        );
        continue;
      }

      // Past the end: one full pass is done, start over.
      if (offset >= locationCount) {
        offset = 0;
        llmSampleOffsets.set(location, 0);
      }

      console.error(
        `[data-store] 国家 ${location}: 共 ${locationCount} 条,从偏移量 ${offset} 开始`,
      );

      for (let batch = 0; batch < maxBatches; batch++) {
        const remaining = locationCount - offset;
        if (remaining <= 0) break;

        const sampleLimit = Math.min(llmSampleSize, remaining);
        const samples = db
          .prepare(
            `
            SELECT * FROM raw_jobs WHERE ${locationWhere}
            ORDER BY created_at DESC
            LIMIT ? OFFSET ?
          `,
          )
          .all(location, sampleLimit, offset);

        if (samples.length === 0) break;

        // NOTE(review): scoring uses DEFAULT_TARGET_LOCATIONS, not the
        // requested `locations` — confirm this is intended.
        const scores = await scoreJobsBatch(samples, DEFAULT_TARGET_LOCATIONS);
        const batchQualified = scores.filter((s) => s.score >= llmMinScore);

        for (const row of samples) {
          sampledFrom.set(row.unique_id, location);
        }
        allScores.push(...scores);
        qualifiedSoFar += batchQualified.length;

        console.error(
          `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${batchQualified.length} 条,累计合格 ${qualifiedSoFar} 条`,
        );

        // Remember how far this country has been scanned.
        offset += samples.length;
        llmSampleOffsets.set(location, offset);

        // Enough qualified rows: stop sampling this country.
        if (qualifiedSoFar >= llmMinReturn) break;
      }

      // Enough qualified rows: stop sampling the remaining countries too.
      if (qualifiedSoFar >= llmMinReturn) break;
    }

    // Highest scores first, capped at safeLimit.
    const qualified = allScores
      .filter((s) => s.score >= llmMinScore)
      .sort((a, b) => b.score - a.score)
      .slice(0, safeLimit)
      .map((s) => s.uniqueId);

    if (!qualified.length) {
      console.error(
        `[data-store] LLM 打分后无符合条件的任务(阈值: ${llmMinScore},共采样 ${allScores.length} 条)`,
      );
      return {
        moved: 0,
        scored: allScores.length,
        qualified: 0,
        scores: allScores,
      };
    }

    // Move the qualifying rows.
    const moved = db.transaction(() => moveRawJobsByIds(qualified))();
    markStatsDirty();

    // Deleted rows all sat before their country's offset, so later rows
    // shifted down; pull each offset back so no unscored row is skipped.
    for (const id of qualified) {
      const location = sampledFrom.get(id);
      if (location === undefined) continue;
      const current = llmSampleOffsets.get(location) || 0;
      llmSampleOffsets.set(location, Math.max(0, current - 1));
    }

    // Log the final offset state.
    const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
      .map(([k, v]) => `${k}:${v}`)
      .join(", ");
    console.error(
      `[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
    );
    console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);

    return {
      moved,
      scored: allScores.length,
      qualified: qualified.length,
      scores: [...allScores],
    };
  })();
}
|
|
2647
|
+
|
|
2648
|
+
async function claimNextJob(
|
|
2166
2649
|
userId,
|
|
2167
2650
|
expireMs = 5 * 60 * 1000,
|
|
2168
2651
|
locations = null,
|
|
@@ -2438,6 +2921,71 @@ export function createStore(filePath) {
|
|
|
2438
2921
|
return claimRow(ranked);
|
|
2439
2922
|
}
|
|
2440
2923
|
}
|
|
2924
|
+
// 尝试从 raw_jobs 毛料库补充任务(使用 createStore 时配置的 LLM 打分)
|
|
2925
|
+
// 使用锁防止多个请求同时触发 LLM refill
|
|
2926
|
+
if (refillLock) {
|
|
2927
|
+
// 已有 refill 在进行中,等待完成后重新尝试领取
|
|
2928
|
+
await refillLock;
|
|
2929
|
+
for (const requireVideo of [true, false]) {
|
|
2930
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2931
|
+
if (pinned) {
|
|
2932
|
+
return claimRow(pinned);
|
|
2933
|
+
}
|
|
2934
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2935
|
+
if (ranked) {
|
|
2936
|
+
return claimRow(ranked);
|
|
2937
|
+
}
|
|
2938
|
+
}
|
|
2939
|
+
return null;
|
|
2940
|
+
}
|
|
2941
|
+
const refillResult = (async () => {
|
|
2942
|
+
refillLock = Promise.resolve(); // 占位
|
|
2943
|
+
const result = refillJobsFromRaw(
|
|
2944
|
+
normalizedLocations.length ? normalizedLocations : null,
|
|
2945
|
+
500,
|
|
2946
|
+
refillLlmConfig,
|
|
2947
|
+
);
|
|
2948
|
+
// refillJobsFromRaw 在 LLM 模式下返回 Promise
|
|
2949
|
+
if (result && typeof result.then === "function") {
|
|
2950
|
+
return result.finally(() => {
|
|
2951
|
+
refillLock = null;
|
|
2952
|
+
});
|
|
2953
|
+
}
|
|
2954
|
+
return result;
|
|
2955
|
+
})();
|
|
2956
|
+
if (refillResult && typeof refillResult.then === "function") {
|
|
2957
|
+
const awaited = await refillResult;
|
|
2958
|
+
if (awaited.moved > 0) {
|
|
2959
|
+
console.error(
|
|
2960
|
+
`[data-store] 从 raw_jobs 补充了 ${awaited.moved} 条任务到 jobs`,
|
|
2961
|
+
);
|
|
2962
|
+
for (const requireVideo of [true, false]) {
|
|
2963
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2964
|
+
if (pinned) {
|
|
2965
|
+
return claimRow(pinned);
|
|
2966
|
+
}
|
|
2967
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2968
|
+
if (ranked) {
|
|
2969
|
+
return claimRow(ranked);
|
|
2970
|
+
}
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
2973
|
+
} else if (refillResult.moved > 0) {
|
|
2974
|
+
console.error(
|
|
2975
|
+
`[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
|
|
2976
|
+
);
|
|
2977
|
+
for (const requireVideo of [true, false]) {
|
|
2978
|
+
const pinned = findPinnedPending(requireVideo);
|
|
2979
|
+
if (pinned) {
|
|
2980
|
+
return claimRow(pinned);
|
|
2981
|
+
}
|
|
2982
|
+
const ranked = findPrioritizedPending(requireVideo);
|
|
2983
|
+
if (ranked) {
|
|
2984
|
+
return claimRow(ranked);
|
|
2985
|
+
}
|
|
2986
|
+
}
|
|
2987
|
+
}
|
|
2988
|
+
|
|
2441
2989
|
return null;
|
|
2442
2990
|
}
|
|
2443
2991
|
|
|
@@ -3033,6 +3581,7 @@ export function createStore(filePath) {
|
|
|
3033
3581
|
"discoveredFollowers",
|
|
3034
3582
|
"uniqueId",
|
|
3035
3583
|
"sources",
|
|
3584
|
+
"topRecentVideo", // 单独处理,不进入通用循环
|
|
3036
3585
|
];
|
|
3037
3586
|
for (const key of Object.keys(result)) {
|
|
3038
3587
|
if (extraFields.includes(key)) continue;
|
|
@@ -3044,6 +3593,11 @@ export function createStore(filePath) {
|
|
|
3044
3593
|
user[key] = result[key];
|
|
3045
3594
|
}
|
|
3046
3595
|
}
|
|
3596
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3597
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3598
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3599
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3600
|
+
}
|
|
3047
3601
|
user.sources = [...new Set([...(user.sources || []), "processed"])];
|
|
3048
3602
|
}
|
|
3049
3603
|
if (user.status !== oldStatus) markStatsDirty();
|
|
@@ -3285,6 +3839,11 @@ export function createStore(filePath) {
|
|
|
3285
3839
|
}
|
|
3286
3840
|
}
|
|
3287
3841
|
}
|
|
3842
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3843
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3844
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3845
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3846
|
+
}
|
|
3288
3847
|
const newUsers = processDiscoveredUsers(result);
|
|
3289
3848
|
const ret = updateJobInfo(uniqueId, user, false);
|
|
3290
3849
|
if (ret.error) return { saved: false, error: ret.error };
|
|
@@ -3305,6 +3864,11 @@ export function createStore(filePath) {
|
|
|
3305
3864
|
}
|
|
3306
3865
|
}
|
|
3307
3866
|
}
|
|
3867
|
+
// 将 topRecentVideo 对象展开为扁平字段
|
|
3868
|
+
if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
|
|
3869
|
+
user.topVideoPlayCount = result.topRecentVideo.playCount || null;
|
|
3870
|
+
user.topVideoHref = result.topRecentVideo.href || null;
|
|
3871
|
+
}
|
|
3308
3872
|
const newUsers = processDiscoveredUsers(result);
|
|
3309
3873
|
return { saved: true, newUsers };
|
|
3310
3874
|
}
|
|
@@ -3375,7 +3939,7 @@ export function createStore(filePath) {
|
|
|
3375
3939
|
|
|
3376
3940
|
let sql = `
|
|
3377
3941
|
SELECT *
|
|
3378
|
-
FROM
|
|
3942
|
+
FROM jobs_base
|
|
3379
3943
|
WHERE COALESCE(tt_seller, '') = ''
|
|
3380
3944
|
AND COALESCE(user_update_count, 0) <= 0
|
|
3381
3945
|
`;
|
|
@@ -3395,7 +3959,7 @@ export function createStore(filePath) {
|
|
|
3395
3959
|
const now = Date.now();
|
|
3396
3960
|
const bumpStmt = db.prepare(
|
|
3397
3961
|
`
|
|
3398
|
-
UPDATE
|
|
3962
|
+
UPDATE jobs_base
|
|
3399
3963
|
SET user_update_count = COALESCE(user_update_count, 0) + 1,
|
|
3400
3964
|
updated_at = ?
|
|
3401
3965
|
WHERE unique_id = ?
|
|
@@ -3526,7 +4090,8 @@ export function createStore(filePath) {
|
|
|
3526
4090
|
function batchUpdateUserInfo(updates) {
|
|
3527
4091
|
if (db) {
|
|
3528
4092
|
const results = [];
|
|
3529
|
-
const
|
|
4093
|
+
const rawMoveList = [];
|
|
4094
|
+
const sellerMoveList = [];
|
|
3530
4095
|
|
|
3531
4096
|
const txn = db.transaction((items) => {
|
|
3532
4097
|
items.forEach((item) => {
|
|
@@ -3536,13 +4101,13 @@ export function createStore(filePath) {
|
|
|
3536
4101
|
let updateResult;
|
|
3537
4102
|
if (info && info.error && info.statusCode !== undefined) {
|
|
3538
4103
|
// 只更新 status_code,不更新其他字段
|
|
3539
|
-
updateResult =
|
|
4104
|
+
updateResult = updateJobBaseInfo(
|
|
3540
4105
|
uniqueId,
|
|
3541
4106
|
{ statusCode: info.statusCode },
|
|
3542
4107
|
true,
|
|
3543
4108
|
);
|
|
3544
4109
|
} else {
|
|
3545
|
-
updateResult =
|
|
4110
|
+
updateResult = updateJobBaseInfo(uniqueId, info, true);
|
|
3546
4111
|
}
|
|
3547
4112
|
|
|
3548
4113
|
if (updateResult.error) {
|
|
@@ -3550,34 +4115,66 @@ export function createStore(filePath) {
|
|
|
3550
4115
|
return;
|
|
3551
4116
|
}
|
|
3552
4117
|
|
|
3553
|
-
// 检查 tt_seller
|
|
3554
|
-
const row =
|
|
4118
|
+
// 检查 tt_seller:商家移到 jobs,非商家移到 raw_jobs
|
|
4119
|
+
const row = getJobBaseRow(uniqueId);
|
|
3555
4120
|
const ttSeller = row ? row.tt_seller : null;
|
|
3556
4121
|
if (ttSeller) {
|
|
3557
|
-
//
|
|
4122
|
+
// 商家:标记移动到 jobs
|
|
3558
4123
|
results.push({
|
|
3559
4124
|
uniqueId,
|
|
3560
4125
|
ok: true,
|
|
3561
4126
|
userUpdateCount: updateResult.userUpdateCount,
|
|
4127
|
+
_movedToJobs: true,
|
|
3562
4128
|
});
|
|
4129
|
+
sellerMoveList.push(uniqueId);
|
|
3563
4130
|
} else {
|
|
3564
|
-
//
|
|
4131
|
+
// 非商家:标记移动到 raw_jobs
|
|
3565
4132
|
results.push({
|
|
3566
4133
|
uniqueId,
|
|
3567
4134
|
ok: true,
|
|
3568
4135
|
userUpdateCount: updateResult.userUpdateCount,
|
|
3569
4136
|
_movedToRaw: true,
|
|
3570
4137
|
});
|
|
3571
|
-
|
|
4138
|
+
rawMoveList.push(uniqueId);
|
|
3572
4139
|
}
|
|
3573
4140
|
});
|
|
3574
4141
|
});
|
|
3575
4142
|
txn(updates);
|
|
3576
4143
|
|
|
3577
|
-
//
|
|
3578
|
-
if (
|
|
3579
|
-
const placeholders =
|
|
3580
|
-
|
|
4144
|
+
// 批量移动商家用户到 jobs
|
|
4145
|
+
if (sellerMoveList.length > 0) {
|
|
4146
|
+
const placeholders = sellerMoveList.map(() => "?").join(",");
|
|
4147
|
+
db.prepare(
|
|
4148
|
+
`
|
|
4149
|
+
INSERT OR REPLACE INTO jobs (
|
|
4150
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4151
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
4152
|
+
tt_seller, verified, video_count, comment_count,
|
|
4153
|
+
guessed_location, location_created, confirmed_location, modified_at,
|
|
4154
|
+
follower_count, following_count, heart_count, refresh_time,
|
|
4155
|
+
processed, processed_at, created_at, updated_at,
|
|
4156
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4157
|
+
)
|
|
4158
|
+
SELECT
|
|
4159
|
+
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4160
|
+
error, pinned, no_video, restricted, user_update_count,
|
|
4161
|
+
tt_seller, verified, video_count, comment_count,
|
|
4162
|
+
guessed_location, location_created, confirmed_location, modified_at,
|
|
4163
|
+
follower_count, following_count, heart_count, refresh_time,
|
|
4164
|
+
processed, processed_at, created_at, updated_at,
|
|
4165
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4166
|
+
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4167
|
+
`,
|
|
4168
|
+
).run(...sellerMoveList);
|
|
4169
|
+
|
|
4170
|
+
db.prepare(
|
|
4171
|
+
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
|
|
4172
|
+
).run(...sellerMoveList);
|
|
4173
|
+
}
|
|
4174
|
+
|
|
4175
|
+
// 批量移动非商家用户到 raw_jobs
|
|
4176
|
+
if (rawMoveList.length > 0) {
|
|
4177
|
+
const placeholders = rawMoveList.map(() => "?").join(",");
|
|
3581
4178
|
db.prepare(
|
|
3582
4179
|
`
|
|
3583
4180
|
INSERT OR REPLACE INTO raw_jobs (
|
|
@@ -3597,19 +4194,18 @@ export function createStore(filePath) {
|
|
|
3597
4194
|
follower_count, following_count, heart_count, refresh_time,
|
|
3598
4195
|
processed, processed_at, created_at, updated_at,
|
|
3599
4196
|
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
3600
|
-
FROM
|
|
4197
|
+
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
3601
4198
|
`,
|
|
3602
|
-
).run(...
|
|
4199
|
+
).run(...rawMoveList);
|
|
3603
4200
|
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
);
|
|
4201
|
+
db.prepare(
|
|
4202
|
+
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
|
|
4203
|
+
).run(...rawMoveList);
|
|
3608
4204
|
}
|
|
3609
4205
|
|
|
3610
4206
|
// 清理内部标记
|
|
3611
4207
|
return results.map((r) => {
|
|
3612
|
-
const { _movedToRaw, ...rest } = r;
|
|
4208
|
+
const { _movedToRaw, _movedToJobs, ...rest } = r;
|
|
3613
4209
|
return rest;
|
|
3614
4210
|
});
|
|
3615
4211
|
}
|
|
@@ -3879,6 +4475,9 @@ export function createStore(filePath) {
|
|
|
3879
4475
|
getStats,
|
|
3880
4476
|
getStatusGroups,
|
|
3881
4477
|
markGroupsDirty,
|
|
4478
|
+
refillJobsFromRaw,
|
|
4479
|
+
scoreJobLocation,
|
|
4480
|
+
scoreJobsBatch,
|
|
3882
4481
|
claimNextJob,
|
|
3883
4482
|
commitJob,
|
|
3884
4483
|
commitNewExplore,
|
|
@@ -3904,6 +4503,12 @@ export function createStore(filePath) {
|
|
|
3904
4503
|
debugClaimNextJob,
|
|
3905
4504
|
stopBackup,
|
|
3906
4505
|
rawQuery,
|
|
4506
|
+
getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
|
|
3907
4507
|
data,
|
|
3908
4508
|
};
|
|
4509
|
+
|
|
4510
|
+
// 辅助函数:获取 LLM 采样偏移量
|
|
4511
|
+
/**
 * Return the current LLM sampling offsets as a plain object.
 *
 * Builds a fresh object from the key/value pairs of `llmSampleOffsets` on
 * every call, so adding or removing keys on the result does not touch the
 * underlying collection.
 *
 * NOTE(review): `llmSampleOffsets` is defined outside this block; it is
 * presumably a Map (or another iterable of [key, value] pairs) — confirm.
 *
 * @returns {Object} Plain object holding one property per entry.
 */
function getLlmSampleOffsets() {
  const offsetsSnapshot = Object.fromEntries(llmSampleOffsets);
  return offsetsSnapshot;
}
|
|
3909
4514
|
}
|