tt-help-cli-ycl 1.3.84 → 1.3.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/test-refill-order.mjs +218 -0
- package/src/cli/tag.js +712 -0
- package/src/lib/args.js +182 -6
- package/src/lib/constants.js +43 -0
- package/src/lib/parse-ssr.mjs +1 -0
- package/src/lib/tag-discover.js +124 -0
- package/src/lib/tag-fetcher.js +296 -0
- package/src/lib/target-locations.js +18 -0
- package/src/main.js +14 -0
- package/src/npm-main.js +3 -0
- package/src/scraper/explore-core.js +6 -6
- package/src/watch/data-store.js +268 -49
- package/src/watch/server.js +164 -1
- package/src/watch/tag-service.js +334 -0
package/src/lib/target-locations.js
CHANGED
|
@@ -51,10 +51,28 @@ function findFirstMatchingLocation(
|
|
|
51
51
|
);
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
 * Return the first entry whose location belongs to the target list.
 *
 * `entries` is expected to be ordered by frequency (descending), so the first
 * hit is the most frequent target location. Entry locations are compared
 * as-is against the normalized target list.
 * NOTE(review): this assumes callers pass already-normalized location codes
 * in `entries` — confirm against normalizeLocation.
 *
 * @param {Array<[string, number]>} entries - [location, count] pairs, sorted by count descending
 * @param {string[]} targetLocations - target location list
 * @returns {string|null} the highest-ranked target location, or null when nothing matches
 */
function findBestMatchingLocation(
  entries,
  targetLocations = DEFAULT_TARGET_LOCATIONS,
) {
  // Nothing to scan: treat a missing list the same as an empty one.
  if (entries == null) return null;

  // Build the lookup once instead of rescanning the target array per entry.
  const wanted = new Set(normalizeLocationList(targetLocations));

  for (const entry of entries) {
    // Skip holes / null items rather than failing on destructuring.
    if (entry == null) continue;
    const [loc] = entry;
    if (wanted.has(loc)) return loc;
  }
  return null;
}
|
|
70
|
+
|
|
54
71
|
export {
|
|
55
72
|
DEFAULT_TARGET_LOCATIONS,
|
|
56
73
|
DEFAULT_TARGET_LOCATIONS_CSV,
|
|
57
74
|
findFirstMatchingLocation,
|
|
75
|
+
findBestMatchingLocation,
|
|
58
76
|
isLocationInList,
|
|
59
77
|
normalizeLocation,
|
|
60
78
|
normalizeLocationList,
|
package/src/main.js
CHANGED
|
@@ -11,6 +11,12 @@ import { handleVideoStats } from "./cli/videostats.js";
|
|
|
11
11
|
import { handleDbImport } from "./cli/db-import.js";
|
|
12
12
|
import { handleWebserver } from "./cli/webserver.js";
|
|
13
13
|
import { handleRefresh } from "./cli/refresh.js";
|
|
14
|
+
import {
|
|
15
|
+
handleTag,
|
|
16
|
+
handleDiscover,
|
|
17
|
+
handleScore,
|
|
18
|
+
handleScoreAll,
|
|
19
|
+
} from "./cli/tag.js";
|
|
14
20
|
|
|
15
21
|
async function main() {
|
|
16
22
|
const parsed = parseArgs();
|
|
@@ -36,6 +42,14 @@ async function main() {
|
|
|
36
42
|
return handleDbImport(parsed);
|
|
37
43
|
case "refresh":
|
|
38
44
|
return handleRefresh(parsed);
|
|
45
|
+
case "tag":
|
|
46
|
+
return handleTag(parsed);
|
|
47
|
+
case "tag-discover":
|
|
48
|
+
return handleDiscover(parsed);
|
|
49
|
+
case "tag-score":
|
|
50
|
+
return handleScore(parsed);
|
|
51
|
+
case "tag-score-all":
|
|
52
|
+
return handleScoreAll(parsed);
|
|
39
53
|
}
|
|
40
54
|
|
|
41
55
|
const {
|
package/src/npm-main.js
CHANGED
|
@@ -7,6 +7,7 @@ import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
|
|
|
7
7
|
import { handleOpen } from "./cli/open.js";
|
|
8
8
|
import { handleComments } from "./cli/comments.js";
|
|
9
9
|
import { handleRefresh } from "./cli/refresh.js";
|
|
10
|
+
import { handleTag } from "./cli/tag.js";
|
|
10
11
|
|
|
11
12
|
function exitUnsupportedCommand(command) {
|
|
12
13
|
console.error(
|
|
@@ -36,6 +37,8 @@ async function main() {
|
|
|
36
37
|
return handleComments(parsed);
|
|
37
38
|
case "refresh":
|
|
38
39
|
return handleRefresh(parsed);
|
|
40
|
+
case "tag":
|
|
41
|
+
return handleTag(parsed);
|
|
39
42
|
}
|
|
40
43
|
|
|
41
44
|
const {
|
package/src/scraper/explore-core.js
CHANGED
|
@@ -6,7 +6,7 @@ import { extractFollowAndFollowers } from "./modules/follow-extractor.js";
|
|
|
6
6
|
import { extractVideoLocation, setScraperProxy } from "../lib/scrape.js";
|
|
7
7
|
import {
|
|
8
8
|
DEFAULT_TARGET_LOCATIONS_CSV,
|
|
9
|
-
|
|
9
|
+
findBestMatchingLocation,
|
|
10
10
|
isLocationInList,
|
|
11
11
|
normalizeLocation,
|
|
12
12
|
normalizeLocationList,
|
|
@@ -152,13 +152,13 @@ async function processExplore(page, username, options, log) {
|
|
|
152
152
|
locationDecision = `众数 (${entries[0][1]}次)`;
|
|
153
153
|
}
|
|
154
154
|
} else {
|
|
155
|
-
// explore
|
|
156
|
-
const
|
|
157
|
-
|
|
155
|
+
// explore 模式:取频率最高的目标国家,不匹配则回退众数
|
|
156
|
+
const bestTargetLocation = findBestMatchingLocation(
|
|
157
|
+
entries,
|
|
158
158
|
locationList,
|
|
159
159
|
);
|
|
160
|
-
if (
|
|
161
|
-
locationCreated =
|
|
160
|
+
if (bestTargetLocation) {
|
|
161
|
+
locationCreated = bestTargetLocation;
|
|
162
162
|
locationDecision = "命中目标国家";
|
|
163
163
|
} else if (entries.length > 0) {
|
|
164
164
|
locationCreated = entries[0][0];
|
package/src/watch/data-store.js
CHANGED
|
@@ -152,6 +152,9 @@ function initUserDb(filePath) {
|
|
|
152
152
|
if (!existingJobColumns.has("top_video_href")) {
|
|
153
153
|
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
154
|
}
|
|
155
|
+
if (!existingJobColumns.has("user_create_time")) {
|
|
156
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
|
|
157
|
+
}
|
|
155
158
|
db.exec(`
|
|
156
159
|
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
157
160
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -212,6 +215,9 @@ function initUserDb(filePath) {
|
|
|
212
215
|
if (!existingJobBaseColumns.has("bio_link")) {
|
|
213
216
|
db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
|
|
214
217
|
}
|
|
218
|
+
if (!existingJobBaseColumns.has("user_create_time")) {
|
|
219
|
+
db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
|
|
220
|
+
}
|
|
215
221
|
db.exec(`
|
|
216
222
|
CREATE TABLE IF NOT EXISTS raw_jobs (
|
|
217
223
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -271,6 +277,9 @@ function initUserDb(filePath) {
|
|
|
271
277
|
if (!existingRawJobColumns.has("bio_link")) {
|
|
272
278
|
db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
|
|
273
279
|
}
|
|
280
|
+
if (!existingRawJobColumns.has("user_create_time")) {
|
|
281
|
+
db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
|
|
282
|
+
}
|
|
274
283
|
db.exec(`
|
|
275
284
|
CREATE TABLE IF NOT EXISTS videos (
|
|
276
285
|
id TEXT PRIMARY KEY,
|
|
@@ -384,6 +393,30 @@ function initUserDb(filePath) {
|
|
|
384
393
|
db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
|
|
385
394
|
}
|
|
386
395
|
|
|
396
|
+
// tags 表:标签发现与打分系统
|
|
397
|
+
db.exec(`
|
|
398
|
+
CREATE TABLE IF NOT EXISTS tags (
|
|
399
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
400
|
+
tag TEXT NOT NULL UNIQUE,
|
|
401
|
+
status TEXT NOT NULL DEFAULT 'new',
|
|
402
|
+
score REAL NOT NULL DEFAULT 0,
|
|
403
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
404
|
+
scored_at TEXT,
|
|
405
|
+
score_count INTEGER NOT NULL DEFAULT 0,
|
|
406
|
+
countries TEXT NOT NULL DEFAULT '[]',
|
|
407
|
+
matched_countries TEXT DEFAULT '[]',
|
|
408
|
+
total_posts INTEGER DEFAULT 0,
|
|
409
|
+
author_count INTEGER DEFAULT 0,
|
|
410
|
+
matched_authors INTEGER DEFAULT 0,
|
|
411
|
+
pushed_users INTEGER DEFAULT 0,
|
|
412
|
+
source TEXT NOT NULL DEFAULT 'llm',
|
|
413
|
+
user_prompt TEXT,
|
|
414
|
+
last_error TEXT
|
|
415
|
+
)
|
|
416
|
+
`);
|
|
417
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
|
|
418
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
|
|
419
|
+
|
|
387
420
|
const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
388
421
|
console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
|
|
389
422
|
}
|
|
@@ -956,7 +989,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
956
989
|
guessed_location, location_created, follower_count,
|
|
957
990
|
following_count, heart_count, refresh_time, processed,
|
|
958
991
|
processed_at, created_at, updated_at, region, signature,
|
|
959
|
-
sec_uid, latest_video_time
|
|
992
|
+
sec_uid, latest_video_time, user_create_time
|
|
960
993
|
`;
|
|
961
994
|
} else if (normalizedScope === "userUpdate") {
|
|
962
995
|
sourceTable = "jobs_base";
|
|
@@ -968,7 +1001,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
968
1001
|
guessed_location, location_created, follower_count,
|
|
969
1002
|
following_count, heart_count, refresh_time, processed,
|
|
970
1003
|
processed_at, created_at, updated_at, region, signature,
|
|
971
|
-
sec_uid, latest_video_time
|
|
1004
|
+
sec_uid, latest_video_time, user_create_time
|
|
972
1005
|
`;
|
|
973
1006
|
} else {
|
|
974
1007
|
return {
|
|
@@ -1307,6 +1340,158 @@ function getRawJobsPageFromDb({
|
|
|
1307
1340
|
};
|
|
1308
1341
|
}
|
|
1309
1342
|
|
|
1343
|
+
// ====== Tag discovery & scoring CRUD ======

/**
 * Decode a JSON-encoded list column of the tags table.
 *
 * Returns [] for NULL/empty values, malformed JSON, and JSON that is not an
 * array, so one bad row cannot break a whole listing and callers can always
 * use array methods on the result.
 *
 * @param {string|null|undefined} raw - stored column text
 * @returns {Array} decoded list, or [] when it cannot be decoded
 */
function parseTagJsonList(raw) {
  if (!raw) return [];
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}

/**
 * Copy a tags row, replacing the JSON text columns `countries` and
 * `matched_countries` with decoded arrays.
 *
 * @param {object} row - raw row as returned by the database
 * @returns {object} row copy with decoded list columns
 */
function hydrateTagRow(row) {
  return {
    ...row,
    countries: parseTagJsonList(row.countries),
    matched_countries: parseTagJsonList(row.matched_countries),
  };
}

/**
 * Insert a tag if it does not exist yet (INSERT OR IGNORE on the tag text).
 *
 * @param {string} tag - tag text
 * @param {string[]|string} [countries] - countries the tag targets; a single
 *   string is wrapped into a one-element list, a missing value is stored as []
 * @param {string} [source="llm"] - origin of the tag
 * @returns {{inserted: boolean, tag?: string, error?: string}} `inserted` is
 *   false when the row was ignored or when the statement failed
 */
function insertTag(tag, countries, source = "llm") {
  if (!db) return { inserted: false, error: "db not ready" };

  // JSON.stringify(undefined) yields undefined, which cannot be stored in the
  // NOT NULL `countries` column; always serialize a real array instead.
  let countryList = [];
  if (Array.isArray(countries)) {
    countryList = countries;
  } else if (typeof countries === "string" && countries !== "") {
    countryList = [countries];
  }

  try {
    const result = db
      .prepare(
        `
      INSERT OR IGNORE INTO tags (tag, countries, source)
      VALUES (?, ?, ?)
    `,
      )
      .run(tag, JSON.stringify(countryList), source);
    return { inserted: result.changes > 0, tag };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}

/**
 * List tags in a given status, lowest score first, then oldest first.
 *
 * @param {string} status - status value to match
 * @param {number} [limit=100] - maximum number of rows
 * @returns {object[]} rows with `countries` / `matched_countries` decoded
 */
function getTagsByStatus(status, limit = 100) {
  if (!db) return [];
  const rows = db
    .prepare(
      `
    SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
  `,
    )
    .all(status, limit);
  return rows.map(hydrateTagRow);
}

/**
 * List non-dead tags that target `country`, highest score first.
 *
 * @param {string} country - country that must appear in the tag's country list
 * @param {number} [minScore=0] - minimum score (inclusive)
 * @returns {object[]} matching rows with list columns decoded
 */
function getTagsByCountry(country, minScore = 0) {
  if (!db) return [];
  const rows = db
    .prepare(
      `
    SELECT * FROM tags WHERE status != 'dead'
    ORDER BY score DESC
  `,
    )
    .all();
  // Country membership is checked in JS because `countries` is stored as JSON text.
  return rows
    .map(hydrateTagRow)
    .filter((r) => r.countries.includes(country) && r.score >= minScore);
}

/**
 * List dead tags that target `country`, lowest score first.
 *
 * @param {string} country - country that must appear in the tag's country list
 * @returns {object[]} matching rows with list columns decoded
 */
function getDeadTags(country) {
  if (!db) return [];
  const rows = db
    .prepare(
      `
    SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
  `,
    )
    .all();
  return rows
    .map(hydrateTagRow)
    .filter((r) => r.countries.includes(country));
}

/**
 * Move a tag from 'new' to 'scoring'.
 *
 * The status check is part of the UPDATE itself, so a tag can only be claimed
 * while it is still 'new' even if another writer touches the row between two
 * statements. The follow-up SELECT only runs on failure, to build the error text.
 *
 * @param {string} tag - tag text
 * @returns {{ok: boolean, tag?: string, previousStatus?: string, error?: string}}
 */
function claimTag(tag) {
  if (!db) return { ok: false, error: "db not ready" };
  try {
    const claimed = db
      .prepare(
        "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
      )
      .run(tag);
    if (claimed.changes > 0) {
      return { ok: true, tag, previousStatus: "new" };
    }

    const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
    if (!row) return { ok: false, error: "tag not found" };
    return { ok: false, error: `tag status is ${row.status}, not new` };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}

/**
 * Record the outcome of scoring a tag.
 *
 * Every field is optional: a missing (null/undefined) field keeps the stored
 * column value via COALESCE. `scored_at` is always set to the current time as
 * an ISO-8601 string and `score_count` is incremented on every call.
 *
 * @param {string} tag - tag text
 * @param {object} [fields] - partial update
 * @param {number} [fields.score]
 * @param {string} [fields.status]
 * @param {number} [fields.totalPosts]
 * @param {number} [fields.authorCount]
 * @param {number} [fields.matchedAuthors]
 * @param {string[]} [fields.matchedCountries] - stored as JSON text
 * @param {number} [fields.pushedUsers]
 * @param {string} [fields.error] - stored in `last_error`
 * @returns {{ok: boolean, tag?: string, error?: string}} `ok` is false when no
 *   row matched or the statement failed
 */
function reportTagScore(tag, fields = {}) {
  if (!db) return { ok: false, error: "db not ready" };
  // Tolerate an explicit null as well as an omitted argument.
  const {
    score,
    status,
    totalPosts,
    authorCount,
    matchedAuthors,
    matchedCountries,
    pushedUsers,
    error,
  } = fields ?? {};
  const matchedCountriesJson = matchedCountries
    ? JSON.stringify(matchedCountries)
    : null;
  const now = new Date().toISOString();

  try {
    const result = db
      .prepare(
        `
      UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?
    `,
      )
      .run(
        score ?? null,
        status ?? null,
        totalPosts ?? null,
        authorCount ?? null,
        matchedAuthors ?? null,
        matchedCountriesJson,
        pushedUsers ?? null,
        error ?? null,
        now,
        tag,
      );
    return { ok: result.changes > 0, tag };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}

/**
 * List tags regardless of status, highest score first, then newest first.
 *
 * @param {number} [limit=200] - maximum number of rows
 * @returns {object[]} rows with `countries` / `matched_countries` decoded
 */
function getAllTags(limit = 200) {
  if (!db) return [];
  const rows = db
    .prepare(
      `
    SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
  `,
    )
    .all(limit);
  return rows.map(hydrateTagRow);
}
|
|
1494
|
+
|
|
1310
1495
|
// 调试接口:直接执行 SQL 查询,返回原始数据
|
|
1311
1496
|
function rawQuery(sql, params = []) {
|
|
1312
1497
|
if (!db) return { error: "db not ready" };
|
|
@@ -1668,6 +1853,7 @@ const writableJobColumns = new Set([
|
|
|
1668
1853
|
"latest_video_time",
|
|
1669
1854
|
"top_video_play_count",
|
|
1670
1855
|
"top_video_href",
|
|
1856
|
+
"user_create_time",
|
|
1671
1857
|
]);
|
|
1672
1858
|
|
|
1673
1859
|
function normalizeJobValue(column, value) {
|
|
@@ -1761,8 +1947,9 @@ function updateJobInfo(uniqueId, info, incrementCount = true) {
|
|
|
1761
1947
|
if (key === "uniqueId" || key === "unique_id") continue;
|
|
1762
1948
|
if (value === undefined || value === "") continue;
|
|
1763
1949
|
let column = camelToSnake(key);
|
|
1764
|
-
// 字段别名:bio → signature
|
|
1950
|
+
// 字段别名:bio → signature, createTime → user_create_time
|
|
1765
1951
|
if (column === "bio") column = "signature";
|
|
1952
|
+
if (column === "create_time") column = "user_create_time";
|
|
1766
1953
|
if (!writableJobColumns.has(column)) continue;
|
|
1767
1954
|
nextValues[column] = normalizeJobValue(column, value);
|
|
1768
1955
|
}
|
|
@@ -1805,8 +1992,9 @@ function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
|
|
|
1805
1992
|
if (key === "uniqueId" || key === "unique_id") continue;
|
|
1806
1993
|
if (value === undefined || value === "") continue;
|
|
1807
1994
|
let column = camelToSnake(key);
|
|
1808
|
-
// 字段别名:bio → signature
|
|
1995
|
+
// 字段别名:bio → signature, createTime → user_create_time
|
|
1809
1996
|
if (column === "bio") column = "signature";
|
|
1997
|
+
if (column === "create_time") column = "user_create_time";
|
|
1810
1998
|
if (!writableJobColumns.has(column)) continue;
|
|
1811
1999
|
nextValues[column] = normalizeJobValue(column, value);
|
|
1812
2000
|
}
|
|
@@ -2194,6 +2382,43 @@ export function createStore(filePath, options = {}) {
|
|
|
2194
2382
|
}
|
|
2195
2383
|
}
|
|
2196
2384
|
|
|
2385
|
+
/**
 * Bulk-add users as pending jobs.
 *
 * For each item the leading "@" is stripped from `uniqueId`; users that
 * `hasUser` already knows are counted as skipped, the rest are written through
 * `addUserToDb` + `addJobBaseToDb` inside one transaction per user. Items that
 * are not objects or have no usable `uniqueId` are ignored and counted in
 * neither total.
 *
 * NOTE(review): unlike getPendingUsers this path has no fallback for a missing
 * `db` — confirm it is only reachable when the database is open.
 *
 * @param {Array<{uniqueId: string, sources?: string[]|string, guessedLocation?: string, locationCreated?: string}>} users
 * @returns {{added: number, skipped: number}} counts of inserted and already-known users
 */
function addRawUsers(users) {
  if (!Array.isArray(users)) return { added: 0, skipped: 0 };
  const now = Date.now();
  let added = 0;
  let skipped = 0;
  // Built on first use and reused, instead of re-wrapping for every user.
  let writeTxn = null;

  for (const u of users) {
    // Ignore holes and primitives instead of failing on property access.
    if (u == null || typeof u !== "object") continue;

    // String() keeps non-string ids (e.g. numbers) from breaking .replace().
    const uniqueId = String(u.uniqueId || "").replace(/^@/, "").trim();
    if (!uniqueId) continue;
    if (hasUser(uniqueId)) {
      skipped++;
      continue;
    }

    // Accept a list, a single source value, or fall back to "tag".
    let sources;
    if (Array.isArray(u.sources)) {
      sources = u.sources;
    } else if (u.sources) {
      sources = [u.sources];
    } else {
      sources = ["tag"];
    }

    const userObj = {
      uniqueId,
      status: "pending",
      sources,
      guessedLocation: u.guessedLocation || u.locationCreated || null,
      locationCreated: u.locationCreated || null,
      createdAt: now,
      updatedAt: now,
    };

    if (!writeTxn) {
      writeTxn = db.transaction((job) => {
        addUserToDb(job);
        addJobBaseToDb(job);
      });
    }
    writeTxn(userObj);
    added++;
  }

  return { added, skipped };
}
|
|
2421
|
+
|
|
2197
2422
|
function getPendingUsers() {
|
|
2198
2423
|
if (db) {
|
|
2199
2424
|
return getAllJobs().filter((u) => u.status === "pending");
|
|
@@ -2410,7 +2635,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2410
2635
|
|
|
2411
2636
|
// 构建 WHERE 条件
|
|
2412
2637
|
const conditions = [
|
|
2413
|
-
"COALESCE(video_count, 0) >
|
|
2638
|
+
"COALESCE(video_count, 0) > 6",
|
|
2414
2639
|
"COALESCE(follower_count, 0) > 0",
|
|
2415
2640
|
"COALESCE(following_count, 0) > 0",
|
|
2416
2641
|
];
|
|
@@ -2496,7 +2721,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2496
2721
|
.prepare(
|
|
2497
2722
|
`
|
|
2498
2723
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2499
|
-
ORDER BY created_at DESC
|
|
2724
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2500
2725
|
LIMIT ? OFFSET ?
|
|
2501
2726
|
`,
|
|
2502
2727
|
)
|
|
@@ -2610,7 +2835,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2610
2835
|
guessed_location, location_created, confirmed_location,
|
|
2611
2836
|
follower_count, following_count, heart_count,
|
|
2612
2837
|
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
2613
|
-
status_code, latest_video_time
|
|
2838
|
+
status_code, latest_video_time, user_create_time
|
|
2614
2839
|
)
|
|
2615
2840
|
SELECT
|
|
2616
2841
|
unique_id, nickname, 'pending', sources, pinned,
|
|
@@ -2618,10 +2843,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2618
2843
|
guessed_location, location_created, confirmed_location,
|
|
2619
2844
|
follower_count, following_count, heart_count,
|
|
2620
2845
|
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
2621
|
-
status_code, latest_video_time
|
|
2846
|
+
status_code, latest_video_time, user_create_time
|
|
2622
2847
|
FROM raw_jobs
|
|
2623
2848
|
WHERE ${whereSql}
|
|
2624
|
-
ORDER BY created_at DESC
|
|
2849
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2625
2850
|
LIMIT ?
|
|
2626
2851
|
`,
|
|
2627
2852
|
).run(...args, safeLimit);
|
|
@@ -2633,7 +2858,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2633
2858
|
WHERE unique_id IN (
|
|
2634
2859
|
SELECT unique_id FROM raw_jobs
|
|
2635
2860
|
WHERE ${whereSql}
|
|
2636
|
-
ORDER BY created_at DESC
|
|
2861
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2637
2862
|
LIMIT ?
|
|
2638
2863
|
)
|
|
2639
2864
|
`,
|
|
@@ -3734,20 +3959,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3734
3959
|
const now = Date.now();
|
|
3735
3960
|
const threshold = now - maxAgeSeconds * 1000;
|
|
3736
3961
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
3737
|
-
const targetLocations =
|
|
3738
|
-
"CZ",
|
|
3739
|
-
"GR",
|
|
3740
|
-
"HU",
|
|
3741
|
-
"PT",
|
|
3742
|
-
"ES",
|
|
3743
|
-
"PL",
|
|
3744
|
-
"NL",
|
|
3745
|
-
"BE",
|
|
3746
|
-
"DE",
|
|
3747
|
-
"FR",
|
|
3748
|
-
"IT",
|
|
3749
|
-
"IE",
|
|
3750
|
-
];
|
|
3962
|
+
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
3751
3963
|
const placeholders = targetLocations.map(() => "?").join(",");
|
|
3752
3964
|
const row = db
|
|
3753
3965
|
.prepare(
|
|
@@ -3758,7 +3970,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3758
3970
|
AND verified = 0
|
|
3759
3971
|
AND location_created IN (${placeholders})
|
|
3760
3972
|
AND COALESCE(refresh_time, ?) < ?
|
|
3761
|
-
ORDER BY COALESCE(refresh_time, ?) ASC
|
|
3973
|
+
ORDER BY COALESCE(pinned, 0) DESC, COALESCE(refresh_time, ?) ASC
|
|
3762
3974
|
LIMIT 1
|
|
3763
3975
|
`,
|
|
3764
3976
|
)
|
|
@@ -3779,20 +3991,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3779
3991
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
3780
3992
|
|
|
3781
3993
|
// 筛选目标国家用户,按 refreshTime 升序取最远的(没有则默认 2016-01-01)
|
|
3782
|
-
const targetLocations =
|
|
3783
|
-
"CZ",
|
|
3784
|
-
"GR",
|
|
3785
|
-
"HU",
|
|
3786
|
-
"PT",
|
|
3787
|
-
"ES",
|
|
3788
|
-
"PL",
|
|
3789
|
-
"NL",
|
|
3790
|
-
"BE",
|
|
3791
|
-
"DE",
|
|
3792
|
-
"FR",
|
|
3793
|
-
"IT",
|
|
3794
|
-
"IE",
|
|
3795
|
-
];
|
|
3994
|
+
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
3796
3995
|
const targetUsers = data.filter(
|
|
3797
3996
|
(u) =>
|
|
3798
3997
|
u.ttSeller &&
|
|
@@ -3808,6 +4007,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3808
4007
|
if (recentEnough.length === 0) return null;
|
|
3809
4008
|
|
|
3810
4009
|
recentEnough.sort((a, b) => {
|
|
4010
|
+
// pinned 优先,其次按 refreshTime 升序
|
|
4011
|
+
if ((a.pinned ? 1 : 0) !== (b.pinned ? 1 : 0)) {
|
|
4012
|
+
return (b.pinned ? 1 : 0) - (a.pinned ? 1 : 0);
|
|
4013
|
+
}
|
|
3811
4014
|
const ta = a.refreshTime || defaultTime;
|
|
3812
4015
|
const tb = b.refreshTime || defaultTime;
|
|
3813
4016
|
return ta - tb;
|
|
@@ -4101,7 +4304,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4101
4304
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4102
4305
|
follower_count, following_count, heart_count, refresh_time,
|
|
4103
4306
|
processed, processed_at, created_at, updated_at,
|
|
4104
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4307
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4308
|
+
user_create_time
|
|
4105
4309
|
)
|
|
4106
4310
|
SELECT
|
|
4107
4311
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4110,7 +4314,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4110
4314
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4111
4315
|
follower_count, following_count, heart_count, refresh_time,
|
|
4112
4316
|
processed, processed_at, created_at, updated_at,
|
|
4113
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4317
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4318
|
+
user_create_time
|
|
4114
4319
|
FROM jobs WHERE unique_id = ?
|
|
4115
4320
|
`,
|
|
4116
4321
|
).run(safeId);
|
|
@@ -4149,11 +4354,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4149
4354
|
return;
|
|
4150
4355
|
}
|
|
4151
4356
|
|
|
4152
|
-
// 检查 tt_seller
|
|
4357
|
+
// 检查 tt_seller:商家且视频数>0移到 jobs,否则移到 raw_jobs
|
|
4153
4358
|
const row = getJobBaseRow(uniqueId);
|
|
4154
4359
|
const ttSeller = row ? row.tt_seller : null;
|
|
4155
|
-
|
|
4156
|
-
|
|
4360
|
+
const videoCount = row ? row.video_count || 0 : 0;
|
|
4361
|
+
if (ttSeller && videoCount > 0) {
|
|
4362
|
+
// 商家且有视频:标记移动到 jobs
|
|
4157
4363
|
results.push({
|
|
4158
4364
|
uniqueId,
|
|
4159
4365
|
ok: true,
|
|
@@ -4162,7 +4368,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4162
4368
|
});
|
|
4163
4369
|
sellerMoveList.push(uniqueId);
|
|
4164
4370
|
} else {
|
|
4165
|
-
//
|
|
4371
|
+
// 非商家或无视频:标记移动到 raw_jobs
|
|
4166
4372
|
results.push({
|
|
4167
4373
|
uniqueId,
|
|
4168
4374
|
ok: true,
|
|
@@ -4187,7 +4393,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4187
4393
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4188
4394
|
follower_count, following_count, heart_count, refresh_time,
|
|
4189
4395
|
processed, processed_at, created_at, updated_at,
|
|
4190
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4396
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4397
|
+
user_create_time
|
|
4191
4398
|
)
|
|
4192
4399
|
SELECT
|
|
4193
4400
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4196,7 +4403,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4196
4403
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4197
4404
|
follower_count, following_count, heart_count, refresh_time,
|
|
4198
4405
|
processed, processed_at, created_at, updated_at,
|
|
4199
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4406
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4407
|
+
user_create_time
|
|
4200
4408
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4201
4409
|
`,
|
|
4202
4410
|
).run(...sellerMoveList);
|
|
@@ -4218,7 +4426,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4218
4426
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4219
4427
|
follower_count, following_count, heart_count, refresh_time,
|
|
4220
4428
|
processed, processed_at, created_at, updated_at,
|
|
4221
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4429
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4430
|
+
user_create_time
|
|
4222
4431
|
)
|
|
4223
4432
|
SELECT
|
|
4224
4433
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4227,7 +4436,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4227
4436
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4228
4437
|
follower_count, following_count, heart_count, refresh_time,
|
|
4229
4438
|
processed, processed_at, created_at, updated_at,
|
|
4230
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4439
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4440
|
+
user_create_time
|
|
4231
4441
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4232
4442
|
`,
|
|
4233
4443
|
).run(...rawMoveList);
|
|
@@ -4483,6 +4693,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4483
4693
|
hasUser,
|
|
4484
4694
|
userExists,
|
|
4485
4695
|
addUser,
|
|
4696
|
+
addRawUsers,
|
|
4486
4697
|
getPendingUsers,
|
|
4487
4698
|
getProcessedUsers,
|
|
4488
4699
|
getAllUsers,
|
|
@@ -4540,6 +4751,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4540
4751
|
stopBackup,
|
|
4541
4752
|
rawQuery,
|
|
4542
4753
|
getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
|
|
4754
|
+
// Tag 发现与打分
|
|
4755
|
+
insertTag,
|
|
4756
|
+
getTagsByStatus,
|
|
4757
|
+
getTagsByCountry,
|
|
4758
|
+
getDeadTags,
|
|
4759
|
+
claimTag,
|
|
4760
|
+
reportTagScore,
|
|
4761
|
+
getAllTags,
|
|
4543
4762
|
data,
|
|
4544
4763
|
};
|
|
4545
4764
|
|