tt-help-cli-ycl 1.3.84 → 1.3.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/test-refill-order.mjs +218 -0
- package/src/cli/tag.js +736 -0
- package/src/lib/args.js +182 -6
- package/src/lib/constants.js +43 -0
- package/src/lib/parse-ssr.mjs +1 -0
- package/src/lib/tag-discover.js +150 -0
- package/src/lib/tag-fetcher.js +296 -0
- package/src/lib/target-locations.js +18 -0
- package/src/main.js +14 -0
- package/src/npm-main.js +14 -0
- package/src/scraper/explore-core.js +6 -6
- package/src/watch/data-store.js +344 -49
- package/src/watch/server.js +178 -1
- package/src/watch/tag-service.js +339 -0
package/src/watch/data-store.js
CHANGED
|
@@ -152,6 +152,9 @@ function initUserDb(filePath) {
|
|
|
152
152
|
if (!existingJobColumns.has("top_video_href")) {
|
|
153
153
|
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
154
|
}
|
|
155
|
+
if (!existingJobColumns.has("user_create_time")) {
|
|
156
|
+
db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
|
|
157
|
+
}
|
|
155
158
|
db.exec(`
|
|
156
159
|
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
157
160
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -212,6 +215,9 @@ function initUserDb(filePath) {
|
|
|
212
215
|
if (!existingJobBaseColumns.has("bio_link")) {
|
|
213
216
|
db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
|
|
214
217
|
}
|
|
218
|
+
if (!existingJobBaseColumns.has("user_create_time")) {
|
|
219
|
+
db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
|
|
220
|
+
}
|
|
215
221
|
db.exec(`
|
|
216
222
|
CREATE TABLE IF NOT EXISTS raw_jobs (
|
|
217
223
|
unique_id TEXT PRIMARY KEY,
|
|
@@ -271,6 +277,9 @@ function initUserDb(filePath) {
|
|
|
271
277
|
if (!existingRawJobColumns.has("bio_link")) {
|
|
272
278
|
db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
|
|
273
279
|
}
|
|
280
|
+
if (!existingRawJobColumns.has("user_create_time")) {
|
|
281
|
+
db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
|
|
282
|
+
}
|
|
274
283
|
db.exec(`
|
|
275
284
|
CREATE TABLE IF NOT EXISTS videos (
|
|
276
285
|
id TEXT PRIMARY KEY,
|
|
@@ -384,6 +393,30 @@ function initUserDb(filePath) {
|
|
|
384
393
|
db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
|
|
385
394
|
}
|
|
386
395
|
|
|
396
|
+
// tags 表:标签发现与打分系统
|
|
397
|
+
db.exec(`
|
|
398
|
+
CREATE TABLE IF NOT EXISTS tags (
|
|
399
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
400
|
+
tag TEXT NOT NULL UNIQUE,
|
|
401
|
+
status TEXT NOT NULL DEFAULT 'new',
|
|
402
|
+
score REAL NOT NULL DEFAULT 0,
|
|
403
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
404
|
+
scored_at TEXT,
|
|
405
|
+
score_count INTEGER NOT NULL DEFAULT 0,
|
|
406
|
+
countries TEXT NOT NULL DEFAULT '[]',
|
|
407
|
+
matched_countries TEXT DEFAULT '[]',
|
|
408
|
+
total_posts INTEGER DEFAULT 0,
|
|
409
|
+
author_count INTEGER DEFAULT 0,
|
|
410
|
+
matched_authors INTEGER DEFAULT 0,
|
|
411
|
+
pushed_users INTEGER DEFAULT 0,
|
|
412
|
+
source TEXT NOT NULL DEFAULT 'llm',
|
|
413
|
+
user_prompt TEXT,
|
|
414
|
+
last_error TEXT
|
|
415
|
+
)
|
|
416
|
+
`);
|
|
417
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
|
|
418
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
|
|
419
|
+
|
|
387
420
|
const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
388
421
|
console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
|
|
389
422
|
}
|
|
@@ -956,7 +989,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
956
989
|
guessed_location, location_created, follower_count,
|
|
957
990
|
following_count, heart_count, refresh_time, processed,
|
|
958
991
|
processed_at, created_at, updated_at, region, signature,
|
|
959
|
-
sec_uid, latest_video_time
|
|
992
|
+
sec_uid, latest_video_time, user_create_time
|
|
960
993
|
`;
|
|
961
994
|
} else if (normalizedScope === "userUpdate") {
|
|
962
995
|
sourceTable = "jobs_base";
|
|
@@ -968,7 +1001,7 @@ function moveJobsToRawByCountry(scope, country) {
|
|
|
968
1001
|
guessed_location, location_created, follower_count,
|
|
969
1002
|
following_count, heart_count, refresh_time, processed,
|
|
970
1003
|
processed_at, created_at, updated_at, region, signature,
|
|
971
|
-
sec_uid, latest_video_time
|
|
1004
|
+
sec_uid, latest_video_time, user_create_time
|
|
972
1005
|
`;
|
|
973
1006
|
} else {
|
|
974
1007
|
return {
|
|
@@ -1307,6 +1340,170 @@ function getRawJobsPageFromDb({
|
|
|
1307
1340
|
};
|
|
1308
1341
|
}
|
|
1309
1342
|
|
|
1343
|
+
// ====== Tag 发现与打分 CRUD ======
|
|
1344
|
+
|
|
1345
|
+
/**
 * Insert a tag into the `tags` table unless a row with the same tag exists.
 *
 * The tag is normalized before storage: every leading `#` / whitespace
 * character is stripped, the remainder is trimmed and lower-cased.
 *
 * @param {string} tag - Raw tag text, with or without a leading `#`.
 * @param {string[]|string|null} [countries] - Country codes for the tag.
 *   Stored as a JSON array; a single string is wrapped, null/undefined
 *   becomes an empty array.
 * @param {string} [source="llm"] - Value written to the `source` column.
 * @returns {{inserted: boolean, tag?: string, error?: string}}
 *   `inserted` is false when the row already existed (INSERT OR IGNORE
 *   changed nothing), when the tag is invalid, or when the statement failed.
 */
function insertTag(tag, countries, source = "llm") {
  if (!db) return { inserted: false, error: "db not ready" };
  if (typeof tag !== "string") {
    return { inserted: false, error: "invalid tag" };
  }

  // Strip '#' and whitespace together so that inputs such as " #foo" or
  // "# #foo" cannot leave a '#' prefix behind.
  const normalized = tag.replace(/^[#\s]+/, "").trim().toLowerCase();
  if (normalized.length < 2) {
    return { inserted: false, error: "invalid tag" };
  }

  // Readers JSON.parse this column and expect an array, so never store the
  // JSON text `null` or a bare scalar.
  let countryList = [];
  if (Array.isArray(countries)) {
    countryList = countries;
  } else if (countries != null) {
    countryList = [countries];
  }

  try {
    const result = db
      .prepare(
        `
        INSERT OR IGNORE INTO tags (tag, countries, source)
        VALUES (?, ?, ?)
        `,
      )
      .run(normalized, JSON.stringify(countryList), source);
    return { inserted: result.changes > 0, tag: normalized };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}
|
|
1366
|
+
|
|
1367
|
+
/**
 * List tags with the given status, lowest score first, then oldest first.
 *
 * @param {string} status - Value matched against the `status` column.
 * @param {number} [limit=100] - Maximum number of rows returned.
 * @returns {object[]} Tag rows with `countries` and `matched_countries`
 *   decoded to arrays. Returns an empty array when the database is not ready.
 */
function getTagsByStatus(status, limit = 100) {
  if (!db) return [];

  // Decode a JSON list column without letting one corrupt row abort the
  // whole listing: malformed or non-list JSON degrades to an empty list,
  // a bare JSON string is wrapped into a one-element list.
  const toList = (text) => {
    try {
      const value = JSON.parse(text || "[]");
      if (Array.isArray(value)) return value;
      return typeof value === "string" ? [value] : [];
    } catch {
      return [];
    }
  };

  const rows = db
    .prepare(
      `
      SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
      `,
    )
    .all(status, limit);

  return rows.map((r) => ({
    ...r,
    countries: toList(r.countries),
    matched_countries: toList(r.matched_countries),
  }));
}
|
|
1382
|
+
|
|
1383
|
+
/**
 * List non-dead tags associated with a country, highest score first.
 *
 * `countries` is stored as JSON text, so the country filter is applied in
 * JavaScript after the rows are loaded.
 *
 * @param {string} country - Country code that must appear in the tag's
 *   `countries` list.
 * @param {number} [minScore=0] - Minimum `score` (inclusive).
 * @returns {object[]} Matching tag rows with `countries` and
 *   `matched_countries` decoded to arrays. Empty when the database is not
 *   ready.
 */
function getTagsByCountry(country, minScore = 0) {
  if (!db) return [];

  // Decode a JSON list column defensively: a stored `null` literal or
  // malformed text would otherwise make `.includes` / JSON.parse throw and
  // abort the whole listing.
  const toList = (text) => {
    try {
      const value = JSON.parse(text || "[]");
      if (Array.isArray(value)) return value;
      return typeof value === "string" ? [value] : [];
    } catch {
      return [];
    }
  };

  const rows = db
    .prepare(
      `
      SELECT * FROM tags WHERE status != 'dead'
      ORDER BY score DESC
      `,
    )
    .all();

  return rows
    .map((r) => ({
      ...r,
      countries: toList(r.countries),
      matched_countries: toList(r.matched_countries),
    }))
    .filter((r) => r.countries.includes(country) && r.score >= minScore);
}
|
|
1402
|
+
|
|
1403
|
+
/**
 * List tags whose status is `dead` for a country, lowest score first.
 *
 * @param {string} country - Country code that must appear in the tag's
 *   `countries` list.
 * @returns {object[]} Matching tag rows with `countries` and
 *   `matched_countries` decoded to arrays. Empty when the database is not
 *   ready.
 */
function getDeadTags(country) {
  if (!db) return [];

  // Decode a JSON list column defensively so that one corrupt or non-list
  // value cannot make the whole query throw.
  const toList = (text) => {
    try {
      const value = JSON.parse(text || "[]");
      if (Array.isArray(value)) return value;
      return typeof value === "string" ? [value] : [];
    } catch {
      return [];
    }
  };

  const rows = db
    .prepare(
      `
      SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
      `,
    )
    .all();

  return rows
    .map((r) => ({
      ...r,
      countries: toList(r.countries),
      matched_countries: toList(r.matched_countries),
    }))
    .filter((r) => r.countries.includes(country));
}
|
|
1420
|
+
|
|
1421
|
+
/**
 * Try to claim a tag for scoring by moving it from `new` to `scoring`.
 *
 * @param {string} tag - Tag text matched against the `tag` column as-is.
 * @returns {{ok: boolean, tag?: string, error?: string}} `ok: true` when the
 *   row was switched to `scoring`; otherwise an error describing whether the
 *   tag is missing or is in some other status.
 */
function claimTag(tag) {
  if (!db) {
    return { ok: false, error: "db not ready" };
  }

  // A single conditional UPDATE: the row only changes while its status is
  // still 'new', so a second claimer sees zero changed rows.
  const { changes } = db
    .prepare(
      "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
    )
    .run(tag);
  if (changes !== 0) {
    return { ok: true, tag };
  }

  // Nothing changed: tell "no such tag" apart from "not in 'new' status".
  const current = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
  return current
    ? { ok: false, error: `tag status is ${current.status}, already claimed` }
    : { ok: false, error: "tag not found" };
}
|
|
1437
|
+
|
|
1438
|
+
/**
 * Record a scoring result for a tag.
 *
 * Every field is optional: a field that is null/undefined leaves the stored
 * column unchanged (the SQL uses COALESCE). `scored_at` is always set to the
 * current time and `score_count` is always incremented.
 *
 * @param {string} tag - Tag text matched against the `tag` column.
 * @param {object} fields - Partial result: `score`, `status`, `totalPosts`,
 *   `authorCount`, `matchedAuthors`, `matchedCountries`, `pushedUsers`,
 *   `error`.
 * @returns {{ok: boolean, tag?: string, error?: string}} `ok` is true when a
 *   row was updated; a failed statement is reported through `error`.
 */
function reportTagScore(tag, fields) {
  if (!db) return { ok: false, error: "db not ready" };

  const {
    score: newScore,
    status: newStatus,
    totalPosts,
    authorCount,
    matchedAuthors,
    matchedCountries,
    pushedUsers,
    error: lastError,
  } = fields;

  // A falsy matchedCountries is passed as NULL so COALESCE keeps the old value.
  let matchedCountriesJson = null;
  if (matchedCountries) {
    matchedCountriesJson = JSON.stringify(matchedCountries);
  }
  const scoredAt = new Date().toISOString();

  // Positional parameters, in the same order as the placeholders below.
  const params = [
    newScore ?? null,
    newStatus ?? null,
    totalPosts ?? null,
    authorCount ?? null,
    matchedAuthors ?? null,
    matchedCountriesJson,
    pushedUsers ?? null,
    lastError ?? null,
    scoredAt,
    tag,
  ];

  try {
    const statement = db.prepare(
      `
      UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?
      `,
    );
    const outcome = statement.run(...params);
    return { ok: outcome.changes > 0, tag };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
|
|
1490
|
+
|
|
1491
|
+
/**
 * List tags of any status, highest score first, then newest first.
 *
 * @param {number} [limit=200] - Maximum number of rows returned.
 * @returns {object[]} Tag rows with `countries` and `matched_countries`
 *   decoded to arrays. Empty when the database is not ready.
 */
function getAllTags(limit = 200) {
  if (!db) return [];

  // Decode a JSON list column without letting one corrupt row abort the
  // whole listing: malformed or non-list JSON degrades to an empty list,
  // a bare JSON string is wrapped into a one-element list.
  const toList = (text) => {
    try {
      const value = JSON.parse(text || "[]");
      if (Array.isArray(value)) return value;
      return typeof value === "string" ? [value] : [];
    } catch {
      return [];
    }
  };

  const rows = db
    .prepare(
      `
      SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
      `,
    )
    .all(limit);

  return rows.map((r) => ({
    ...r,
    countries: toList(r.countries),
    matched_countries: toList(r.matched_countries),
  }));
}
|
|
1506
|
+
|
|
1310
1507
|
// 调试接口:直接执行 SQL 查询,返回原始数据
|
|
1311
1508
|
function rawQuery(sql, params = []) {
|
|
1312
1509
|
if (!db) return { error: "db not ready" };
|
|
@@ -1318,6 +1515,68 @@ function rawQuery(sql, params = []) {
|
|
|
1318
1515
|
}
|
|
1319
1516
|
}
|
|
1320
1517
|
|
|
1518
|
+
/**
 * Clean up dirty rows in the `tags` table whose tag starts with `#`.
 *
 * For each dirty row the tag is normalized (leading `#` / whitespace
 * stripped, trimmed, lower-cased) and then:
 *  - deleted, when fewer than 2 characters remain;
 *  - merged into the existing clean row (countries unioned, dirty row
 *    deleted), when the clean tag already exists;
 *  - renamed in place otherwise.
 *
 * All changes run inside one `db.transaction`, so a failure part-way through
 * does not leave the table half-cleaned (assumes better-sqlite3 style
 * commit/rollback semantics for `db.transaction` — confirm against the
 * driver in use).
 *
 * @returns {{ok: boolean, error?: string, fixed?: number, merged?: number,
 *   skipped?: number, details?: {fixed: object[], merged: object[],
 *   skipped: object[]}}} Counts and per-row details of what was done.
 */
function normalizeTags() {
  if (!db) return { ok: false, error: "db not ready" };

  // Tolerant decoder: corrupt or non-list JSON must not block the cleanup.
  const toList = (text) => {
    try {
      const value = JSON.parse(text || "[]");
      if (Array.isArray(value)) return value;
      return typeof value === "string" ? [value] : [];
    } catch {
      return [];
    }
  };

  // Prepare each statement once instead of once per dirty row.
  const selectDirty = db.prepare(
    "SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'",
  );
  const selectByTag = db.prepare("SELECT * FROM tags WHERE tag = ?");
  const deleteById = db.prepare("DELETE FROM tags WHERE id = ?");
  const updateCountries = db.prepare(
    "UPDATE tags SET countries = ? WHERE tag = ?",
  );
  const renameById = db.prepare("UPDATE tags SET tag = ? WHERE id = ?");

  const fixed = [];
  const merged = [];
  const skipped = [];

  const runCleanup = db.transaction(() => {
    for (const row of selectDirty.all()) {
      // Strip '#' and whitespace together so "# #foo" does not become "#foo".
      const cleanTag = row.tag.replace(/^[#\s]+/, "").trim().toLowerCase();
      if (cleanTag.length < 2) {
        deleteById.run(row.id);
        skipped.push({
          dirty: row.tag,
          reason: "empty after normalize, deleted",
        });
        continue;
      }

      // Looked up per row on purpose: an earlier rename in this loop may
      // have just created the clean tag this row collides with.
      const existing = selectByTag.get(cleanTag);
      if (existing) {
        // Merge: keep the existing clean row and union the country lists.
        const mergedCountries = [
          ...new Set([...toList(existing.countries), ...toList(row.countries)]),
        ];
        updateCountries.run(JSON.stringify(mergedCountries), cleanTag);
        deleteById.run(row.id);
        merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      } else {
        // No collision: rename the dirty row in place.
        renameById.run(cleanTag, row.id);
        fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      }
    }
  });
  runCleanup();

  return {
    ok: true,
    fixed: fixed.length,
    merged: merged.length,
    skipped: skipped.length,
    details: { fixed, merged, skipped },
  };
}
|
|
1572
|
+
|
|
1573
|
+
/**
 * Delete every row from the `tags` table.
 *
 * @returns {{ok: boolean, deleted?: number, error?: string}} `deleted` is the
 *   row count read immediately before the delete.
 */
function clearTags() {
  if (!db) {
    return { ok: false, error: "db not ready" };
  }
  // Count first so the caller can be told how many rows were removed.
  const { c: deleted } = db.prepare("SELECT COUNT(*) as c FROM tags").get();
  db.exec("DELETE FROM tags");
  return { ok: true, deleted };
}
|
|
1579
|
+
|
|
1321
1580
|
function getUsersPageFromDb({
|
|
1322
1581
|
status,
|
|
1323
1582
|
search,
|
|
@@ -1668,6 +1927,7 @@ const writableJobColumns = new Set([
|
|
|
1668
1927
|
"latest_video_time",
|
|
1669
1928
|
"top_video_play_count",
|
|
1670
1929
|
"top_video_href",
|
|
1930
|
+
"user_create_time",
|
|
1671
1931
|
]);
|
|
1672
1932
|
|
|
1673
1933
|
function normalizeJobValue(column, value) {
|
|
@@ -1761,8 +2021,9 @@ function updateJobInfo(uniqueId, info, incrementCount = true) {
|
|
|
1761
2021
|
if (key === "uniqueId" || key === "unique_id") continue;
|
|
1762
2022
|
if (value === undefined || value === "") continue;
|
|
1763
2023
|
let column = camelToSnake(key);
|
|
1764
|
-
// 字段别名:bio → signature
|
|
2024
|
+
// 字段别名:bio → signature, createTime → user_create_time
|
|
1765
2025
|
if (column === "bio") column = "signature";
|
|
2026
|
+
if (column === "create_time") column = "user_create_time";
|
|
1766
2027
|
if (!writableJobColumns.has(column)) continue;
|
|
1767
2028
|
nextValues[column] = normalizeJobValue(column, value);
|
|
1768
2029
|
}
|
|
@@ -1805,8 +2066,9 @@ function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
|
|
|
1805
2066
|
if (key === "uniqueId" || key === "unique_id") continue;
|
|
1806
2067
|
if (value === undefined || value === "") continue;
|
|
1807
2068
|
let column = camelToSnake(key);
|
|
1808
|
-
// 字段别名:bio → signature
|
|
2069
|
+
// 字段别名:bio → signature, createTime → user_create_time
|
|
1809
2070
|
if (column === "bio") column = "signature";
|
|
2071
|
+
if (column === "create_time") column = "user_create_time";
|
|
1810
2072
|
if (!writableJobColumns.has(column)) continue;
|
|
1811
2073
|
nextValues[column] = normalizeJobValue(column, value);
|
|
1812
2074
|
}
|
|
@@ -2194,6 +2456,43 @@ export function createStore(filePath, options = {}) {
|
|
|
2194
2456
|
}
|
|
2195
2457
|
}
|
|
2196
2458
|
|
|
2459
|
+
/**
 * Bulk-add newly discovered users as pending jobs.
 *
 * Each entry's `uniqueId` is trimmed and stripped of a leading `@`. Entries
 * without a usable id are ignored; ids for which `hasUser` returns true are
 * counted as skipped. Every new user is written through `addUserToDb` and
 * `addJobBaseToDb` inside one transaction per user.
 *
 * @param {Array<{uniqueId: string, sources?: string|string[],
 *   guessedLocation?: string, locationCreated?: string}>} users
 * @returns {{added: number, skipped: number, error?: string}} Counts of
 *   users written and users skipped as already known. `error` is set when
 *   the database is not ready.
 */
function addRawUsers(users) {
  if (!Array.isArray(users)) return { added: 0, skipped: 0 };
  // This path writes through db.transaction, so it cannot run without a db.
  if (!db) return { added: 0, skipped: 0, error: "db not ready" };

  const now = Date.now();
  let added = 0;
  let skipped = 0;

  // Built once: the wrapper does not depend on the individual user.
  const writeTxn = db.transaction((job) => {
    addUserToDb(job);
    addJobBaseToDb(job);
  });

  for (const u of users) {
    const rawId = u?.uniqueId;
    if (typeof rawId !== "string" && typeof rawId !== "number") continue;
    // Trim before stripping '@' so " @name" is normalized too.
    const uniqueId = String(rawId).trim().replace(/^@/, "").trim();
    if (!uniqueId) continue;
    if (hasUser(uniqueId)) {
      skipped++;
      continue;
    }

    // Accept an array, a single value, or nothing (defaults to ["tag"]).
    let sources = ["tag"];
    if (Array.isArray(u.sources)) {
      sources = u.sources;
    } else if (u.sources) {
      sources = [u.sources];
    }

    writeTxn({
      uniqueId,
      status: "pending",
      sources,
      guessedLocation: u.guessedLocation || u.locationCreated || null,
      locationCreated: u.locationCreated || null,
      createdAt: now,
      updatedAt: now,
    });
    added++;
  }

  return { added, skipped };
}
|
|
2495
|
+
|
|
2197
2496
|
function getPendingUsers() {
|
|
2198
2497
|
if (db) {
|
|
2199
2498
|
return getAllJobs().filter((u) => u.status === "pending");
|
|
@@ -2410,7 +2709,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2410
2709
|
|
|
2411
2710
|
// 构建 WHERE 条件
|
|
2412
2711
|
const conditions = [
|
|
2413
|
-
"COALESCE(video_count, 0) >
|
|
2712
|
+
"COALESCE(video_count, 0) > 6",
|
|
2414
2713
|
"COALESCE(follower_count, 0) > 0",
|
|
2415
2714
|
"COALESCE(following_count, 0) > 0",
|
|
2416
2715
|
];
|
|
@@ -2496,7 +2795,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2496
2795
|
.prepare(
|
|
2497
2796
|
`
|
|
2498
2797
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2499
|
-
ORDER BY created_at DESC
|
|
2798
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2500
2799
|
LIMIT ? OFFSET ?
|
|
2501
2800
|
`,
|
|
2502
2801
|
)
|
|
@@ -2610,7 +2909,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2610
2909
|
guessed_location, location_created, confirmed_location,
|
|
2611
2910
|
follower_count, following_count, heart_count,
|
|
2612
2911
|
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
2613
|
-
status_code, latest_video_time
|
|
2912
|
+
status_code, latest_video_time, user_create_time
|
|
2614
2913
|
)
|
|
2615
2914
|
SELECT
|
|
2616
2915
|
unique_id, nickname, 'pending', sources, pinned,
|
|
@@ -2618,10 +2917,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2618
2917
|
guessed_location, location_created, confirmed_location,
|
|
2619
2918
|
follower_count, following_count, heart_count,
|
|
2620
2919
|
created_at, updated_at, region, signature, bio_link, sec_uid,
|
|
2621
|
-
status_code, latest_video_time
|
|
2920
|
+
status_code, latest_video_time, user_create_time
|
|
2622
2921
|
FROM raw_jobs
|
|
2623
2922
|
WHERE ${whereSql}
|
|
2624
|
-
ORDER BY created_at DESC
|
|
2923
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2625
2924
|
LIMIT ?
|
|
2626
2925
|
`,
|
|
2627
2926
|
).run(...args, safeLimit);
|
|
@@ -2633,7 +2932,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2633
2932
|
WHERE unique_id IN (
|
|
2634
2933
|
SELECT unique_id FROM raw_jobs
|
|
2635
2934
|
WHERE ${whereSql}
|
|
2636
|
-
ORDER BY created_at DESC
|
|
2935
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2637
2936
|
LIMIT ?
|
|
2638
2937
|
)
|
|
2639
2938
|
`,
|
|
@@ -3734,20 +4033,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3734
4033
|
const now = Date.now();
|
|
3735
4034
|
const threshold = now - maxAgeSeconds * 1000;
|
|
3736
4035
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
3737
|
-
const targetLocations =
|
|
3738
|
-
"CZ",
|
|
3739
|
-
"GR",
|
|
3740
|
-
"HU",
|
|
3741
|
-
"PT",
|
|
3742
|
-
"ES",
|
|
3743
|
-
"PL",
|
|
3744
|
-
"NL",
|
|
3745
|
-
"BE",
|
|
3746
|
-
"DE",
|
|
3747
|
-
"FR",
|
|
3748
|
-
"IT",
|
|
3749
|
-
"IE",
|
|
3750
|
-
];
|
|
4036
|
+
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
3751
4037
|
const placeholders = targetLocations.map(() => "?").join(",");
|
|
3752
4038
|
const row = db
|
|
3753
4039
|
.prepare(
|
|
@@ -3758,7 +4044,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3758
4044
|
AND verified = 0
|
|
3759
4045
|
AND location_created IN (${placeholders})
|
|
3760
4046
|
AND COALESCE(refresh_time, ?) < ?
|
|
3761
|
-
ORDER BY COALESCE(refresh_time, ?) ASC
|
|
4047
|
+
ORDER BY COALESCE(pinned, 0) DESC, COALESCE(refresh_time, ?) ASC
|
|
3762
4048
|
LIMIT 1
|
|
3763
4049
|
`,
|
|
3764
4050
|
)
|
|
@@ -3779,20 +4065,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3779
4065
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
3780
4066
|
|
|
3781
4067
|
// 筛选目标国家用户,按 refreshTime 升序取最远的(没有则默认 2016-01-01)
|
|
3782
|
-
const targetLocations =
|
|
3783
|
-
"CZ",
|
|
3784
|
-
"GR",
|
|
3785
|
-
"HU",
|
|
3786
|
-
"PT",
|
|
3787
|
-
"ES",
|
|
3788
|
-
"PL",
|
|
3789
|
-
"NL",
|
|
3790
|
-
"BE",
|
|
3791
|
-
"DE",
|
|
3792
|
-
"FR",
|
|
3793
|
-
"IT",
|
|
3794
|
-
"IE",
|
|
3795
|
-
];
|
|
4068
|
+
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
3796
4069
|
const targetUsers = data.filter(
|
|
3797
4070
|
(u) =>
|
|
3798
4071
|
u.ttSeller &&
|
|
@@ -3808,6 +4081,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3808
4081
|
if (recentEnough.length === 0) return null;
|
|
3809
4082
|
|
|
3810
4083
|
recentEnough.sort((a, b) => {
|
|
4084
|
+
// pinned 优先,其次按 refreshTime 升序
|
|
4085
|
+
if ((a.pinned ? 1 : 0) !== (b.pinned ? 1 : 0)) {
|
|
4086
|
+
return (b.pinned ? 1 : 0) - (a.pinned ? 1 : 0);
|
|
4087
|
+
}
|
|
3811
4088
|
const ta = a.refreshTime || defaultTime;
|
|
3812
4089
|
const tb = b.refreshTime || defaultTime;
|
|
3813
4090
|
return ta - tb;
|
|
@@ -4101,7 +4378,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4101
4378
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4102
4379
|
follower_count, following_count, heart_count, refresh_time,
|
|
4103
4380
|
processed, processed_at, created_at, updated_at,
|
|
4104
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4381
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4382
|
+
user_create_time
|
|
4105
4383
|
)
|
|
4106
4384
|
SELECT
|
|
4107
4385
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4110,7 +4388,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4110
4388
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4111
4389
|
follower_count, following_count, heart_count, refresh_time,
|
|
4112
4390
|
processed, processed_at, created_at, updated_at,
|
|
4113
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4391
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4392
|
+
user_create_time
|
|
4114
4393
|
FROM jobs WHERE unique_id = ?
|
|
4115
4394
|
`,
|
|
4116
4395
|
).run(safeId);
|
|
@@ -4149,11 +4428,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4149
4428
|
return;
|
|
4150
4429
|
}
|
|
4151
4430
|
|
|
4152
|
-
// 检查 tt_seller
|
|
4431
|
+
// 检查 tt_seller:商家且视频数>0移到 jobs,否则移到 raw_jobs
|
|
4153
4432
|
const row = getJobBaseRow(uniqueId);
|
|
4154
4433
|
const ttSeller = row ? row.tt_seller : null;
|
|
4155
|
-
|
|
4156
|
-
|
|
4434
|
+
const videoCount = row ? row.video_count || 0 : 0;
|
|
4435
|
+
if (ttSeller && videoCount > 0) {
|
|
4436
|
+
// 商家且有视频:标记移动到 jobs
|
|
4157
4437
|
results.push({
|
|
4158
4438
|
uniqueId,
|
|
4159
4439
|
ok: true,
|
|
@@ -4162,7 +4442,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4162
4442
|
});
|
|
4163
4443
|
sellerMoveList.push(uniqueId);
|
|
4164
4444
|
} else {
|
|
4165
|
-
//
|
|
4445
|
+
// 非商家或无视频:标记移动到 raw_jobs
|
|
4166
4446
|
results.push({
|
|
4167
4447
|
uniqueId,
|
|
4168
4448
|
ok: true,
|
|
@@ -4187,7 +4467,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4187
4467
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4188
4468
|
follower_count, following_count, heart_count, refresh_time,
|
|
4189
4469
|
processed, processed_at, created_at, updated_at,
|
|
4190
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4470
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4471
|
+
user_create_time
|
|
4191
4472
|
)
|
|
4192
4473
|
SELECT
|
|
4193
4474
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4196,7 +4477,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4196
4477
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4197
4478
|
follower_count, following_count, heart_count, refresh_time,
|
|
4198
4479
|
processed, processed_at, created_at, updated_at,
|
|
4199
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4480
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4481
|
+
user_create_time
|
|
4200
4482
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4201
4483
|
`,
|
|
4202
4484
|
).run(...sellerMoveList);
|
|
@@ -4218,7 +4500,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4218
4500
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4219
4501
|
follower_count, following_count, heart_count, refresh_time,
|
|
4220
4502
|
processed, processed_at, created_at, updated_at,
|
|
4221
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4503
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4504
|
+
user_create_time
|
|
4222
4505
|
)
|
|
4223
4506
|
SELECT
|
|
4224
4507
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
@@ -4227,7 +4510,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4227
4510
|
guessed_location, location_created, confirmed_location, modified_at,
|
|
4228
4511
|
follower_count, following_count, heart_count, refresh_time,
|
|
4229
4512
|
processed, processed_at, created_at, updated_at,
|
|
4230
|
-
region, signature, bio_link, sec_uid, status_code, latest_video_time
|
|
4513
|
+
region, signature, bio_link, sec_uid, status_code, latest_video_time,
|
|
4514
|
+
user_create_time
|
|
4231
4515
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4232
4516
|
`,
|
|
4233
4517
|
).run(...rawMoveList);
|
|
@@ -4483,6 +4767,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4483
4767
|
hasUser,
|
|
4484
4768
|
userExists,
|
|
4485
4769
|
addUser,
|
|
4770
|
+
addRawUsers,
|
|
4486
4771
|
getPendingUsers,
|
|
4487
4772
|
getProcessedUsers,
|
|
4488
4773
|
getAllUsers,
|
|
@@ -4540,6 +4825,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4540
4825
|
stopBackup,
|
|
4541
4826
|
rawQuery,
|
|
4542
4827
|
getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
|
|
4828
|
+
// Tag 发现与打分
|
|
4829
|
+
insertTag,
|
|
4830
|
+
getTagsByStatus,
|
|
4831
|
+
getTagsByCountry,
|
|
4832
|
+
getDeadTags,
|
|
4833
|
+
claimTag,
|
|
4834
|
+
reportTagScore,
|
|
4835
|
+
getAllTags,
|
|
4836
|
+
normalizeTags,
|
|
4837
|
+
clearTags,
|
|
4543
4838
|
data,
|
|
4544
4839
|
};
|
|
4545
4840
|
|