tt-help-cli-ycl 1.3.92 → 1.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5019 @@
1
+ import fs from "fs";
2
+ import path from "path";
3
+ import Database from "better-sqlite3";
4
+ import {
5
+ isLocationInList,
6
+ DEFAULT_TARGET_LOCATIONS,
7
+ } from "../lib/target-locations.js";
8
+
9
// Shared SQLite connection (its users table is used for de-duplication).
// Stays null until initUserDb opens a database; resetDbConnection sets it back to null.
let db = null;
// Resolved path of the database file behind `db`; null while no database is open.
let dbPath = null;
12
+
13
/**
 * Validate a SQLite database path and turn it into an absolute path.
 *
 * @param {string} filePath - Path to the database file; its extension must be
 *   `.db` (compared case-insensitively).
 * @returns {string} `filePath` resolved with `path.resolve`.
 * @throws {Error} When `filePath` is falsy or does not end in `.db`.
 */
function normalizeDbFilePath(filePath) {
  if (!filePath) {
    throw new Error("db path is required");
  }

  const absolutePath = path.resolve(filePath);
  const extension = path.extname(absolutePath).toLowerCase();
  if (extension === ".db") {
    return absolutePath;
  }

  throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
}
23
+
24
/**
 * Close the currently open SQLite connection (if there is one) and clear the
 * module-level `db` / `dbPath` state. Calling it with no open connection only
 * clears `dbPath`.
 */
function resetDbConnection() {
  const openConnection = db;
  if (openConnection) {
    openConnection.close();
    db = null;
  }
  dbPath = null;
}
31
+
32
/**
 * Load legacy user records from up to two JSON array files and merge them by
 * user id.
 *
 * Records are keyed by `uniqueId` (falling back to `unique_id`); records
 * without an id are skipped. When the same id appears more than once, later
 * fields overwrite earlier ones, and the done file is read after the user file.
 * Missing files and non-array contents are ignored; read/parse failures are
 * logged and that file is skipped.
 *
 * @param {string} [userFilePath] - Path of the legacy user list.
 * @param {string} [doneFilePath] - Path of the legacy processed-user list.
 * @returns {object[]} Merged records, each carrying a `uniqueId` property.
 */
function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
  const usersById = new Map();

  // Fold one JSON file into usersById; `label` only appears in the error log.
  function mergeFile(sourcePath, label) {
    if (!sourcePath || !fs.existsSync(sourcePath)) {
      return;
    }
    try {
      const records = JSON.parse(fs.readFileSync(sourcePath, "utf-8"));
      if (!Array.isArray(records)) {
        return;
      }
      records.forEach((record) => {
        const uniqueId = record?.uniqueId || record?.unique_id;
        if (uniqueId) {
          const previous = usersById.get(uniqueId);
          usersById.set(uniqueId, { ...previous, ...record, uniqueId });
        }
      });
    } catch (e) {
      console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
    }
  }

  mergeFile(userFilePath, "result.json");
  mergeFile(doneFilePath, "result-done.json");

  return Array.from(usersById.values());
}
60
+
61
/**
 * Load the legacy video list from a JSON file.
 *
 * @param {string} [videoPath] - Path of the legacy video JSON file.
 * @returns {Array} The parsed array, or an empty array when the path is
 *   falsy, the file does not exist, the content is not a JSON array, or
 *   reading/parsing fails (the failure is logged).
 */
function loadLegacyVideosFromFile(videoPath) {
  const fileAvailable = Boolean(videoPath) && fs.existsSync(videoPath);
  if (!fileAvailable) {
    return [];
  }

  let videos;
  try {
    videos = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
  } catch (e) {
    console.error(
      `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
    );
    return [];
  }

  if (Array.isArray(videos)) {
    return videos;
  }
  return [];
}
75
+
76
/**
 * Open (creating it if necessary) the SQLite database at `filePath` and bring
 * its schema up to date.
 *
 * Stores the connection and the resolved path in the module-level `db` and
 * `dbPath` variables, switches the journal mode to WAL, creates the `users`,
 * `jobs`, `jobs_base`, `raw_jobs`, `videos` and `tags` tables and their
 * indexes when they are missing, and adds columns that databases created by
 * earlier schema versions lack. Finally logs the current `users` row count.
 *
 * @param {string} filePath - Database path; validated by `normalizeDbFilePath`.
 * @throws {Error} Propagates path-validation, filesystem and SQLite errors.
 */
function initUserDb(filePath) {
  dbPath = normalizeDbFilePath(filePath);
  fs.mkdirSync(path.dirname(dbPath), { recursive: true });
  db = new Database(dbPath);
  db.pragma("journal_mode = WAL");

  // Migration helper: add every [name, type] column that `table` does not
  // have yet. Table and column names are hard-coded below, so interpolating
  // them into the SQL text is safe.
  const addMissingColumns = (table, columns) => {
    const existing = new Set(
      db
        .prepare(`PRAGMA table_info(${table})`)
        .all()
        .map((column) => column.name),
    );
    for (const [column, type] of columns) {
      if (!existing.has(column)) {
        db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${type}`);
      }
    }
  };

  // Column definitions shared by jobs, jobs_base and raw_jobs. Tables that
  // carry additional columns append them after status_code.
  const sharedJobColumnsSql = `unique_id TEXT PRIMARY KEY,
      nickname TEXT,
      status TEXT DEFAULT 'pending',
      sources TEXT,
      claimed_by TEXT,
      claimed_at INTEGER,
      error TEXT,
      pinned INTEGER DEFAULT 0,
      no_video INTEGER DEFAULT 0,
      restricted INTEGER DEFAULT 0,
      user_update_count INTEGER DEFAULT 0,
      tt_seller INTEGER,
      verified INTEGER,
      video_count INTEGER DEFAULT 0,
      comment_count INTEGER DEFAULT 0,
      guessed_location TEXT,
      location_created TEXT,
      confirmed_location TEXT,
      modified_at INTEGER,
      follower_count INTEGER DEFAULT 0,
      following_count INTEGER DEFAULT 0,
      heart_count INTEGER DEFAULT 0,
      refresh_time INTEGER,
      processed INTEGER DEFAULT 0,
      processed_at INTEGER,
      created_at INTEGER,
      updated_at INTEGER,
      region TEXT,
      signature TEXT,
      sec_uid TEXT,
      status_code INTEGER`;

  const createJobTable = (table, extraColumnsSql = "") => {
    db.exec(`
    CREATE TABLE IF NOT EXISTS ${table} (
      ${sharedJobColumnsSql}${extraColumnsSql}
    )
  `);
  };

  // Columns added to all three job tables after their first release.
  const lateJobColumns = [
    ["status_code", "INTEGER"],
    ["latest_video_time", "INTEGER"],
    ["confirmed_location", "TEXT"],
    ["modified_at", "INTEGER"],
    ["bio_link", "TEXT"],
  ];

  db.exec(`
    CREATE TABLE IF NOT EXISTS users (
      unique_id TEXT PRIMARY KEY,
      tt_seller TEXT,
      verified INTEGER,
      location_created TEXT,
      created_at TEXT,
      updated_at TEXT
    )
  `);

  createJobTable("jobs");
  addMissingColumns("jobs", [
    ...lateJobColumns,
    ["top_video_play_count", "INTEGER"],
    ["top_video_href", "TEXT"],
    ["user_create_time", "INTEGER"],
  ]);

  createJobTable(
    "jobs_base",
    `,
      latest_video_time INTEGER,
      bio_link TEXT`,
  );
  addMissingColumns("jobs_base", [
    ...lateJobColumns,
    ["user_create_time", "INTEGER"],
  ]);

  createJobTable(
    "raw_jobs",
    `,
      latest_video_time INTEGER`,
  );
  addMissingColumns("raw_jobs", [
    ...lateJobColumns,
    ["user_create_time", "INTEGER"],
  ]);

  db.exec(`
    CREATE TABLE IF NOT EXISTS videos (
      id TEXT PRIMARY KEY,
      href TEXT,
      author_unique_id TEXT,
      location_created TEXT,
      tt_seller INTEGER DEFAULT 0,
      registered_at INTEGER,
      user_update_count INTEGER DEFAULT 0,
      play_count INTEGER,
      digg_count INTEGER,
      comment_count INTEGER,
      share_count INTEGER,
      collect_count INTEGER,
      stats_updated_at INTEGER,
      create_time INTEGER
    )
  `);

  // Index expression shared by the three "claim pending" partial indexes.
  const claimOrderSql =
    "UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC";

  const indexStatements = [
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_status_video
    ON jobs(status, video_count DESC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
    ON jobs(claimed_by, status, claimed_at)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
    ON jobs(status, claimed_at)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
    ON jobs(tt_seller, verified, location_created, refresh_time)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
    ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
    ON jobs(created_at ASC, unique_id ASC)
    WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
    ON jobs(${claimOrderSql})
    WHERE status = 'pending'
    AND COALESCE(pinned, 0) = 0
    AND tt_seller = 1
    AND verified = 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
    ON jobs(${claimOrderSql})
    WHERE status = 'pending'
    AND COALESCE(pinned, 0) = 0
    AND (
    instr(COALESCE(sources, ''), '"following"') > 0
    OR instr(COALESCE(sources, ''), '"follower"') > 0
    )
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
    ON jobs(${claimOrderSql})
    WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
    ON jobs(created_at ASC, unique_id ASC)
    WHERE (tt_seller IS NULL OR tt_seller = '')
    AND (user_update_count IS NULL OR user_update_count <= 0)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
    ON jobs(created_at ASC, unique_id ASC)
    WHERE COALESCE(tt_seller, '') = ''
    AND COALESCE(user_update_count, 0) <= 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
    ON videos(user_update_count, tt_seller DESC, registered_at ASC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
    ON videos(tt_seller DESC, registered_at ASC, id)
    WHERE user_update_count IS NULL OR user_update_count <= 0
  `,
  ];
  for (const statement of indexStatements) {
    db.exec(statement);
  }

  // Video statistics columns and create_time were added after the first
  // release of the videos table.
  addMissingColumns("videos", [
    ["play_count", "INTEGER"],
    ["digg_count", "INTEGER"],
    ["comment_count", "INTEGER"],
    ["share_count", "INTEGER"],
    ["collect_count", "INTEGER"],
    ["stats_updated_at", "INTEGER"],
    ["create_time", "INTEGER"],
  ]);

  // tags table: tag discovery and scoring.
  db.exec(`
    CREATE TABLE IF NOT EXISTS tags (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      tag TEXT NOT NULL UNIQUE,
      status TEXT NOT NULL DEFAULT 'new',
      score REAL NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      scored_at TEXT,
      score_count INTEGER NOT NULL DEFAULT 0,
      countries TEXT NOT NULL DEFAULT '[]',
      matched_countries TEXT DEFAULT '[]',
      total_posts INTEGER DEFAULT 0,
      author_count INTEGER DEFAULT 0,
      matched_authors INTEGER DEFAULT 0,
      pushed_users INTEGER DEFAULT 0,
      source TEXT NOT NULL DEFAULT 'llm',
      user_prompt TEXT,
      last_error TEXT
    )
  `);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);

  const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
  console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
}
423
+
424
/**
 * Import legacy JSON exports (users, processed users, videos) into the SQLite
 * store.
 *
 * Re-opens the database at `dbFilePath`, then inserts every legacy user into
 * `users` and (through `addJobToDb`) into `jobs`, and every legacy video into
 * `videos`. All inserts use `INSERT OR IGNORE`, so existing rows are left
 * untouched. Users and videos are each imported inside their own transaction.
 *
 * @param {object} options
 * @param {string} options.dbFilePath - Target `.db` file.
 * @param {string} [options.usersFilePath] - Legacy user list JSON.
 * @param {string} [options.doneFilePath] - Legacy processed-user list JSON.
 * @param {string} [options.videosFilePath] - Legacy video list JSON.
 * @returns {{dbPath: string, usersImported: number, jobsImported: number,
 *   videosImported: number, totalUsers: number, totalJobs: number,
 *   totalVideos: number}} Row-count deltas and totals after the import.
 */
export function importLegacyJsonToDb({
  dbFilePath,
  usersFilePath,
  doneFilePath,
  videosFilePath,
}) {
  resetDbConnection();
  initUserDb(dbFilePath);

  const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
  const legacyVideos = loadLegacyVideosFromFile(videosFilePath);

  // Table names are fixed literals, so interpolation is safe here.
  const countRows = (table) =>
    db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c;

  const beforeUsers = countRows("users");
  const beforeJobs = countRows("jobs");
  const beforeVideos = countRows("videos");

  const insertUserStmt = db.prepare(`
    INSERT OR IGNORE INTO users (unique_id) VALUES (?)
  `);
  const insertVideoStmt = db.prepare(`
    INSERT OR IGNORE INTO videos (
      id,
      href,
      author_unique_id,
      location_created,
      tt_seller,
      registered_at,
      user_update_count,
      create_time
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const importUsersTxn = db.transaction((items) => {
    for (const item of items) {
      const uniqueId = item.uniqueId || item.unique_id;
      if (!uniqueId) continue;
      insertUserStmt.run(uniqueId);
      addJobToDb({ ...item, uniqueId });
    }
  });

  const importVideosTxn = db.transaction((items) => {
    for (const item of items) {
      if (!item?.id) continue;
      insertVideoStmt.run(
        item.id,
        item.href || null,
        item.authorUniqueId || item.author_unique_id || null,
        item.locationCreated || item.location_created || null,
        // Accept the snake_case spelling like every other field does; the
        // camelCase value wins whenever it is present (even when falsy).
        (item.ttSeller ?? item.tt_seller) ? 1 : 0,
        item.registeredAt || item.registered_at || Date.now(),
        item.userUpdateCount || item.user_update_count || 0,
        item.createTime || item.create_time || null,
      );
    }
  });

  importUsersTxn(legacyUsers);
  importVideosTxn(legacyVideos);

  const afterUsers = countRows("users");
  const afterJobs = countRows("jobs");
  const afterVideos = countRows("videos");

  return {
    dbPath,
    usersImported: afterUsers - beforeUsers,
    jobsImported: afterJobs - beforeJobs,
    videosImported: afterVideos - beforeVideos,
    totalUsers: afterUsers,
    totalJobs: afterJobs,
    totalVideos: afterVideos,
  };
}
499
+
500
/**
 * Public entry point for shutting the store down: delegates to
 * `resetDbConnection`.
 */
export function closeStoreDb() {
  resetDbConnection();
}
503
+
504
/**
 * Report whether a user id is already present in the `users` table.
 *
 * @param {string} uid - User unique id to look up.
 * @returns {boolean} `true` when a matching row exists; `false` when it does
 *   not or when no database is open.
 */
function hasUserInDb(uid) {
  if (!db) {
    return false;
  }
  const lookup = db.prepare("SELECT 1 FROM users WHERE unique_id = ?");
  return Boolean(lookup.get(uid));
}
509
+
510
/**
 * Insert a user into the `users` table unless a row with the same id already
 * exists (`INSERT OR IGNORE`). Does nothing when no database is open.
 *
 * `ttSeller` and `verified` are stored as tri-state flags: NULL when the value
 * is undefined, null or an empty string, otherwise 1 or 0 by truthiness.
 *
 * @param {object} user
 * @param {string} user.uniqueId - Primary key of the user.
 * @param {*} [user.ttSeller] - Seller flag.
 * @param {*} [user.verified] - Verified flag.
 * @param {string} [user.locationCreated] - Stored as NULL when falsy.
 */
function addUserToDb(user) {
  if (!db) return;

  const toNullableFlag = (value) => {
    if (value === undefined || value === null || value === "") {
      return null;
    }
    return value ? 1 : 0;
  };

  // Take the timestamp once so created_at and updated_at of a new row are
  // guaranteed to be identical.
  const timestamp = new Date().toISOString();

  db.prepare(
    `
    INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `,
  ).run(
    user.uniqueId,
    toNullableFlag(user.ttSeller),
    toNullableFlag(user.verified),
    user.locationCreated || null,
    timestamp,
    timestamp,
  );
}
538
+
539
+ function addJobToDb(user) {
540
+ if (!db) return;
541
+ const now = Date.now();
542
+ db.prepare(
543
+ `
544
+ INSERT OR IGNORE INTO jobs (
545
+ unique_id,
546
+ nickname,
547
+ status,
548
+ sources,
549
+ claimed_by,
550
+ claimed_at,
551
+ error,
552
+ pinned,
553
+ no_video,
554
+ restricted,
555
+ user_update_count,
556
+ tt_seller,
557
+ verified,
558
+ video_count,
559
+ comment_count,
560
+ guessed_location,
561
+ location_created,
562
+ follower_count,
563
+ following_count,
564
+ heart_count,
565
+ refresh_time,
566
+ processed,
567
+ processed_at,
568
+ created_at,
569
+ updated_at,
570
+ region,
571
+ signature,
572
+ bio_link,
573
+ sec_uid
574
+ )
575
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
576
+ `,
577
+ ).run(
578
+ user.uniqueId,
579
+ user.nickname || null,
580
+ user.status || inferStatus(user),
581
+ JSON.stringify(
582
+ Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
583
+ ),
584
+ user.claimedBy || null,
585
+ user.claimedAt || null,
586
+ user.error || null,
587
+ user.pinned ? 1 : 0,
588
+ user.noVideo ? 1 : 0,
589
+ user.restricted ? 1 : 0,
590
+ user.userUpdateCount || 0,
591
+ user.ttSeller === undefined ||
592
+ user.ttSeller === null ||
593
+ user.ttSeller === ""
594
+ ? null
595
+ : user.ttSeller
596
+ ? 1
597
+ : 0,
598
+ user.verified === undefined ||
599
+ user.verified === null ||
600
+ user.verified === ""
601
+ ? null
602
+ : user.verified
603
+ ? 1
604
+ : 0,
605
+ user.videoCount || 0,
606
+ user.commentCount || 0,
607
+ user.guessedLocation || null,
608
+ user.locationCreated || null,
609
+ user.followerCount || 0,
610
+ user.followingCount || 0,
611
+ user.heartCount || 0,
612
+ user.refreshTime || null,
613
+ user.processed ? 1 : 0,
614
+ user.processedAt || null,
615
+ user.createdAt || now,
616
+ user.updatedAt || now,
617
+ user.region || null,
618
+ user.signature || null,
619
+ user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
620
+ user.secUid || null,
621
+ );
622
+ }
623
+
624
/**
 * Count the rows in the `users` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getUserDbCount() {
  if (!db) {
    return 0;
  }
  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM users").get();
  return total;
}
628
+
629
/**
 * Count the rows in the `jobs` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getJobsCount() {
  if (!db) {
    return 0;
  }
  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM jobs").get();
  return total;
}
633
+
634
/**
 * Count the `jobs` rows whose status is 'pending'.
 *
 * @returns {number} Pending job count, or 0 when no database is open.
 */
function getPendingJobsCount() {
  if (!db) {
    return 0;
  }
  const pendingQuery = db.prepare(
    "SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'",
  );
  const { c: pending } = pendingQuery.get();
  return pending;
}
640
+
641
/**
 * Count the `jobs` rows that still await a user-profile update: `tt_seller`
 * is NULL or empty and `user_update_count` is NULL or not positive.
 *
 * @returns {number} Matching row count, or 0 when no database is open.
 */
function getPendingJobsUserUpdateCount() {
  if (!db) {
    return 0;
  }
  const awaitingUpdateSql = `
    SELECT COUNT(*) as c
    FROM jobs
    WHERE COALESCE(tt_seller, '') = ''
    AND COALESCE(user_update_count, 0) <= 0
  `;
  const { c: awaiting } = db.prepare(awaitingUpdateSql).get();
  return awaiting;
}
654
+
655
/**
 * Count the rows in the `raw_jobs` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getRawJobsCount() {
  if (!db) {
    return 0;
  }
  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get();
  return total;
}
659
+
660
/**
 * Collect the counters shown on the dashboard.
 *
 * Runs one aggregate scan over `jobs` (status, target-user and per-source
 * counters), one count over `jobs_base` for outstanding user-update tasks,
 * a per-country breakdown of done jobs, a per-country breakdown of target
 * users, and total counts of `jobs_base`, `raw_jobs` and `users`.
 *
 * A "target user" is a job with `tt_seller = 1`, `verified = 0` and a
 * `location_created` contained in `targetLocations`; with no target locations
 * nothing counts as a target.
 *
 * Every counter is reported as a number: SUM over an empty table yields NULL
 * in SQLite, which is normalized to 0 here.
 *
 * @param {string[]} [targetLocations=[]] - Location codes that define target
 *   users. Anything that is not an array is treated as an empty list.
 * @returns {object|null} The statistics object, or `null` when no database is
 *   open.
 */
function getDashboardStatsFromDb(targetLocations = []) {
  if (!db) return null;

  const locations = Array.isArray(targetLocations) ? targetLocations : [];
  const hasTargets = locations.length > 0;
  const targetPlaceholders = locations.map(() => "?").join(", ");
  // With no target locations the condition must never match.
  const targetClause = hasTargets
    ? `AND location_created IN (${targetPlaceholders})`
    : "AND 1 = 0";

  // All jobs-table aggregates are merged into a single scan.
  const aggregateRow = db
    .prepare(
      `
      SELECT
        COUNT(*) as total,
        SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
        SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
        SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
        SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
        SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
        SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${targetClause} THEN 1 ELSE 0 END) as targetUsers,
        SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
        SUM(CASE
          WHEN status != 'done'
          AND instr(COALESCE(sources, ''), '"video"') = 0
          AND instr(COALESCE(sources, ''), '"comment"') = 0
          AND instr(COALESCE(sources, ''), '"guess"') = 0
          AND instr(COALESCE(sources, ''), '"following"') = 0
          AND instr(COALESCE(sources, ''), '"follower"') = 0
          THEN 1 ELSE 0 END) as seed
      FROM jobs
    `,
    )
    .get(...locations);

  // userUpdateTasks is counted separately, from jobs_base.
  const userUpdateTasksRow = db
    .prepare(
      `
      SELECT COUNT(*) as userUpdateTasks
      FROM jobs_base
      WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
    `,
    )
    .get();

  // countryStats and targetCountryStats need GROUP BY, so they stay separate.
  const countryStats = db
    .prepare(
      `
      SELECT
        COALESCE(location_created, '未知') as country,
        COUNT(*) as count,
        SUM(CASE
          WHEN tt_seller = 1 AND verified = 0 ${targetClause}
          THEN 1 ELSE 0 END) as targetCount
      FROM jobs
      WHERE status = 'done'
      GROUP BY COALESCE(location_created, '未知')
      ORDER BY count DESC
    `,
    )
    .all(...locations);

  const targetCountryStats = hasTargets
    ? db
        .prepare(
          `
          SELECT location_created as country, COUNT(*) as count
          FROM jobs
          WHERE tt_seller = 1
          AND verified = 0
          AND location_created IN (${targetPlaceholders})
          GROUP BY location_created
          ORDER BY count DESC
        `,
        )
        .all(...locations)
    : [];

  const jobsBaseCount = db
    .prepare("SELECT COUNT(*) as total FROM jobs_base")
    .get().total;

  // SUM(...) is NULL when jobs has no rows; report 0 instead.
  const pending = aggregateRow.pending ?? 0;
  const processing = aggregateRow.processing ?? 0;
  const done = aggregateRow.done ?? 0;
  const error = aggregateRow.error ?? 0;
  const restricted = aggregateRow.restricted ?? 0;

  return {
    totalUsers: aggregateRow.total,
    rawJobs: getRawJobsCount(),
    dbTotalUsers: getUserDbCount(),
    jobsTotal: aggregateRow.total,
    jobsBaseTotal: jobsBaseCount,
    jobsPending: pending,
    processedUsers: done,
    pendingUsers: pending,
    processingUsers: processing,
    restrictedUsers: restricted,
    errorUsers: error,
    targetUsers: aggregateRow.targetUsers ?? 0,
    userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
    targetCountryStats,
    countryStats,
    sourceStats: {
      seed: aggregateRow.seed ?? 0,
      video: aggregateRow.video ?? 0,
      comment: aggregateRow.comment ?? 0,
      guess: aggregateRow.guess ?? 0,
      following: aggregateRow.following ?? 0,
      follower: aggregateRow.follower ?? 0,
      processed: done,
      restricted,
      error,
      noVideo: aggregateRow.noVideo ?? 0,
    },
  };
}
785
+
786
/**
 * Break down pending jobs by guessed location.
 *
 * @returns {{country: string, count: number}[]} One row per
 *   `guessed_location` (NULL reported as '未知'), largest count first; an empty
 *   array when no database is open.
 */
function getPendingByCountryFromDb() {
  if (!db) {
    return [];
  }

  // Group pending jobs by guessed_location.
  const pendingByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs
    WHERE status = 'pending'
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  return db.prepare(pendingByCountrySql).all();
}
806
+
807
/**
 * Break down outstanding user-update tasks by guessed location.
 *
 * Counts `jobs_base` rows whose `tt_seller` is NULL and whose
 * `user_update_count` is NULL or not positive.
 *
 * @returns {{country: string, count: number}[]} One row per
 *   `guessed_location` (NULL reported as '未知'), largest count first; an empty
 *   array when no database is open.
 */
function getUserUpdateByCountryFromDb() {
  if (!db) {
    return [];
  }

  // Group tasks that still need profile data by guessed_location.
  const userUpdateByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE tt_seller IS NULL
    AND COALESCE(user_update_count, 0) <= 0
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  return db.prepare(userUpdateByCountrySql).all();
}
828
+
829
/**
 * Break down `jobs_base` rows with `tt_seller IS NULL` and
 * `user_update_count = 1` by guessed location.
 *
 * @returns {{country: string, count: number}[]} One row per
 *   `guessed_location` (NULL reported as '未知'), largest count first; an empty
 *   array when no database is open.
 */
function getAttachStuckByCountryFromDb() {
  if (!db) {
    return [];
  }

  const stuckByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE tt_seller IS NULL
    AND COALESCE(user_update_count, 0) = 1
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  const stuckRows = db.prepare(stuckByCountrySql).all();
  return stuckRows;
}
847
+
848
/**
 * Put the `jobs_base` rows of one country that have an empty `tt_seller` and
 * `user_update_count = 1` back into the user-update queue: resets
 * `user_update_count` to 0, clears the claim fields and bumps `updated_at`.
 *
 * @param {string|null|undefined} country - Guessed location to restore;
 *   null/undefined selects the '未知' bucket. The value is stringified and
 *   trimmed.
 * @returns {{restored: number, country: string, error?: string}} Number of
 *   rows that matched before the update (0 with `error` when no database is
 *   open or the country is blank).
 */
function restoreAttachStuckByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const rawCountry = country == null ? "未知" : country;
  const normalizedCountry = String(rawCountry).trim();
  if (normalizedCountry === "") {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const stuckFilter = `
    COALESCE(tt_seller, '') = ''
    AND COALESCE(user_update_count, 0) = 1
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countStatement = db.prepare(
    `
    SELECT COUNT(*) as c
    FROM jobs_base
    WHERE ${stuckFilter}
  `,
  );
  const matched = countStatement.get(normalizedCountry)?.c || 0;
  if (!matched) {
    return { restored: 0, country: normalizedCountry };
  }

  const restoreStatement = db.prepare(
    `
    UPDATE jobs_base
    SET user_update_count = 0,
    updated_at = ?,
    claimed_by = NULL,
    claimed_at = NULL
    WHERE ${stuckFilter}
  `,
  );
  restoreStatement.run(Date.now(), normalizedCountry);

  return { restored: matched, country: normalizedCountry };
}
895
+
896
/**
 * Reset the pending `jobs` rows of one country: sets `user_update_count` to 0,
 * clears the claim fields and bumps `updated_at`.
 *
 * @param {string|null|undefined} country - Guessed location to reset;
 *   null/undefined selects the '未知' bucket. The value is stringified and
 *   trimmed.
 * @returns {{reset: number, country: string, error?: string}} Number of rows
 *   that matched before the update (0 with `error` when no database is open or
 *   the country is blank).
 */
function resetPendingByCountry(country) {
  if (!db) {
    return { reset: 0, country, error: "db not ready" };
  }

  const rawCountry = country == null ? "未知" : country;
  const normalizedCountry = String(rawCountry).trim();
  if (normalizedCountry === "") {
    return {
      reset: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const pendingFilter = `
    status = 'pending'
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countStatement = db.prepare(
    `
    SELECT COUNT(*) as c
    FROM jobs
    WHERE ${pendingFilter}
  `,
  );
  const matched = countStatement.get(normalizedCountry)?.c || 0;
  if (!matched) {
    return { reset: 0, country: normalizedCountry };
  }

  const resetStatement = db.prepare(
    `
    UPDATE jobs
    SET user_update_count = 0,
    updated_at = ?,
    claimed_by = NULL,
    claimed_at = NULL
    WHERE ${pendingFilter}
  `,
  );
  resetStatement.run(Date.now(), normalizedCountry);

  return { reset: matched, country: normalizedCountry };
}
942
+
943
/**
 * Break down the `raw_jobs` table by guessed location.
 *
 * @returns {{country: string, count: number}[]} One row per
 *   `guessed_location` (NULL reported as '未知'), largest count first; an empty
 *   array when no database is open.
 */
function getRawByCountryFromDb() {
  if (!db) {
    return [];
  }

  const rawByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM raw_jobs
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  const rawRows = db.prepare(rawByCountrySql).all();
  return rawRows;
}
959
+
960
/**
 * Move the jobs of one country out of a work queue and into `raw_jobs`.
 *
 * Scope "pending" moves rows from `jobs` with `status = 'pending'` (the same
 * source as getPendingByCountryFromDb). Scope "userUpdate" moves rows from
 * `jobs_base` with `tt_seller IS NULL` and a non-positive `user_update_count`
 * (the same source as getUserUpdateByCountryFromDb). Matching rows are copied
 * with `INSERT OR REPLACE` and then deleted from the source table, both inside
 * one transaction.
 *
 * @param {string} scope - "pending" or "userUpdate" (trimmed before use).
 * @param {string|null|undefined} country - Guessed location to move;
 *   null/undefined selects the '未知' bucket. The value is stringified and
 *   trimmed.
 * @returns {{moved: number, scope: string, country: string, error?: string}}
 *   Number of rows that matched before the move (0 with `error` when no
 *   database is open, the country is blank or the scope is unsupported).
 */
function moveJobsToRawByCountry(scope, country) {
  if (!db) {
    return { moved: 0, scope, country, error: "db not ready" };
  }

  const normalizedScope = String(scope || "").trim();
  const rawCountry = country == null ? "未知" : country;
  const normalizedCountry = String(rawCountry).trim();
  if (normalizedCountry === "") {
    return {
      moved: 0,
      scope: normalizedScope,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // Source table and row filter for every supported scope.
  const scopeSources = new Map([
    ["pending", { sourceTable: "jobs", scopeWhere: `status = 'pending'` }],
    [
      "userUpdate",
      {
        sourceTable: "jobs_base",
        scopeWhere: `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`,
      },
    ],
  ]);
  const source = scopeSources.get(normalizedScope);
  if (!source) {
    return {
      moved: 0,
      scope: normalizedScope,
      country: normalizedCountry,
      error: "unsupported scope",
    };
  }
  const { sourceTable, scopeWhere } = source;

  // Both scopes copy the same columns.
  // NOTE(review): bio_link, confirmed_location, modified_at and status_code
  // are not in this list, so they are not carried over to raw_jobs. Confirm
  // that this is intentional.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, follower_count,
    following_count, heart_count, refresh_time, processed,
    processed_at, created_at, updated_at, region, signature,
    sec_uid, latest_video_time, user_create_time
  `;

  const whereSql = `
    ${scopeWhere}
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countStatement = db.prepare(
    `
    SELECT COUNT(*) as c
    FROM ${sourceTable}
    WHERE ${whereSql}
  `,
  );
  const matched = countStatement.get(normalizedCountry)?.c || 0;
  if (!matched) {
    return { moved: 0, scope: normalizedScope, country: normalizedCountry };
  }

  const moveTxn = db.transaction((targetCountry) => {
    const copyStatement = db.prepare(
      `
      INSERT OR REPLACE INTO raw_jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM ${sourceTable}
      WHERE ${whereSql}
    `,
    );
    copyStatement.run(targetCountry);

    const deleteStatement = db.prepare(
      `
      DELETE FROM ${sourceTable}
      WHERE ${whereSql}
    `,
    );
    deleteStatement.run(targetCountry);
  });

  moveTxn(normalizedCountry);
  return { moved: matched, scope: normalizedScope, country: normalizedCountry };
}
1058
+
1059
/**
 * Restores archived jobs for one country: copies every `raw_jobs` row whose
 * guessed location matches into `jobs` (replacing rows that share a
 * unique_id) and then deletes those rows from `raw_jobs`, in one transaction.
 *
 * @param {*} country - Guessed location to restore. null/undefined selects
 *   the "未知" bucket that rows without a guessed location fall into.
 * @returns {{restored: number, country: *, error?: string}} `restored` is the
 *   number of matching rows counted before the move; `error` is set when the
 *   database is not open or the country is blank.
 */
function restoreRawJobsByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // Same column list as restoreRawJobById / restoreRawJobsByFilter. bio_link
  // was previously missing here, so INSERT OR REPLACE dropped it on restore.
  // NOTE(review): latest_video_time / user_create_time are not restored by
  // any of the restore functions — confirm whether that is intentional.
  const restoreColumns = `
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature,
        bio_link, sec_uid
  `;
  const whereSql = `COALESCE(guessed_location, '未知') = ?`;

  const count =
    db
      .prepare(
        `
      SELECT COUNT(*) as c
      FROM raw_jobs
      WHERE ${whereSql}
    `,
      )
      .get(normalizedCountry)?.c || 0;

  if (!count) {
    return { restored: 0, country: normalizedCountry };
  }

  // Copy and delete must succeed or fail together.
  const restoreTxn = db.transaction((targetCountry) => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${restoreColumns}
      )
      SELECT
        ${restoreColumns}
      FROM raw_jobs
      WHERE ${whereSql}
    `,
    ).run(targetCountry);

    db.prepare(
      `
      DELETE FROM raw_jobs
      WHERE ${whereSql}
    `,
    ).run(targetCountry);
  });

  restoreTxn(normalizedCountry);
  return { restored: count, country: normalizedCountry };
}
1167
+
1168
/**
 * Restores a single archived job: copies the `raw_jobs` row with the given
 * unique_id into `jobs` (replacing any existing row) and deletes it from
 * `raw_jobs`, in one transaction.
 *
 * @param {*} uniqueId - Identifier of the job to restore; trimmed before use.
 * @returns {{restored: number, uniqueId: *, error?: string}} `restored` is 1
 *   when a row was found and moved, otherwise 0.
 */
function restoreRawJobById(uniqueId) {
  if (!db) {
    return { restored: 0, uniqueId, error: "db not ready" };
  }

  // String(undefined) would yield the literal "undefined" and slip past the
  // emptiness check, so nullish ids are treated as empty up front.
  const safeId = uniqueId == null ? "" : String(uniqueId).trim();
  if (!safeId) {
    return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
  }

  const exists =
    db
      .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
      .get(safeId)?.c || 0;

  if (!exists) {
    return { restored: 0, uniqueId: safeId };
  }

  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
      )
      SELECT
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
      FROM raw_jobs WHERE unique_id = ?
    `,
    ).run(safeId);

    db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
  });

  restoreTxn();
  return { restored: 1, uniqueId: safeId };
}
1213
+
1214
/**
 * Restores every archived job matching the given filters from `raw_jobs`
 * back into `jobs`, deleting the moved rows from `raw_jobs` in the same
 * transaction. At least one filter must be supplied so that a bare call can
 * never restore the whole archive.
 *
 * @param {object} [filters]
 * @param {*} [filters.search] - Case-insensitive substring matched against
 *   unique_id and nickname.
 * @param {*} [filters.location] - Exact guessed location ("未知" for rows
 *   without one).
 * @param {*} [filters.hasVideo] - Truthy to require video_count > 0.
 * @param {*} [filters.hasFollower] - Truthy to require follower_count > 0.
 * @returns {{restored: number, error?: string}} `restored` is the number of
 *   matching rows counted before the move.
 */
function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower } = {}) {
  if (!db) {
    return { restored: 0, error: "db not ready" };
  }

  const where = [];
  const args = [];

  if (search) {
    where.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    // Coerce first: a numeric search term has no toLowerCase().
    const likeVal = `%${String(search).toLowerCase()}%`;
    args.push(likeVal, likeVal);
  }

  if (location) {
    where.push("COALESCE(guessed_location, '未知') = ?");
    args.push(location);
  }

  if (hasVideo) {
    where.push("COALESCE(video_count, 0) > 0");
  }

  if (hasFollower) {
    where.push("COALESCE(follower_count, 0) > 0");
  }

  if (where.length === 0) {
    return { restored: 0, error: "at least one filter is required" };
  }

  const whereSql = where.join(" AND ");

  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;

  if (!count) {
    return { restored: 0 };
  }

  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
      )
      SELECT
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
      FROM raw_jobs WHERE ${whereSql}
    `,
    ).run(...args);

    db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
  });

  restoreTxn();
  return { restored: count };
}
1284
+
1285
/**
 * Returns one page of archived jobs from `raw_jobs`, newest first.
 *
 * @param {object} [query]
 * @param {*} [query.search] - Case-insensitive substring matched against
 *   unique_id and nickname.
 * @param {*} [query.location] - Exact guessed location ("未知" for rows
 *   without one).
 * @param {*} [query.limit] - Page size; clamped to 1..200, default 50.
 * @param {*} [query.offset] - Rows to skip; negative/invalid becomes 0.
 * @param {*} [query.hasVideo] - Truthy to require video_count > 0.
 * @param {*} [query.hasFollower] - Truthy to require follower_count > 0.
 * @returns {{total: number, limit: number, offset: number, users: object[]}|null}
 *   null when the database is not open.
 */
function getRawJobsPageFromDb({
  search,
  location,
  limit,
  offset,
  hasVideo,
  hasFollower,
} = {}) {
  if (!db) return null;

  // Explicit radix so inputs such as "0x10" are not read as hexadecimal.
  const safeLimit = Math.max(1, Math.min(200, Number.parseInt(limit, 10) || 50));
  const safeOffset = Math.max(0, Number.parseInt(offset, 10) || 0);
  const where = [];
  const args = [];

  if (search) {
    where.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    const pattern = `%${String(search).toLowerCase()}%`;
    args.push(pattern, pattern);
  }
  if (location) {
    where.push("COALESCE(guessed_location, '未知') = ?");
    args.push(location);
  }
  if (hasVideo) {
    where.push("COALESCE(video_count, 0) > 0");
  }
  if (hasFollower) {
    where.push("COALESCE(follower_count, 0) > 0");
  }

  const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
  const total = db
    .prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
    .get(...args).c;

  const rows = db
    .prepare(
      `
      SELECT *
      FROM raw_jobs
      ${whereSql}
      ORDER BY created_at DESC, unique_id ASC
      LIMIT ? OFFSET ?
    `,
    )
    .all(...args, safeLimit, safeOffset);

  return {
    total,
    limit: safeLimit,
    offset: safeOffset,
    users: rows.map(mapJobRow),
  };
}
1342
+
1343
// ====== Tag discovery and scoring CRUD ======
1344
+
1345
/**
 * Inserts a tag into `tags` unless it already exists (INSERT OR IGNORE).
 * The tag is stored lower-cased, trimmed and without leading `#` characters.
 *
 * @param {string} tag - Raw tag text, with or without a leading `#`.
 * @param {*} countries - Value stored as JSON in the `countries` column;
 *   null/undefined is stored as an empty list.
 * @param {string} [source="llm"] - Value stored in the `source` column.
 * @returns {{inserted: boolean, tag?: string, error?: string}} `inserted` is
 *   false when the tag already existed, was invalid, or the insert failed.
 */
function insertTag(tag, countries, source = "llm") {
  if (!db) return { inserted: false, error: "db not ready" };
  if (typeof tag !== "string") {
    return { inserted: false, error: "invalid tag" };
  }
  // Trim before stripping so " #foo" also loses its "#" prefix; stripping
  // first would leave the "#" in place behind the leading whitespace.
  const normalized = tag.trim().replace(/^#+/, "").trim().toLowerCase();
  if (!normalized || normalized.length < 2) {
    return { inserted: false, error: "invalid tag" };
  }
  try {
    const result = db
      .prepare(
        `
      INSERT OR IGNORE INTO tags (tag, countries, source)
      VALUES (?, ?, ?)
    `,
      )
      // JSON.stringify(undefined) is undefined, hence the empty-list fallback.
      .run(normalized, JSON.stringify(countries ?? []), source);
    return { inserted: result.changes > 0, tag: normalized };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}
1366
+
1367
/**
 * Lists tags with the given status, lowest score first (ties broken by
 * creation time), with the JSON columns decoded.
 *
 * @param {*} status - Value compared against the `status` column.
 * @param {number} [limit=100] - Maximum number of rows returned.
 * @returns {object[]} Tag rows with `countries` / `matched_countries` parsed
 *   into values (empty arrays when the column is empty); [] without a db.
 */
function getTagsByStatus(status, limit = 100) {
  if (!db) return [];
  const statement = db.prepare(
    `
      SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
    `,
  );
  const decoded = [];
  for (const record of statement.all(status, limit)) {
    decoded.push({
      ...record,
      countries: JSON.parse(record.countries || "[]"),
      matched_countries: JSON.parse(record.matched_countries || "[]"),
    });
  }
  return decoded;
}
1382
+
1383
/**
 * Lists non-dead tags that target `country` and have at least `minScore`,
 * highest score first.
 *
 * @param {*} country - Value that must appear in the tag's `countries` list.
 * @param {number} [minScore=0] - Minimum `score` a tag needs to be included.
 * @returns {object[]} Matching tag rows with JSON columns decoded; [] without
 *   a db.
 */
function getTagsByCountry(country, minScore = 0) {
  if (!db) return [];
  const liveTags = db
    .prepare(
      `
      SELECT * FROM tags WHERE status != 'dead'
      ORDER BY score DESC
    `,
    )
    .all();

  // `countries` is stored as JSON text, so every row is decoded first and the
  // country / score match is applied in JS afterwards.
  const decoded = liveTags.map((record) => {
    const countries = JSON.parse(record.countries || "[]");
    const matched_countries = JSON.parse(record.matched_countries || "[]");
    return { ...record, countries, matched_countries };
  });

  const matches = [];
  for (const entry of decoded) {
    if (entry.countries.includes(country) && entry.score >= minScore) {
      matches.push(entry);
    }
  }
  return matches;
}
1402
+
1403
/**
 * Lists tags marked 'dead' that target `country`, lowest score first.
 *
 * @param {*} country - Value that must appear in the tag's `countries` list.
 * @returns {object[]} Matching tag rows with JSON columns decoded; [] without
 *   a db.
 */
function getDeadTags(country) {
  if (!db) return [];
  const deadTags = db
    .prepare(
      `
      SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
    `,
    )
    .all();

  // Decode the JSON columns of every row, then keep the rows for `country`.
  const decoded = deadTags.map((record) => {
    const countries = JSON.parse(record.countries || "[]");
    const matched_countries = JSON.parse(record.matched_countries || "[]");
    return { ...record, countries, matched_countries };
  });
  return decoded.filter((entry) => entry.countries.includes(country));
}
1420
+
1421
/**
 * Claims a tag for scoring by flipping its status from 'new' to 'scoring'.
 * The status check lives in the UPDATE's WHERE clause, so only a tag that is
 * still 'new' can be claimed.
 *
 * @param {*} tag - Tag text to claim.
 * @returns {{ok: boolean, tag?: *, error?: string}} `ok` is true when this
 *   call performed the transition; otherwise `error` says whether the tag is
 *   missing or already in another status.
 */
function claimTag(tag) {
  if (!db) return { ok: false, error: "db not ready" };

  const outcome = db
    .prepare(
      "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
    )
    .run(tag);

  if (outcome.changes !== 0) {
    return { ok: true, tag };
  }

  // Nothing was updated: tell "no such tag" apart from "not 'new' anymore".
  const current = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
  return current
    ? { ok: false, error: `tag status is ${current.status}, already claimed` }
    : { ok: false, error: "tag not found" };
}
1437
+
1438
/**
 * Records a scoring result for a tag. Each supplied field overwrites its
 * column; fields left null/undefined keep the stored value (COALESCE).
 * `scored_at` is always set to the current time and `score_count` is
 * incremented on every call.
 *
 * @param {*} tag - Tag text identifying the row.
 * @param {object} fields - Any of score, status, totalPosts, authorCount,
 *   matchedAuthors, matchedCountries (stored as JSON), pushedUsers, error
 *   (stored in `last_error`).
 * @returns {{ok: boolean, tag?: *, error?: string}} `ok` is true when a row
 *   was updated; `error` carries the message of a failed statement.
 */
function reportTagScore(tag, fields) {
  if (!db) return { ok: false, error: "db not ready" };

  const keepIfMissing = (value) => value ?? null;
  const countriesJson = fields.matchedCountries
    ? JSON.stringify(fields.matchedCountries)
    : null;
  const scoredAt = new Date().toISOString();

  // Order must match the placeholders in the UPDATE below.
  const bindings = [
    keepIfMissing(fields.score),
    keepIfMissing(fields.status),
    keepIfMissing(fields.totalPosts),
    keepIfMissing(fields.authorCount),
    keepIfMissing(fields.matchedAuthors),
    countriesJson,
    keepIfMissing(fields.pushedUsers),
    keepIfMissing(fields.error),
    scoredAt,
    tag,
  ];

  try {
    const outcome = db
      .prepare(
        `
      UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?
    `,
      )
      .run(...bindings);
    return { ok: outcome.changes > 0, tag };
  } catch (failure) {
    return { ok: false, error: failure.message };
  }
}
1490
+
1491
/**
 * Lists tags of any status, highest score first (ties broken by newest
 * creation time), with the JSON columns decoded.
 *
 * @param {number} [limit=200] - Maximum number of rows returned.
 * @returns {object[]} Tag rows with `countries` / `matched_countries` parsed;
 *   [] without a db.
 */
function getAllTags(limit = 200) {
  if (!db) return [];
  const statement = db.prepare(
    `
      SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
    `,
  );
  const decoded = [];
  for (const record of statement.all(limit)) {
    decoded.push({
      ...record,
      countries: JSON.parse(record.countries || "[]"),
      matched_countries: JSON.parse(record.matched_countries || "[]"),
    });
  }
  return decoded;
}
1506
+
1507
/**
 * Debug helper: prepares and runs the given SQL text and returns the raw
 * result rows.
 *
 * NOTE(review): the SQL text is executed as given, so this must never be fed
 * untrusted input — confirm that every caller is a trusted debug path.
 *
 * @param {string} sql - SQL statement text.
 * @param {Array} [params=[]] - Positional bind parameters.
 * @returns {{rows: object[]}|{error: string}} Rows on success, otherwise the
 *   message of the thrown error (or "db not ready").
 */
function rawQuery(sql, params = []) {
  if (!db) return { error: "db not ready" };
  let rows;
  try {
    rows = db.prepare(sql).all(...params);
  } catch (failure) {
    return { error: failure.message };
  }
  return { rows };
}
1517
+
1518
/**
 * Cleans up rows in `tags` whose tag text starts with `#`.
 *
 * For each dirty row the tag is stripped of leading `#`, trimmed and
 * lower-cased, then:
 *  - if nothing usable remains (fewer than 2 characters) the row is deleted;
 *  - if the clean tag already exists, the dirty row's countries are merged
 *    into the existing row and the dirty row is deleted;
 *  - otherwise the dirty row is renamed in place.
 *
 * The whole pass runs in one transaction, so a failure part-way through
 * leaves the table untouched instead of half-normalized.
 *
 * @returns {{ok: boolean, error?: string, fixed?: number, merged?: number,
 *   skipped?: number, details?: object}} Counts plus per-row details.
 */
function normalizeTags() {
  if (!db) return { ok: false, error: "db not ready" };

  const fixed = [];
  const merged = [];
  const skipped = [];

  const normalizeTxn = db.transaction(() => {
    const dirtyRows = db
      .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
      .all();

    for (const row of dirtyRows) {
      const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
      if (!cleanTag || cleanTag.length < 2) {
        db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
        skipped.push({
          dirty: row.tag,
          reason: "empty after normalize, deleted",
        });
        continue;
      }

      // Looked up on every iteration, so a tag renamed earlier in this same
      // pass is seen as "existing" by later rows that clean to the same text.
      const existing = db
        .prepare("SELECT * FROM tags WHERE tag = ?")
        .get(cleanTag);
      if (existing) {
        // Keep the clean row; fold the dirty row's countries into it.
        const oldCountries = JSON.parse(row.countries || "[]");
        const existCountries = JSON.parse(existing.countries || "[]");
        const mergedCountries = [
          ...new Set([...existCountries, ...oldCountries]),
        ];
        db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
          JSON.stringify(mergedCountries),
          cleanTag,
        );
        db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
        merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      } else {
        db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
        fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      }
    }
  });

  normalizeTxn();

  return {
    ok: true,
    fixed: fixed.length,
    merged: merged.length,
    skipped: skipped.length,
    details: { fixed, merged, skipped },
  };
}
1572
+
1573
/**
 * Deletes every row from `tags`.
 *
 * @returns {{ok: boolean, deleted?: number, error?: string}} `deleted` is the
 *   row count read just before the delete.
 */
function clearTags() {
  if (!db) return { ok: false, error: "db not ready" };
  const { c: rowsBefore } = db.prepare("SELECT COUNT(*) as c FROM tags").get();
  db.exec("DELETE FROM tags");
  return { ok: true, deleted: rowsBefore };
}
1579
+
1580
/**
 * Returns one page of rows from `jobs` for the user list, plus the total
 * number of rows matching the filters.
 *
 * The COUNT(*) result is cached per filter combination for 5 seconds (on
 * `getUsersPageFromDb._countCache`) because the source comments describe a
 * full count over this table as slow.
 *
 * @param {object} query
 * @param {string} [query.status] - Exact status; "all" or empty disables it.
 * @param {*} [query.search] - Case-insensitive substring matched against
 *   unique_id and nickname.
 * @param {*} [query.location] - Exact `location_created`.
 * @param {string} [query.target] - "1" restricts to tt_seller = 1 AND
 *   verified = 0 rows in the target location(s).
 * @param {*} [query.targetLocation] - Single target location (with target).
 * @param {*} [query.limit] - Page size; clamped to 1..200, default 50.
 * @param {*} [query.offset] - Rows to skip; negative/invalid becomes 0.
 * @param {Array} [query.targetLocations=[]] - Target locations used when
 *   `targetLocation` is not given; an empty list with target "1" matches
 *   nothing.
 * @returns {{total: number, users: object[]}|null} null when the database is
 *   not open.
 */
function getUsersPageFromDb({
  status,
  search,
  location,
  target,
  targetLocation,
  limit,
  offset,
  targetLocations = [],
}) {
  if (!db) return null;

  const COUNT_CACHE_TTL_MS = 5000;
  const COUNT_CACHE_MAX_ENTRIES = 500;

  const safeLimit = Math.max(1, Math.min(200, Number.parseInt(limit, 10) || 50));
  const safeOffset = Math.max(0, Number.parseInt(offset, 10) || 0);
  const where = [];
  const args = [];

  if (status && status !== "all") {
    where.push("status = ?");
    args.push(status);
  }
  if (target === "1") {
    if (targetLocation) {
      where.push("tt_seller = 1 AND verified = 0 AND location_created = ?");
      args.push(targetLocation);
    } else if (targetLocations.length > 0) {
      where.push(
        `tt_seller = 1 AND verified = 0 AND location_created IN (${targetLocations
          .map(() => "?")
          .join(", ")})`,
      );
      args.push(...targetLocations);
    } else {
      // Target mode without any target location must match nothing.
      where.push("1 = 0");
    }
  }
  if (search) {
    where.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    const pattern = `%${String(search).toLowerCase()}%`;
    args.push(pattern, pattern);
  }
  if (location) {
    where.push("location_created = ?");
    args.push(location);
  }

  const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";

  if (!getUsersPageFromDb._countCache) {
    getUsersPageFromDb._countCache = new Map();
  }
  const countCache = getUsersPageFromDb._countCache;
  // JSON keeps the key unambiguous when a bound value itself contains ",".
  const cacheKey = `${whereSql}|${JSON.stringify(args)}`;
  const now = Date.now();
  const cachedCount = countCache.get(cacheKey);
  let total;
  if (cachedCount && now - cachedCount.time < COUNT_CACHE_TTL_MS) {
    total = cachedCount.c;
  } else {
    total = db
      .prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
      .get(...args).c;
    // Every distinct search term adds a key, so bound the cache: drop expired
    // entries first and, if it is still full, start over.
    if (countCache.size >= COUNT_CACHE_MAX_ENTRIES) {
      for (const [key, entry] of countCache) {
        if (now - entry.time >= COUNT_CACHE_TTL_MS) countCache.delete(key);
      }
      if (countCache.size >= COUNT_CACHE_MAX_ENTRIES) countCache.clear();
    }
    countCache.set(cacheKey, { c: total, time: Date.now() });
  }

  // Only the columns the list view needs are selected, not SELECT *.
  const rows = db
    .prepare(
      `
      SELECT
        unique_id, nickname, sec_uid, status, sources,
        tt_seller, verified, follower_count, following_count,
        location_created, latest_video_time, refresh_time,
        guessed_location, pinned, processed_at, video_count,
        no_video, claimed_by, claimed_at, created_at, updated_at
      FROM jobs
      ${whereSql}
      ORDER BY
        pinned DESC,
        CASE
          WHEN ? = 'done' THEN COALESCE(processed_at, 0) * -1
          WHEN ? = '1' THEN COALESCE(refresh_time, 0) * -1
          ELSE 0
        END ASC,
        CASE status
          WHEN 'processing' THEN 0
          WHEN 'pending' THEN 1
          WHEN 'done' THEN 2
          WHEN 'error' THEN 3
          WHEN 'restricted' THEN 4
          ELSE 9
        END ASC,
        COALESCE(follower_count, 0) DESC,
        COALESCE(processed_at, 0) DESC,
        unique_id ASC
      LIMIT ? OFFSET ?
    `,
    )
    // `target ?? ""` avoids handing an undefined bind value to the statement;
    // an empty string takes the same ELSE branch of the CASE as NULL would.
    .all(...args, status || "", target ?? "", safeLimit, safeOffset)
    .map(mapJobRow);

  return {
    total,
    users: rows,
  };
}
1689
+
1690
/**
 * Lists every target user: rows in `jobs` with tt_seller = 1, verified = 0
 * and `location_created` in the given list, most followers first.
 *
 * @param {Array} [targetLocations=[]] - Allowed `location_created` values.
 * @returns {{total: number, users: object[]}|null} null when the database is
 *   not open; an empty result when no locations are given.
 */
function getTargetUsersFromDb(targetLocations = []) {
  if (!db) return null;
  if (!targetLocations.length) {
    return { total: 0, users: [] };
  }

  const placeholders = targetLocations.map(() => "?").join(", ");
  // The statement previously went straight from the column list to
  // "AND verified = 0", i.e. it had no FROM / WHERE and could not be
  // prepared. The filter below mirrors getTargetUsersByCountryFromDb.
  const rows = db
    .prepare(
      `
      SELECT
        unique_id, nickname, sec_uid, status, sources,
        tt_seller, verified, follower_count, following_count,
        location_created, latest_video_time, refresh_time,
        guessed_location, pinned, processed_at, video_count,
        no_video, claimed_by, claimed_at, created_at, updated_at
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${placeholders})
      ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
    `,
    )
    .all(...targetLocations)
    .map(mapJobRow);

  return {
    total: rows.length,
    users: rows,
  };
}
1719
+
1720
/**
 * Queries target users (tt_seller = 1, verified = 0, `location_created` in
 * `targetLocations`) in one of three modes:
 *
 *  - `options.summaryOnly`: per-country counts only, no user rows;
 *  - `options.limit` given: one flat page of users, optionally narrowed by
 *    `options.country` and `options.search`;
 *  - otherwise: all users, grouped by country.
 *
 * @param {Array} [targetLocations=[]] - Allowed `location_created` values.
 * @param {object} [options]
 * @param {boolean} [options.summaryOnly=false]
 * @param {*} [options.country] - Exact `location_created` (paged mode only).
 * @param {*} [options.search] - Substring matched with LIKE against
 *   unique_id and nickname (paged mode only).
 * @param {*} [options.limit] - Page size, capped at 10000. A negative or
 *   non-numeric value falls back to the cap.
 * @param {*} [options.offset] - Rows to skip; missing/invalid becomes 0.
 * @returns {object|null} null without a db; `{countries: []}` without
 *   locations; `{total, countries}` in summary and grouped modes;
 *   `{total, limit, offset, users}` in paged mode.
 */
function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
  if (!db) return null;
  if (!targetLocations.length) {
    return { countries: [] };
  }

  const {
    summaryOnly = false,
    country: filterCountry,
    search,
    limit,
    offset,
  } = options;
  const placeholders = targetLocations.map(() => "?").join(", ");

  // Summary mode: per-country counts only.
  if (summaryOnly) {
    const statsRows = db
      .prepare(
        `
        SELECT location_created as country, COUNT(*) as count
        FROM jobs
        WHERE tt_seller = 1
          AND verified = 0
          AND location_created IN (${placeholders})
        GROUP BY location_created
        ORDER BY count DESC
      `,
      )
      .all(...targetLocations);

    const countries = statsRows.map((r) => ({
      country: r.country,
      count: r.count,
      users: undefined,
    }));
    return {
      total: statsRows.reduce((s, r) => s + r.count, 0),
      countries,
    };
  }

  // Paged mode: flat list, optionally narrowed by country / search.
  if (limit !== undefined) {
    let filterSql = `
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${placeholders})
    `;
    const params = [...targetLocations];

    if (filterCountry) {
      filterSql += ` AND location_created = ?`;
      params.push(filterCountry);
    }

    if (search) {
      filterSql += ` AND (unique_id LIKE ? OR nickname LIKE ?)`;
      const likeSearch = `%${search}%`;
      params.push(likeSearch, likeSearch);
    }

    // The count statement is built from the shared FROM/WHERE text instead of
    // regex-rewriting the SELECT: the old /SELECT[^FROM]*FROM/ pattern was a
    // character class and only worked while no selected column contained an
    // upper-case F, R, O or M.
    const total =
      db.prepare(`SELECT COUNT(*) as cnt ${filterSql}`).get(...params)?.cnt || 0;

    // A missing offset used to become NaN and a negative limit bypassed the
    // cap (SQLite treats a negative LIMIT as "no limit").
    const requestedLimit = Math.floor(Number(limit));
    const safeLimit =
      Number.isFinite(requestedLimit) && requestedLimit >= 0
        ? Math.min(requestedLimit, 10000)
        : 10000;
    const requestedOffset = Math.floor(Number(offset));
    const safeOffset = Number.isFinite(requestedOffset)
      ? Math.max(requestedOffset, 0)
      : 0;

    const rows = db
      .prepare(
        `
        SELECT
          unique_id,
          nickname,
          follower_count,
          video_count,
          tt_seller,
          verified,
          location_created,
          confirmed_location,
          modified_at,
          latest_video_time,
          refresh_time,
          top_video_play_count,
          top_video_href,
          status,
          sources
        ${filterSql}
        ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
        LIMIT ? OFFSET ?
      `,
      )
      .all(...params, safeLimit, safeOffset)
      .map(mapJobRow);

    return {
      total,
      limit: safeLimit,
      offset: safeOffset,
      users: rows,
    };
  }

  // Grouped mode: load everything and bucket by country.
  const rows = db
    .prepare(
      `
      SELECT
        unique_id,
        nickname,
        follower_count,
        video_count,
        tt_seller,
        verified,
        location_created,
        confirmed_location,
        modified_at,
        latest_video_time,
        refresh_time,
        status,
        sources
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${placeholders})
      ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
    `,
    )
    .all(...targetLocations)
    .map(mapJobRow);

  const countryMap = new Map();
  for (const row of rows) {
    const country = row.locationCreated || "未知";
    if (!countryMap.has(country)) {
      countryMap.set(country, []);
    }
    countryMap.get(country).push(row);
  }

  const countries = [];
  for (const [country, users] of countryMap) {
    countries.push({
      country,
      count: users.length,
      users,
    });
  }

  return {
    total: rows.length,
    countries,
  };
}
1875
+
1876
/**
 * Converts a snake_case key to camelCase: every underscore followed by a
 * lower-case ASCII letter is replaced by that letter in upper case.
 *
 * @param {string} key
 * @returns {string}
 */
function snakeToCamel(key) {
  const liftLetter = (pair) => pair.charAt(1).toUpperCase();
  return key.replace(/_[a-z]/g, liftLetter);
}
1879
+
1880
/**
 * Converts a camelCase key to snake_case: every upper-case ASCII letter is
 * replaced by an underscore followed by that letter in lower case.
 *
 * @param {string} key
 * @returns {string}
 */
function camelToSnake(key) {
  const lowerWithUnderscore = (upper) => "_".concat(upper.toLowerCase());
  return key.replace(/[A-Z]/g, lowerWithUnderscore);
}
1883
+
1884
// Job columns that normalizeJobValue writes as 0/1 and mapJobRow reads back
// as booleans (null stays null).
const jobBooleanColumnNames = [
  "pinned",
  "no_video",
  "restricted",
  "processed",
  "tt_seller",
  "verified",
  "error",
];
const jobBooleanColumns = new Set(jobBooleanColumnNames);

// Video columns that mapVideoRow reads back as booleans.
const videoBooleanColumnNames = ["tt_seller"];
const videoBooleanColumns = new Set(videoBooleanColumnNames);
1895
+
1896
// Allow-list of columns that updateJobInfo / updateJobBaseInfo may write.
// Keys outside this list are ignored, which also keeps caller-supplied keys
// out of the generated "SET <column> = ?" SQL text.
const writableJobColumnNames = [
  // Identity-adjacent and lifecycle fields
  "nickname",
  "status",
  "sources",
  "claimed_by",
  "claimed_at",
  "error",
  // Flags and counters
  "pinned",
  "no_video",
  "restricted",
  "user_update_count",
  "tt_seller",
  "verified",
  "video_count",
  "comment_count",
  // Location fields
  "guessed_location",
  "location_created",
  "confirmed_location",
  "modified_at",
  // Profile statistics and timestamps
  "follower_count",
  "following_count",
  "heart_count",
  "refresh_time",
  "processed",
  "processed_at",
  "updated_at",
  // Profile details
  "region",
  "signature",
  "bio_link",
  "sec_uid",
  "status_code",
  // Video-derived fields
  "latest_video_time",
  "top_video_play_count",
  "top_video_href",
  "user_create_time",
];
const writableJobColumns = new Set(writableJobColumnNames);
1932
+
1933
/**
 * Converts a JS value into the form stored in a job column.
 *
 * @param {string} column - snake_case column name.
 * @param {*} value - Value supplied by the caller.
 * @returns {*} null for null/undefined; a JSON array string for `sources`
 *   (de-duplicated, or "[]" when the value is not an array); 1/0 for boolean
 *   columns; a JSON string for any other object; otherwise the value itself.
 */
function normalizeJobValue(column, value) {
  if (value == null) return null;

  if (column === "sources") {
    const uniqueSources = Array.isArray(value) ? Array.from(new Set(value)) : [];
    return JSON.stringify(uniqueSources);
  }

  if (jobBooleanColumns.has(column)) {
    return value ? 1 : 0;
  }

  // Defensive: objects and arrays are serialized rather than passed through.
  return typeof value === "object" ? JSON.stringify(value) : value;
}
1946
+
1947
/**
 * Converts a job row into the camelCase object used by the rest of the code.
 *
 * @param {object|null|undefined} row - Row with snake_case keys.
 * @returns {object|undefined} undefined for a missing row. Otherwise every
 *   key is camelCased; `sources` is parsed from JSON ([] when empty or
 *   invalid); boolean columns become true/false with null/undefined mapped
 *   to null; all other values are copied unchanged.
 */
function mapJobRow(row) {
  if (!row) return undefined;

  const decodeSources = (raw) => {
    if (!raw) return [];
    try {
      return JSON.parse(raw);
    } catch {
      return [];
    }
  };
  const decodeFlag = (raw) => (raw == null ? null : Boolean(raw));

  const job = {};
  Object.keys(row).forEach((column) => {
    const raw = row[column];
    const property = snakeToCamel(column);
    if (column === "sources") {
      job[property] = decodeSources(raw);
    } else if (jobBooleanColumns.has(column)) {
      job[property] = decodeFlag(raw);
    } else {
      job[property] = raw;
    }
  });
  return job;
}
1968
+
1969
/**
 * Fetches the raw `jobs` row for a unique_id.
 *
 * @param {*} uniqueId
 * @returns {object|null|undefined} null when the database is not open;
 *   otherwise whatever the statement's `get` returns for that id.
 */
function getJobRow(uniqueId) {
  return db
    ? db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId)
    : null;
}
1973
+
1974
/**
 * Fetches the raw `jobs_base` row for a unique_id.
 *
 * @param {*} uniqueId
 * @returns {object|null|undefined} null when the database is not open;
 *   otherwise whatever the statement's `get` returns for that id.
 */
function getJobBaseRow(uniqueId) {
  if (db) {
    const lookup = db.prepare("SELECT * FROM jobs_base WHERE unique_id = ?");
    return lookup.get(uniqueId);
  }
  return null;
}
1980
+
1981
/**
 * Fetches a job by unique_id and converts it with mapJobRow.
 *
 * @param {*} uniqueId
 * @returns {*} The result of mapJobRow applied to the row from getJobRow.
 */
function getJob(uniqueId) {
  const storedRow = getJobRow(uniqueId);
  return mapJobRow(storedRow);
}
1984
+
1985
/**
 * Loads every row of `jobs`, converted with mapJobRow.
 *
 * @returns {object[]} All jobs; [] when the database is not open.
 */
function getAllJobs() {
  if (!db) return [];
  const everyRow = db.prepare("SELECT * FROM jobs").all();
  return everyRow.map(mapJobRow);
}
1989
+
1990
/**
 * Converts a video row into a camelCase object.
 *
 * @param {object|null|undefined} row - Row with snake_case keys.
 * @returns {object|undefined} undefined for a missing row. Otherwise every
 *   key is camelCased; boolean columns become true/false with null/undefined
 *   mapped to null; all other values are copied unchanged.
 */
function mapVideoRow(row) {
  if (!row) return undefined;

  const video = {};
  Object.keys(row).forEach((column) => {
    const raw = row[column];
    const property = snakeToCamel(column);
    if (!videoBooleanColumns.has(column)) {
      video[property] = raw;
    } else if (raw === null || raw === undefined) {
      video[property] = null;
    } else {
      video[property] = Boolean(raw);
    }
  });
  return video;
}
2003
+
2004
/**
 * Fetches the raw `videos` row with the given id.
 *
 * @param {*} videoId
 * @returns {object|null|undefined} null when the database is not open;
 *   otherwise whatever the statement's `get` returns for that id.
 */
function getVideoRow(videoId) {
  return db ? db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId) : null;
}
2008
+
2009
/**
 * Loads every raw row of `videos` (no camelCase mapping is applied).
 *
 * @returns {object[]} All rows; [] when the database is not open.
 */
function getAllVideoRows() {
  return db ? db.prepare("SELECT * FROM videos").all() : [];
}
2013
+
2014
/**
 * Applies a partial update to an existing row in `jobs`.
 *
 * Keys of `info` are converted from camelCase to snake_case (`bio` is an
 * alias for `signature`, `createTime` for `user_create_time`). Keys that are
 * not in writableJobColumns, the id keys, and undefined / empty-string values
 * are ignored. `updated_at` is always set to the current time.
 *
 * @param {*} uniqueId - unique_id of the row to update.
 * @param {object} [info] - Fields to write.
 * @param {boolean} [incrementCount=true] - Whether to bump
 *   `user_update_count` by one from the stored value.
 * @returns {{ok: true, userUpdateCount: number}|{error: string}}
 */
function updateJobInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };
  const current = getJobRow(uniqueId);
  if (!current) return { error: "user not found" };

  const columnAliases = { bio: "signature", create_time: "user_create_time" };
  // A Map keeps first-insertion order, which fixes the column order of the
  // generated SET clause.
  const pending = new Map();

  Object.entries(info || {}).forEach(([field, raw]) => {
    const isIdField = field === "uniqueId" || field === "unique_id";
    if (isIdField || raw === undefined || raw === "") return;
    const snake = camelToSnake(field);
    const target = Object.hasOwn(columnAliases, snake) ? columnAliases[snake] : snake;
    if (writableJobColumns.has(target)) {
      pending.set(target, normalizeJobValue(target, raw));
    }
  });

  pending.set("updated_at", Date.now());
  if (incrementCount) {
    pending.set("user_update_count", (current.user_update_count || 0) + 1);
  }

  const targets = [...pending.keys()];
  if (targets.length > 0) {
    const assignments = targets.map((name) => `${name} = ?`).join(", ");
    const statement = db.prepare(
      `UPDATE jobs SET ${assignments} WHERE unique_id = ?`,
    );
    statement.run(...targets.map((name) => pending.get(name)), uniqueId);
  }

  return {
    ok: true,
    userUpdateCount:
      pending.get("user_update_count") ?? current.user_update_count ?? 0,
  };
}
2051
+
2052
/**
 * Derives a status string from a user's flags. The first truthy flag wins,
 * in the order restricted → error → processed; none set means "pending".
 *
 * @param {object} u - Object carrying `restricted`, `error`, `processed`.
 * @returns {"restricted"|"error"|"done"|"pending"}
 */
function inferStatus(u) {
  const precedence = [
    ["restricted", "restricted"],
    ["error", "error"],
    ["processed", "done"],
  ];
  for (const [flag, status] of precedence) {
    if (u[flag]) return status;
  }
  return "pending";
}
2058
+
2059
/**
 * Applies a partial update to an existing row in `jobs_base`.
 *
 * Keys of `info` are converted from camelCase to snake_case (`bio` is an
 * alias for `signature`, `createTime` for `user_create_time`). Keys that are
 * not in writableJobColumns, the id keys, and undefined / empty-string values
 * are ignored. `updated_at` is always set to the current time.
 *
 * @param {*} uniqueId - unique_id of the row to update.
 * @param {object} [info] - Fields to write.
 * @param {boolean} [incrementCount=true] - Whether to bump
 *   `user_update_count` by one from the stored value.
 * @returns {{ok: true, userUpdateCount: number}|{error: string}}
 */
function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };
  const current = getJobBaseRow(uniqueId);
  if (!current) return { error: "user not found" };

  const columnAliases = { bio: "signature", create_time: "user_create_time" };
  // A Map keeps first-insertion order, which fixes the column order of the
  // generated SET clause.
  const pending = new Map();

  Object.entries(info || {}).forEach(([field, raw]) => {
    const isIdField = field === "uniqueId" || field === "unique_id";
    if (isIdField || raw === undefined || raw === "") return;
    const snake = camelToSnake(field);
    const target = Object.hasOwn(columnAliases, snake) ? columnAliases[snake] : snake;
    if (writableJobColumns.has(target)) {
      pending.set(target, normalizeJobValue(target, raw));
    }
  });

  pending.set("updated_at", Date.now());
  if (incrementCount) {
    pending.set("user_update_count", (current.user_update_count || 0) + 1);
  }

  const targets = [...pending.keys()];
  if (targets.length > 0) {
    const assignments = targets.map((name) => `${name} = ?`).join(", ");
    const statement = db.prepare(
      `UPDATE jobs_base SET ${assignments} WHERE unique_id = ?`,
    );
    statement.run(...targets.map((name) => pending.get(name)), uniqueId);
  }

  return {
    ok: true,
    userUpdateCount:
      pending.get("user_update_count") ?? current.user_update_count ?? 0,
  };
}
2096
+
2097
+ function addJobBaseToDb(user) {
2098
+ if (!db) return;
2099
+ const now = Date.now();
2100
+ db.prepare(
2101
+ `
2102
+ INSERT OR IGNORE INTO jobs_base (
2103
+ unique_id,
2104
+ nickname,
2105
+ status,
2106
+ sources,
2107
+ claimed_by,
2108
+ claimed_at,
2109
+ error,
2110
+ pinned,
2111
+ no_video,
2112
+ restricted,
2113
+ user_update_count,
2114
+ tt_seller,
2115
+ verified,
2116
+ video_count,
2117
+ comment_count,
2118
+ guessed_location,
2119
+ location_created,
2120
+ follower_count,
2121
+ following_count,
2122
+ heart_count,
2123
+ refresh_time,
2124
+ processed,
2125
+ processed_at,
2126
+ created_at,
2127
+ updated_at,
2128
+ region,
2129
+ signature,
2130
+ bio_link,
2131
+ sec_uid
2132
+ )
2133
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2134
+ `,
2135
+ ).run(
2136
+ user.uniqueId,
2137
+ user.nickname || null,
2138
+ user.status || inferStatus(user),
2139
+ JSON.stringify(
2140
+ Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
2141
+ ),
2142
+ user.claimedBy || null,
2143
+ user.claimedAt || null,
2144
+ user.error || null,
2145
+ user.pinned ? 1 : 0,
2146
+ user.noVideo ? 1 : 0,
2147
+ user.restricted ? 1 : 0,
2148
+ user.userUpdateCount || 0,
2149
+ user.ttSeller === undefined ||
2150
+ user.ttSeller === null ||
2151
+ user.ttSeller === ""
2152
+ ? null
2153
+ : user.ttSeller
2154
+ ? 1
2155
+ : 0,
2156
+ user.verified === undefined ||
2157
+ user.verified === null ||
2158
+ user.verified === ""
2159
+ ? null
2160
+ : user.verified
2161
+ ? 1
2162
+ : 0,
2163
+ user.videoCount || 0,
2164
+ user.commentCount || 0,
2165
+ user.guessedLocation || null,
2166
+ user.locationCreated || null,
2167
+ user.followerCount || 0,
2168
+ user.followingCount || 0,
2169
+ user.heartCount || 0,
2170
+ user.refreshTime || null,
2171
+ user.processed ? 1 : 0,
2172
+ user.processedAt || null,
2173
+ user.createdAt || now,
2174
+ user.updatedAt || now,
2175
+ user.region || null,
2176
+ user.signature || null,
2177
+ user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
2178
+ user.secUid || null,
2179
+ );
2180
+ }
2181
+
2182
/**
 * Registers a job. With an open database, missing `status`, `createdAt` and
 * `updatedAt` are filled in on the passed object and the user is written via
 * addUserToDb and addJobToDb inside one transaction. Without a database only
 * addUserToDb is called and the object is left untouched.
 *
 * @param {object} user - camelCase job object; mutated as described above.
 * @returns {void}
 */
function addJob(user) {
  if (db) {
    if (!user.status) user.status = inferStatus(user);
    if (!user.createdAt) user.createdAt = Date.now();
    if (!user.updatedAt) user.updatedAt = user.createdAt;

    const persistBoth = db.transaction((job) => {
      addUserToDb(job);
      addJobToDb(job);
    });
    persistBoth(user);
    return;
  }
  addUserToDb(user);
}
2196
+
2197
+ export function createStore(filePath, options = {}) {
2198
+ if (!filePath) {
2199
+ throw new Error("createStore requires an explicit .db path");
2200
+ }
2201
+
2202
+ // refillJobsFromRaw 的 LLM 打分配置(自动补充任务时使用)
2203
+ const refillLlmConfig = {
2204
+ llmScore: false,
2205
+ llmMinScore: 60,
2206
+ llmSampleSize: 100,
2207
+ ...options.refillLlm,
2208
+ };
2209
+
2210
+ let data = [];
2211
+ // uniqueId → index 内存索引,O(1) 查找
2212
+ let uidIndex = new Map();
2213
+ let clientErrors = new Map();
2214
+ // 客户端登录状态:userId → boolean
2215
+ let clientLoginStatus = new Map();
2216
+ // 活跃客户端追踪:clientId → { type, ip, port, userId, lastSeen }
2217
+ let activeClients = new Map();
2218
+ // refill 锁:防止多个 claimNextJob 同时触发 LLM refill
2219
+ let refillLock = null; // Promise | null
2220
+ // LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
2221
+ // 格式: { "ES": 300, "PL": 500, "NL": 400 }
2222
+ let llmSampleOffsets = new Map();
2223
+ if (filePath) {
2224
+ // 初始化 SQLite 用户表(用于判重)
2225
+ initUserDb(filePath);
2226
+ // 从数据库恢复偏移量
2227
+ loadLlmSampleOffsets();
2228
+ }
2229
+
2230
/**
 * Restores the LLM sampling offsets from the `_llm_sample_offsets` table
 * into the in-memory `llmSampleOffsets` map.
 *
 * The stored value is a JSON object of `key -> offset`. Only entries whose
 * value is a non-negative integer are restored, because the offsets are
 * later bound as SQL OFFSET parameters. Any failure (missing table, invalid
 * JSON) is logged and leaves the map as it was.
 *
 * @returns {void}
 */
function loadLlmSampleOffsets() {
  try {
    const row = db
      .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
      .get();
    if (!row || !row.offsets) return;

    const parsed = JSON.parse(row.offsets);
    // Arrays are objects too; only a plain key/value payload is meaningful.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return;

    for (const [key, value] of Object.entries(parsed)) {
      if (typeof value === "number" && Number.isInteger(value) && value >= 0) {
        llmSampleOffsets.set(key, value);
      }
    }
    console.error(
      `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
        llmSampleOffsets.entries(),
      )
        .map(([k, v]) => `${k}:${v}`)
        .join(", ")}`,
    );
  } catch (e) {
    // Table missing or payload unreadable: keep going with empty offsets.
    console.error(
      `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
    );
  }
}
2260
+
2261
/**
 * Writes the in-memory LLM sampling offsets to the database as one JSON row.
 *
 * The `_llm_sample_offsets` table is created on demand and holds a single
 * row with `id = 1`, which is replaced on every save. Errors are logged and
 * not rethrown.
 *
 * @returns {void}
 */
function saveLlmSampleOffsets() {
  try {
    const serialized = JSON.stringify(Object.fromEntries(llmSampleOffsets));

    // The table must exist before the upsert statement can be prepared.
    const ensureTable = db.prepare(
      `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
    );
    ensureTable.run();

    const upsert = db.prepare(
      `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
    );
    upsert.run(serialized);
  } catch (e) {
    console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
  }
}
2279
+
2280
+ // stats 缓存
2281
+ let statsCache = null;
2282
+ let statsDirty = true;
2283
+
2284
/**
 * Invalidates both derived caches: the status-group index and the stats
 * snapshot. They are recomputed on the next read.
 *
 * @returns {void}
 */
function markStatsDirty() {
  groupsDirty = true;
  statsDirty = true;
}
2288
+
2289
/**
 * Recomputes the total job count and the per-status counters, stores the
 * result in `statsCache` and clears `statsDirty`.
 *
 * With a database the counters come from a `GROUP BY status` query and only
 * statuses already present on the counter object are recorded. Without a
 * database the in-memory `data` array is scanned and every status value
 * found is counted.
 *
 * @returns {{ total: number, statusCounts: Object }} the refreshed cache
 */
function computeStatsInternal() {
  const statusCounts = {
    pending: 0,
    processing: 0,
    done: 0,
    error: 0,
    restricted: 0,
  };
  let total;

  if (db) {
    total = getJobsCount();
    const grouped = db
      .prepare(
        `
        SELECT status, COUNT(*) as count
        FROM jobs
        GROUP BY status
      `,
      )
      .all();
    grouped.forEach((row) => {
      if (row.status && statusCounts[row.status] !== undefined) {
        statusCounts[row.status] = row.count;
      }
    });
  } else {
    total = data.length;
    data.forEach((user) => {
      statusCounts[user.status] = (statusCounts[user.status] || 0) + 1;
    });
  }

  statsCache = { total, statusCounts };
  statsDirty = false;
  return statsCache;
}
2333
+
2334
/**
 * Returns the stats snapshot, recomputing it first when it is marked dirty.
 *
 * @returns {{ total: number, statusCounts: Object }}
 */
function getStats() {
  return statsDirty ? computeStatsInternal() : statsCache;
}
2340
+
2341
+ // 按 status 的分组索引,避免每次请求全量遍历
2342
+ let statusGroups = null;
2343
+ let groupsDirty = true;
2344
+
2345
+ const tier1LocSet = new Set(["PL", "NL", "BE"]);
2346
+ const tier2LocSet = new Set(["DE", "FR", "IT", "IE", "ES"]);
2347
/**
 * Ranks a user by guessed location: 0 for a tier-1 country, 1 for a tier-2
 * country, 2 for anything else (including a missing location).
 *
 * @param {Object} u - record with an optional `guessedLocation` code
 * @returns {number} 0, 1 or 2 (lower sorts first)
 */
function locationTier(u) {
  const code = (u.guessedLocation || "").toUpperCase();
  if (tier1LocSet.has(code)) {
    return 0;
  }
  return tier2LocSet.has(code) ? 1 : 2;
}
2353
+
2354
/**
 * Sorts one status group and moves pinned entries to the front.
 *
 * Ordering rules:
 * - "done": most recently processed first;
 * - "pending": unverified sellers first, then by location tier, then by
 *   follower count descending;
 * - any other key: follower count descending.
 *
 * The input array is sorted in place; the returned array is a new one with
 * pinned entries first, each partition keeping its sorted order.
 *
 * @param {string} key - status name of the group
 * @param {Object[]} arr - members of the group
 * @returns {Object[]} pinned members followed by the rest
 */
function sortGroup(key, arr) {
  const byFollowersDesc = (a, b) =>
    (b.followerCount || 0) - (a.followerCount || 0);
  const sellerRank = (u) =>
    u.ttSeller === true && u.verified === false ? 0 : 1;

  let comparator;
  switch (key) {
    case "done":
      comparator = (a, b) => (b.processedAt || 0) - (a.processedAt || 0);
      break;
    case "pending":
      comparator = (a, b) => {
        const rankA = sellerRank(a);
        const rankB = sellerRank(b);
        if (rankA !== rankB) return rankA - rankB;
        const tierA = locationTier(a);
        const tierB = locationTier(b);
        if (tierA !== tierB) return tierA - tierB;
        return byFollowersDesc(a, b);
      };
      break;
    default:
      comparator = byFollowersDesc;
  }
  arr.sort(comparator);

  // Stable partition: pinned entries bubble to the head of the group.
  const pinned = [];
  const rest = [];
  for (const member of arr) {
    (member.pinned ? pinned : rest).push(member);
  }
  return [...pinned, ...rest];
}
2373
+
2374
/**
 * Rebuilds `statusGroups`, the per-status index of all jobs, and clears
 * `groupsDirty`.
 *
 * Records come from `getAllJobs()` when a database is open, otherwise from
 * the in-memory `data` array. A record without a status is filed under
 * "pending"; a status outside the five predefined ones gets its own group.
 * Every group is then ordered with `sortGroup`.
 *
 * @returns {void}
 */
function rebuildStatusGroups() {
  statusGroups = {
    pending: [],
    processing: [],
    done: [],
    error: [],
    restricted: [],
  };

  const records = db ? getAllJobs() : data;
  for (const record of records) {
    const bucket = record.status || "pending";
    if (statusGroups[bucket]) {
      statusGroups[bucket].push(record);
    } else {
      statusGroups[bucket] = [record];
    }
  }

  Object.keys(statusGroups).forEach((bucket) => {
    statusGroups[bucket] = sortGroup(bucket, statusGroups[bucket]);
  });
  groupsDirty = false;
}
2413
+
2414
/**
 * Returns the per-status index, rebuilding it first when it is marked dirty.
 *
 * @returns {Object} map of status name to sorted array of records
 */
function getStatusGroups() {
  if (groupsDirty) {
    rebuildStatusGroups();
  }
  return statusGroups;
}
2418
+
2419
/**
 * Flags the per-status index as stale so the next `getStatusGroups()` call
 * rebuilds it. The stats snapshot is left untouched.
 *
 * @returns {void}
 */
function markGroupsDirty() {
  groupsDirty = true;
}
2422
+
2423
+ // 视频存储(SQLite 真相源)
2424
+ let videos = [];
2425
+
2426
+ // 构建索引 + 推断 status
2427
+ for (let i = 0; i < data.length; i++) {
2428
+ const u = data[i];
2429
+ if (!u.status) u.status = inferStatus(u);
2430
+ uidIndex.set(u.uniqueId, i);
2431
+ }
2432
+
2433
/**
 * No-op kept so existing call sites keep working; it performs no work and
 * returns `undefined`.
 *
 * @returns {void}
 */
function save() {
  // Intentionally empty.
}
2436
+
2437
/**
 * Flushes state that lives only in memory: when a database is open, the LLM
 * sampling offsets are written out. No backup is taken here.
 *
 * @returns {Promise<void>} always resolves; failures are logged
 */
function flushSave() {
  const databaseReady = Boolean(db && dbPath);
  if (databaseReady) {
    try {
      saveLlmSampleOffsets();
    } catch (err) {
      console.error(`[data-store] 保存 LLM 偏移量失败: ${err.message}`);
    }
  }
  return Promise.resolve();
}
2448
+
2449
/**
 * Writes a snapshot of the live database next to the original file and then
 * prunes older snapshots.
 *
 * The snapshot is named `<base>-<timestamp>.db`, where `<timestamp>` is the
 * first 15 digits of the UTC ISO time with separators removed
 * (YYYYMMDDHHmmss plus one millisecond digit). `cleanupOldBackups` matches
 * exactly 15 digits, so the two must stay in sync.
 *
 * @param {number} maxBackups - number of snapshots to keep, default 3
 * @returns {string|null} path of the snapshot, or null when the database is
 *   not open or the backup failed
 */
function backupDatabase(maxBackups = 3) {
  if (!db || !dbPath) {
    console.error("[data-store] 数据库未初始化,跳过备份");
    return null;
  }

  try {
    const timestamp = new Date()
      .toISOString()
      .replace(/[-:T.]/g, "")
      .slice(0, 15);
    const baseName = path.basename(dbPath, ".db");
    const backupName = `${baseName}-${timestamp}.db`;
    const backupDir = path.dirname(dbPath);
    const backupPath = path.join(backupDir, backupName);

    console.error(`[data-store] 正在备份数据库: ${backupName}`);

    // better-sqlite3's db.backup(destination) takes a file path and returns a
    // Promise, so it cannot be used from this synchronous function. VACUUM
    // INTO produces a consistent snapshot synchronously and refuses to
    // overwrite an existing file.
    db.prepare("VACUUM INTO ?").run(backupPath);

    // Report the size of what was actually written.
    const stat = fs.statSync(backupPath);
    const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
    console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);

    // Keep only the newest maxBackups snapshots.
    cleanupOldBackups(backupDir, baseName, maxBackups);

    return backupPath;
  } catch (e) {
    console.error(`[data-store] 备份失败: ${e.message}`);
    return null;
  }
}
2493
+
2494
/**
 * Deletes old backup files, keeping the newest `maxBackups`.
 *
 * A backup is any file in `backupDir` named `<baseName>-<15 digits>.db`.
 * Names sort chronologically because the digits are a timestamp. Errors are
 * logged and not rethrown.
 *
 * @param {string} backupDir - directory that holds the backups
 * @param {string} baseName - database file name without the `.db` extension
 * @param {number} maxBackups - number of newest backups to keep
 * @returns {void}
 */
function cleanupOldBackups(backupDir, baseName, maxBackups) {
  try {
    // baseName is a file name, not a pattern: escape regex metacharacters so
    // names such as "a+b" or "data(1)" still match their own backups.
    const escapedBase = String(baseName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`^${escapedBase}-\\d{15}\\.db$`);
    const backups = fs
      .readdirSync(backupDir)
      .filter((f) => pattern.test(f))
      .sort() // lexicographic order equals chronological order here
      .reverse(); // newest first

    if (backups.length > maxBackups) {
      for (const file of backups.slice(maxBackups)) {
        fs.unlinkSync(path.join(backupDir, file));
        console.error(`[data-store] 已清理旧备份: ${file}`);
      }
    }

    console.error(
      `[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
    );
  } catch (e) {
    console.error(`[data-store] 清理旧备份失败: ${e.message}`);
  }
}
2523
+
2524
/**
 * Shutdown hook: takes one final database backup when a database is open,
 * and does nothing otherwise.
 *
 * @returns {void}
 */
function stopBackup() {
  if (!db || !dbPath) {
    return;
  }
  backupDatabase();
}
2530
+
2531
/**
 * Looks a user up by unique id: the in-memory index first, then the jobs
 * table when a database is open.
 *
 * @param {string} uid - user unique id
 * @returns {Object|undefined} the in-memory record, the result of
 *   `getJob(uid)`, or undefined when there is neither a hit nor a database
 */
function getUser(uid) {
  const position = uidIndex.get(uid);
  if (position === undefined) {
    return db ? getJob(uid) : undefined;
  }
  return data[position];
}
2537
+
2538
/**
 * Tells whether a user is already known, checking the in-memory index
 * before falling back to `hasUserInDb`.
 *
 * @param {string} uid - user unique id
 * @returns {boolean} true on an index hit, otherwise whatever
 *   `hasUserInDb(uid)` returns
 */
function hasUser(uid) {
  return uidIndex.has(uid) || hasUserInDb(uid);
}
2543
+
2544
/**
 * Same check as `hasUser`: in-memory index first, then `hasUserInDb`.
 *
 * @param {string} uid - user unique id
 * @returns {boolean} true on an index hit, otherwise whatever
 *   `hasUserInDb(uid)` returns
 */
function userExists(uid) {
  const knownInMemory = uidIndex.has(uid);
  if (!knownInMemory) {
    return hasUserInDb(uid);
  }
  return true;
}
2549
+
2550
/**
 * Adds a user, or merges new field values into an already known one.
 *
 * - Database open and user not in the memory index: the user is skipped if
 *   `hasUserInDb` already knows it, otherwise it is queued through `addJob`.
 * - User already known (memory or `getUser`): every non-empty field of
 *   `user` except `uniqueId` and `sources` overwrites the stored value.
 * - Otherwise the record is completed (status, timestamps) and inserted into
 *   the in-memory list, at the end when `append` is truthy and at the front
 *   when it is not.
 *
 * @param {Object} user - record carrying at least `uniqueId`
 * @param {boolean} [append] - true to push at the end, falsy to put first
 * @returns {void}
 */
function addUser(user, append) {
  const memoryIdx = uidIndex.get(user.uniqueId);
  if (db && memoryIdx === undefined) {
    // Dedupe against the users table (everyone ever discovered), not jobs.
    if (hasUserInDb(user.uniqueId)) {
      return;
    }
    addJob(user);
    return;
  }

  const existing = getUser(user.uniqueId);
  if (existing) {
    let changed = false;
    for (const [key, value] of Object.entries(user)) {
      if (key === "uniqueId" || key === "sources") continue;
      if (value === undefined || value === null || value === "") continue;
      if (existing[key] !== value) {
        existing[key] = value;
        changed = true;
      }
    }
    if (changed) {
      // A merged field may be `status`, so cached stats/groups are stale.
      markStatsDirty();
      save();
    }
    return;
  }

  if (!user.status) user.status = inferStatus(user);
  if (user.processed) user.processedAt = user.processedAt || Date.now();
  if (!user.createdAt) user.createdAt = Date.now();

  if (append) {
    data.push(user);
    uidIndex.set(user.uniqueId, data.length - 1);
  } else {
    data.unshift(user);
    // unshift moves every existing element one slot to the right, so each
    // stored index has to follow; updating existing keys while iterating a
    // Map is safe.
    for (const [uid, idx] of uidIndex) {
      uidIndex.set(uid, idx + 1);
    }
    uidIndex.set(user.uniqueId, 0);
  }

  // Mirror the new record into SQLite.
  addUserToDb(user);
  markStatsDirty();
  save();
}
2592
+
2593
/**
 * Bulk-registers newly discovered users as pending jobs.
 *
 * Each entry's `uniqueId` is trimmed and stripped of a leading `@`. Entries
 * without an id are ignored, ids already known to `hasUser` are counted as
 * skipped, and the rest are written to the users table and the jobs table
 * inside one transaction per user.
 *
 * @param {Object[]} users - entries with `uniqueId` and optional `sources`,
 *   `guessedLocation`, `locationCreated`
 * @returns {{ added: number, skipped: number, error?: string }} counters;
 *   `error` is set when no database is open
 */
function addRawUsers(users) {
  if (!Array.isArray(users)) return { added: 0, skipped: 0 };
  if (!db) return { added: 0, skipped: 0, error: "db not ready" };

  const now = Date.now();
  let added = 0;
  let skipped = 0;

  // Built once; invoking it runs both inserts atomically for one user.
  const writeTxn = db.transaction((job) => {
    addUserToDb(job);
    addJobBaseToDb(job);
  });

  for (const u of users) {
    // Trim before removing "@" so that " @name" normalizes to "name".
    const uniqueId = String(u?.uniqueId ?? "")
      .trim()
      .replace(/^@/, "")
      .trim();
    if (!uniqueId) continue;
    if (hasUser(uniqueId)) {
      skipped++;
      continue;
    }

    let sources;
    if (Array.isArray(u.sources)) {
      sources = u.sources;
    } else if (u.sources) {
      sources = [u.sources];
    } else {
      sources = ["tag"];
    }

    writeTxn({
      uniqueId,
      status: "pending",
      sources,
      guessedLocation: u.guessedLocation || u.locationCreated || null,
      locationCreated: u.locationCreated || null,
      createdAt: now,
      updatedAt: now,
    });
    added++;
  }

  return { added, skipped };
}
2629
+
2630
/**
 * Lists every record whose status is "pending", read from `getAllJobs()`
 * when a database is open and from the in-memory list otherwise.
 *
 * @returns {Object[]} a new array of pending records
 */
function getPendingUsers() {
  const everyone = db ? getAllJobs() : data;
  return everyone.filter((record) => record.status === "pending");
}
2636
+
2637
/**
 * Lists every record whose status is "done", read from `getAllJobs()` when
 * a database is open and from the in-memory list otherwise.
 *
 * @returns {Object[]} a new array of finished records
 */
function getProcessedUsers() {
  const everyone = db ? getAllJobs() : data;
  return everyone.filter((record) => record.status === "done");
}
2643
+
2644
/**
 * Returns all records: the result of `getAllJobs()` when a database is
 * open, otherwise the in-memory list itself (not a copy).
 *
 * @returns {Object[]}
 */
function getAllUsers() {
  return db ? getAllJobs() : data;
}
2650
+
2651
/**
 * Asks the LLM how likely a raw job's user belongs to one of the target
 * countries and returns a score from 0 to 100.
 *
 * The model is told to answer with `{"score": ..., "reason": ...}`. Because
 * replies are not always valid JSON, parsing falls back through several
 * increasingly tolerant strategies. Any failure (network, HTTP status,
 * unparseable reply) yields the neutral score 50 with an explanatory reason;
 * this function never rejects.
 *
 * NOTE(review): the endpoint is hard-coded and uses plain HTTP, so the
 * bearer token from `process.env.APIKEY` is sent unencrypted.
 *
 * @param {Object} job - one raw_jobs row (snake_case columns)
 * @param {string[]} targetLocations - target country codes
 * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
 */
async function scoreJobLocation(job, targetLocations) {
  const { fetch: undiciFetch } = await import("undici");

  const prompt = `
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。

目标国家列表: ${targetLocations.join(", ")}

重要:
- 用户只要来自上述**任意一个**国家就算匹配。
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
- 请综合用户名、昵称、签名、位置等信息做判断。

用户信息:
- 用户名: ${job.unique_id || "未知"}
- 昵称: ${job.nickname || "未知"}
- 签名: ${job.signature || "未知"}
- 地区: ${job.region || "未知"}
- 猜测国家(参考): ${job.guessed_location || "未知"}
- 位置信息: ${job.location_created || "未知"}
- 主页链接: ${job.bio_link || "未知"}

返回 JSON(仅返回 JSON,无其他内容):
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}

Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
`;

  // Turns the raw model reply into a parsed value, or null when nothing
  // usable can be extracted.
  const parseScoreResponse = (content) => {
    // Attempt 1: the reply is pure JSON.
    try {
      return JSON.parse(content);
    } catch {
      // fall through to the tolerant strategies
    }

    const braced = content.match(/\{[\s\S]*\}/);
    if (braced) {
      // Attempt 2: JSON wrapped in surrounding text.
      try {
        return JSON.parse(braced[0]);
      } catch {
        // keep trying
      }

      // Attempt 3: normalize typographic quotes and whitespace.
      const cleaned = braced[0]
        .replace(/[\u201C\u201D]/g, '"')
        .replace(/\s+/g, " ")
        .trim();
      try {
        return JSON.parse(cleaned);
      } catch {
        // keep trying
      }

      // Attempt 4: pull score and reason out of the text; the reason may
      // contain quotes that broke the JSON.
      const looseScore = content.match(/"?score"?\s*:\s*(\d+)/i);
      if (looseScore) {
        let reason = "解析降级";
        const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
        if (reasonKeyPos !== -1) {
          const afterKey = content.substring(reasonKeyPos);
          const colonPos = afterKey.indexOf(":");
          const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
          // The value runs up to the last closing brace of the reply.
          const reasonEnd =
            content.lastIndexOf("}") - reasonKeyPos - valueStart;
          if (reasonEnd > 0) {
            reason = afterKey
              .substring(valueStart, valueStart + reasonEnd)
              .trim();
            if (reason.startsWith('"')) reason = reason.substring(1);
            if (reason.endsWith('"')) {
              reason = reason.substring(0, reason.length - 1);
            }
          }
        }
        // The regex guarantees digits, so 0 is a real score, not a failure.
        return { score: Number.parseInt(looseScore[1], 10), reason };
      }
    }

    // Attempt 5: no braces at all, look for bare key/value pairs.
    const strictScore = content.match(/"score"\s*:\s*(\d+)/);
    if (strictScore) {
      const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
      return {
        score: Number.parseInt(strictScore[1], 10),
        reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
      };
    }
    return null;
  };

  try {
    const apiKey = process.env.APIKEY || "";
    const response = await undiciFetch(
      "http://82.156.52.214:18000/v1/chat/completions",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
          model: "zc-fast",
          messages: [{ role: "user", content: prompt }],
          max_tokens: 512,
          temperature: 0.1,
        }),
      },
    );
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const result = await response.json();
    const content = result.choices?.[0]?.message?.content || "";

    const parsed = parseScoreResponse(content);
    const rawScore =
      parsed !== null && typeof parsed === "object" ? parsed.score : undefined;
    // Models sometimes quote the number; accept "85" as 85.
    const score =
      typeof rawScore === "string" && rawScore.trim() !== ""
        ? Number(rawScore)
        : rawScore;

    if (typeof score === "number" && Number.isFinite(score)) {
      return {
        uniqueId: job.unique_id,
        score: Math.max(0, Math.min(100, score)),
        reason: parsed.reason || "",
      };
    }

    // Nothing usable in the reply: fall back to the neutral score.
    console.error(
      `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: "LLM 响应解析失败,使用默认分",
    };
  } catch (e) {
    console.error(
      `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: `LLM 调用异常: ${e.message}`,
    };
  }
}
2800
+
2801
/**
 * Scores many raw jobs with the LLM, running `batchSize` requests
 * concurrently and waiting for each batch before starting the next.
 *
 * @param {Object[]} jobs - raw_jobs rows to score
 * @param {string[]} targetLocations - target country codes
 * @param {number} batchSize - concurrent requests per batch, default 10;
 *   values below 1 or non-numeric values fall back to 10
 * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
 *   one result per job, in input order
 */
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
  if (!Array.isArray(jobs)) return [];

  // A step of 0, a negative step or NaN would keep the loop from advancing.
  const step =
    Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 10;

  const results = [];
  for (let start = 0; start < jobs.length; start += step) {
    const batch = jobs.slice(start, start + step);
    const batchResults = await Promise.all(
      batch.map((job) => scoreJobLocation(job, targetLocations)),
    );
    results.push(...batchResults);
  }
  return results;
}
2819
+
2820
/**
 * Moves a batch of eligible rows from `raw_jobs` into `jobs` as pending work.
 *
 * A row is eligible when video_count > 6, follower_count > 0 and
 * following_count > 0, optionally restricted to the given guessed locations.
 *
 * Two modes:
 * - Default (synchronous): the top `limit` eligible rows, tag-sourced rows
 *   first, are copied to `jobs` and deleted from `raw_jobs` in one
 *   transaction.
 * - LLM-scored (returns a Promise): used when `options.llmScore` is truthy
 *   and at least one location is given. Tag-sourced rows are taken as they
 *   are, up to `llmTotal * llmTagRatio`; the remaining quota is filled per
 *   location with non-tag rows whose LLM score reaches `llmMinScore`.
 *   Sampling positions are remembered in `llmSampleOffsets`.
 *
 * @param {string[]|null} locations - target country codes, null for no filter
 * @param {number} limit - max rows moved in default mode, clamped to 1..2000
 * @param {Object} options - optional settings
 * @param {boolean} [options.llmScore=false] - enable LLM-scored mode
 * @param {number} [options.llmMinScore=60] - minimum accepted LLM score
 * @param {number} [options.llmSampleSize=100] - rows fetched per batch
 * @param {number} [options.llmTotal=200] - total rows targeted per run
 * @param {number} [options.llmTagRatio=0.6] - share of llmTotal given to tag rows
 * @param {number} [options.llmMaxBatches=10] - max LLM batches per location
 * @returns {{ moved: number, error?: string }|Promise<{ moved: number, scored: number, qualified: number, scores: Object[] }>}
 */
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
  if (!db) {
    return { moved: 0, error: "db not ready" };
  }

  const opts = options ?? {};
  const safeLimit = Math.max(
    1,
    Math.min(2000, Number.parseInt(limit, 10) || 500),
  );
  const normalizedLocations = locations
    ? locations.map((loc) => String(loc).trim().toUpperCase()).filter(Boolean)
    : null;
  const hasLocations = Boolean(
    normalizedLocations && normalizedLocations.length > 0,
  );

  // Shared eligibility filter; `args` holds its bound parameters in order.
  const conditions = [
    "COALESCE(video_count, 0) > 6",
    "COALESCE(follower_count, 0) > 0",
    "COALESCE(following_count, 0) > 0",
  ];
  const args = [];
  if (hasLocations) {
    conditions.push(
      `UPPER(COALESCE(guessed_location, '')) IN (${normalizedLocations.map(() => "?").join(", ")})`,
    );
    args.push(...normalizedLocations);
  }
  const whereSql = conditions.join(" AND ");

  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;
  if (!count) {
    return { moved: 0 };
  }

  // Column lists shared by both modes so no field is lost on the move.
  const insertColumns = `
        unique_id, nickname, status, sources, pinned,
        tt_seller, verified, video_count, comment_count,
        guessed_location, location_created, confirmed_location,
        follower_count, following_count, heart_count,
        created_at, updated_at, region, signature, bio_link, sec_uid,
        status_code, latest_video_time, user_create_time`;
  const selectColumns = `
        unique_id, nickname, 'pending', sources, pinned,
        tt_seller, verified, video_count, comment_count,
        guessed_location, location_created, confirmed_location,
        follower_count, following_count, heart_count,
        created_at, updated_at, region, signature, bio_link, sec_uid,
        status_code, latest_video_time, user_create_time`;

  if (opts.llmScore && hasLocations) {
    const llmTotal = opts.llmTotal ?? 200;
    const llmTagRatio = opts.llmTagRatio ?? 0.6;
    const llmTagLimit = Math.floor(llmTotal * llmTagRatio);
    const llmNonTagTarget = llmTotal - llmTagLimit;
    const llmMinScore = opts.llmMinScore ?? 60;
    const llmSampleSize = opts.llmSampleSize ?? 100;
    const maxBatches = opts.llmMaxBatches ?? 10;

    console.error(
      `[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
    );

    // Scoring is asynchronous, so this mode hands back a Promise.
    return (async () => {
      const allTagQualified = [];
      const allNonTagQualified = [];
      const allScores = [];

      // ----- Step 1: tag-sourced rows, accepted without scoring -----
      const tagWhere = `${whereSql} AND sources LIKE '%tag%'`;
      let tagOffset = llmSampleOffsets.get("_tag") || 0;
      const tagGlobalCount =
        db
          .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${tagWhere}`)
          .get(...args)?.c || 0;

      // Wrap around once the remembered position runs past the data.
      if (tagOffset >= tagGlobalCount) {
        tagOffset = 0;
        llmSampleOffsets.set("_tag", 0);
      }

      console.error(
        `[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
      );

      const tagSelect = db.prepare(
        `
        SELECT * FROM raw_jobs WHERE ${tagWhere}
        ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
        LIMIT ? OFFSET ?
      `,
      );
      while (
        allTagQualified.length < llmTagLimit &&
        tagOffset < tagGlobalCount
      ) {
        const batchLimit = Math.min(
          llmSampleSize,
          llmTagLimit - allTagQualified.length,
        );
        // Parameters follow placeholder order: filter args, LIMIT, OFFSET.
        const batch = tagSelect.all(...args, batchLimit, tagOffset);
        if (!batch.length) break;

        allTagQualified.push(...batch.map((row) => row.unique_id));
        tagOffset += batch.length;

        console.error(
          `[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
        );
      }
      llmSampleOffsets.set("_tag", tagOffset);

      // ----- Step 2: non-tag rows per location, filtered by LLM score -----
      // NOTE(review): offsets advance past rows that are deleted below, so
      // later runs may skip rows that were never sampled; confirm intent.
      const nonTagWhere = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`;
      const nonTagCountStmt = db.prepare(
        `SELECT COUNT(*) as c FROM raw_jobs WHERE ${nonTagWhere}`,
      );
      const nonTagSelect = db.prepare(
        `
        SELECT * FROM raw_jobs WHERE ${nonTagWhere}
        ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
        LIMIT ? OFFSET ?
      `,
      );

      for (const location of normalizedLocations) {
        if (allNonTagQualified.length >= llmNonTagTarget) break;

        const nonTagOffsetKey = `${location}:nonTag`;
        let offset = llmSampleOffsets.get(nonTagOffsetKey) || 0;

        const locationArgs = [...args, location];
        const nonTagCount = nonTagCountStmt.get(...locationArgs)?.c || 0;

        if (nonTagCount === 0) {
          console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
          continue;
        }

        if (offset >= nonTagCount) {
          offset = 0;
          llmSampleOffsets.set(nonTagOffsetKey, 0);
        }

        console.error(
          `[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
        );

        for (let batch = 0; batch < maxBatches; batch++) {
          if (allNonTagQualified.length >= llmNonTagTarget) break;

          const samples = nonTagSelect.all(
            ...locationArgs,
            llmSampleSize,
            offset,
          );
          if (!samples.length) break;

          const scores = await scoreJobsBatch(
            samples,
            DEFAULT_TARGET_LOCATIONS,
          );
          const qualified = scores.filter((s) => s.score >= llmMinScore);
          allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
          allScores.push(...scores);

          offset += samples.length;
          llmSampleOffsets.set(nonTagOffsetKey, offset);

          console.error(
            `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
          );
        }
      }

      // ----- Result -----
      const qualified = [...allTagQualified, ...allNonTagQualified];

      if (!qualified.length) {
        console.error(
          `[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
        );
        return {
          moved: 0,
          scored: allScores.length,
          qualified: 0,
          scores: allScores,
        };
      }

      console.error(
        `[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
      );

      // Copy the selected rows to jobs and drop them from raw_jobs atomically.
      const placeholders = qualified.map(() => "?").join(", ");
      const moveSelected = db.transaction(() => {
        db.prepare(
          `
          INSERT OR IGNORE INTO jobs (${insertColumns}
          )
          SELECT ${selectColumns}
          FROM raw_jobs
          WHERE unique_id IN (${placeholders})
        `,
        ).run(...qualified);

        db.prepare(
          `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
        ).run(...qualified);
      });
      moveSelected();
      markStatsDirty();

      // Remember sampling positions across restarts.
      saveLlmSampleOffsets();

      const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
        .map(([k, v]) => `${k}:${v}`)
        .join(", ");
      console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);

      return {
        moved: qualified.length,
        scored: allScores.length,
        qualified: qualified.length,
        scores: allScores,
      };
    })();
  }

  // Default mode: the same ordered selection drives both the INSERT and the
  // DELETE, so exactly the copied rows are removed.
  const orderedSelection = `
        WHERE ${whereSql}
        ORDER BY
          CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
          COALESCE(video_count, 0) DESC, created_at DESC
        LIMIT ?`;
  const moveTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR IGNORE INTO jobs (${insertColumns}
      )
      SELECT ${selectColumns}
      FROM raw_jobs
      ${orderedSelection}
    `,
    ).run(...args, safeLimit);

    db.prepare(
      `
      DELETE FROM raw_jobs
      WHERE unique_id IN (
        SELECT unique_id FROM raw_jobs
        ${orderedSelection}
      )
    `,
    ).run(...args, safeLimit);
  });

  moveTxn();
  markStatsDirty();

  return { moved: Math.min(count, safeLimit) };
}
3097
+
3098
+ async function claimNextJob(
3099
+ userId,
3100
+ expireMs = 5 * 60 * 1000,
3101
+ locations = null,
3102
+ loggedIn = true,
3103
+ ) {
3104
+ // 记录客户端登录状态
3105
+ clientLoginStatus.set(userId, !!loggedIn);
3106
+ if (db) {
3107
+ const now = Date.now();
3108
+ const ongoingRow = db
3109
+ .prepare(
3110
+ `
3111
+ SELECT *
3112
+ FROM jobs
3113
+ WHERE status = 'processing'
3114
+ AND claimed_by = ?
3115
+ AND claimed_at IS NOT NULL
3116
+ AND ? - claimed_at < ?
3117
+ ORDER BY claimed_at DESC
3118
+ LIMIT 1
3119
+ `,
3120
+ )
3121
+ .get(userId, now, expireMs);
3122
+ if (ongoingRow) {
3123
+ db.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?").run(
3124
+ now,
3125
+ ongoingRow.unique_id,
3126
+ );
3127
+ return {
3128
+ uniqueId: ongoingRow.unique_id,
3129
+ nickname: ongoingRow.nickname,
3130
+ claimedAt: now,
3131
+ claimedBy: userId,
3132
+ };
3133
+ }
3134
+
3135
+ const tier1 = new Set(["PL", "NL", "BE"]);
3136
+ const tier2 = new Set(["DE", "FR", "IT", "IE", "ES"]);
3137
+ const normalizedLocations = Array.isArray(locations)
3138
+ ? locations
3139
+ .map((loc) => String(loc).trim().toUpperCase())
3140
+ .filter(Boolean)
3141
+ : [];
3142
+
3143
+ function getLocationGroups() {
3144
+ const selected = normalizedLocations.length
3145
+ ? normalizedLocations
3146
+ : null;
3147
+ const tier1List = selected
3148
+ ? selected.filter((loc) => tier1.has(loc))
3149
+ : [...tier1];
3150
+ const tier2List = selected
3151
+ ? selected.filter((loc) => tier2.has(loc))
3152
+ : [...tier2];
3153
+ const otherList = selected
3154
+ ? selected.filter((loc) => !tier1.has(loc) && !tier2.has(loc))
3155
+ : null;
3156
+ const groups = [];
3157
+ if (tier1List.length > 0)
3158
+ groups.push({ type: "include", values: tier1List });
3159
+ if (tier2List.length > 0)
3160
+ groups.push({ type: "include", values: tier2List });
3161
+ if (selected) {
3162
+ if (otherList.length > 0)
3163
+ groups.push({ type: "include", values: otherList });
3164
+ } else {
3165
+ groups.push({ type: "exclude", values: [...tier1, ...tier2] });
3166
+ }
3167
+ return groups;
3168
+ }
3169
+
3170
+ const locationGroups = getLocationGroups();
3171
+
3172
+ function applyLocationGroup(where, args, group) {
3173
+ if (!group) return;
3174
+ if (group.type === "include") {
3175
+ where.push(
3176
+ `UPPER(COALESCE(guessed_location, '')) IN (${group.values.map(() => "?").join(", ")})`,
3177
+ );
3178
+ args.push(...group.values);
3179
+ return;
3180
+ }
3181
+ where.push(
3182
+ `UPPER(COALESCE(guessed_location, '')) NOT IN (${group.values.map(() => "?").join(", ")})`,
3183
+ );
3184
+ args.push(...group.values);
3185
+ }
3186
+
3187
+ function queryPendingOne({ requireVideo, group, filters = [] }) {
3188
+ const where = ["status = 'pending'"];
3189
+ const args = [];
3190
+ if (!loggedIn) {
3191
+ where.push("COALESCE(tt_seller, 0) != 1");
3192
+ // 未登录:只能领取 status_code 为空或 0 的任务
3193
+ where.push("(status_code IS NULL OR status_code = 0)");
3194
+ } else {
3195
+ // 登录:可以领取 status_code 为空、0、或 209002 的任务
3196
+ where.push(
3197
+ "(status_code IS NULL OR status_code = 0 OR status_code = 209002)",
3198
+ );
3199
+ }
3200
+ // 其他 status_code 值的任务不被领取
3201
+ if (requireVideo) {
3202
+ where.push("COALESCE(video_count, 0) > 0");
3203
+ }
3204
+ applyLocationGroup(where, args, group);
3205
+ for (const filter of filters) {
3206
+ where.push(filter);
3207
+ }
3208
+ return db
3209
+ .prepare(
3210
+ `
3211
+ SELECT *
3212
+ FROM jobs
3213
+ WHERE ${where.join(" AND ")}
3214
+ ORDER BY follower_count DESC, created_at ASC, unique_id ASC
3215
+ LIMIT 1
3216
+ `,
3217
+ )
3218
+ .get(...args);
3219
+ }
3220
+
3221
/**
 * Runs queryPendingOne for a location group, honouring the order of the
 * locations inside a multi-value "include" group: each location is queried
 * on its own, in list order, and the first hit wins.
 *
 * @param {object} options - Same shape as queryPendingOne's options.
 * @returns {object|null|undefined} The first matching row; null when a
 *   multi-value include group has no hit; otherwise whatever
 *   queryPendingOne returns for the group as a whole.
 */
function queryPendingByGroup({ requireVideo, group, filters = [] }) {
  const isOrderedInclude =
    group?.type === "include" && group.values.length > 1;
  if (!isOrderedInclude) {
    return queryPendingOne({ requireVideo, group, filters });
  }

  for (const location of group.values) {
    const hit = queryPendingOne({
      requireVideo,
      group: { type: "include", values: [location] },
      filters,
    });
    if (hit) {
      return hit;
    }
  }
  return null;
}
3235
+
3236
/**
 * Returns the oldest pinned pending job (created_at, then unique_id).
 *
 * Anonymous clients are never offered tt_seller rows. When the caller
 * restricted locations, only rows whose guessed_location is in that list
 * qualify.
 * NOTE(review): unlike queryPendingOne, no status_code predicate is applied
 * here - confirm that pinned rows are meant to bypass it.
 *
 * @param {boolean} requireVideo - Only rows with video_count > 0.
 * @returns {object|undefined} The matching jobs row, if any.
 */
function findPinnedPending(requireVideo) {
  const args = [];
  const where = ["status = 'pending'", "COALESCE(pinned, 0) = 1"];

  if (!loggedIn) {
    where.push("COALESCE(tt_seller, 0) != 1");
  }
  if (requireVideo) {
    where.push("COALESCE(video_count, 0) > 0");
  }
  if (normalizedLocations.length > 0) {
    const slots = normalizedLocations.map(() => "?").join(", ");
    where.push(`UPPER(COALESCE(guessed_location, '')) IN (${slots})`);
    args.push(...normalizedLocations);
  }

  const sql = `
    SELECT *
    FROM jobs
    WHERE ${where.join(" AND ")}
    ORDER BY created_at ASC, unique_id ASC
    LIMIT 1
  `;
  return db.prepare(sql).get(...args);
}
3263
+
3264
/**
 * Picks the next non-pinned pending job by source priority, trying every
 * location group (in order) inside each priority tier before moving on:
 *   1. seed users
 *   2. unverified tt_seller users (logged-in clients only)
 *   3. users discovered through following / follower lists
 *   4. everything else
 *
 * @param {boolean} requireVideo - Only rows with video_count > 0.
 * @returns {object|null} The first matching jobs row, or null.
 */
function findPrioritizedPending(requireVideo) {
  const notPinned = "COALESCE(pinned, 0) = 0";
  const tiers = [
    [notPinned, `instr(COALESCE(sources, ''), '"seed"') > 0`],
    ...(loggedIn ? [[notPinned, "tt_seller = 1", "verified = 0"]] : []),
    [
      notPinned,
      `(
        instr(COALESCE(sources, ''), '"following"') > 0
        OR instr(COALESCE(sources, ''), '"follower"') > 0
      )`,
    ],
    [notPinned],
  ];

  for (const filters of tiers) {
    for (const group of locationGroups) {
      const row = queryPendingByGroup({ requireVideo, group, filters });
      if (row) {
        return row;
      }
    }
  }
  return null;
}
3318
+
3319
/**
 * Marks a jobs row as being processed by the current client and returns
 * the claim descriptor sent back to that client.
 *
 * @param {object|null|undefined} row - Row with unique_id and nickname.
 * @returns {{uniqueId: string, nickname: *, claimedAt: number,
 *   claimedBy: *}|null} Null when no row was supplied.
 */
function claimRow(row) {
  if (!row) {
    return null;
  }
  const { unique_id: uniqueId, nickname } = row;
  const claim = db.prepare(
    "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
  );
  claim.run(now, userId, uniqueId);
  markStatsDirty();
  return { uniqueId, nickname, claimedAt: now, claimedBy: userId };
}
3332
+
3333
+ const expiredRow = db
3334
+ .prepare(
3335
+ `
3336
+ SELECT *
3337
+ FROM jobs
3338
+ WHERE status = 'processing'
3339
+ AND claimed_at IS NOT NULL
3340
+ AND ? - claimed_at > ?
3341
+ ORDER BY claimed_at ASC
3342
+ LIMIT 1
3343
+ `,
3344
+ )
3345
+ .get(now, expireMs);
3346
+ let expiredCandidate = null;
3347
+ if (expiredRow) {
3348
+ db.prepare(
3349
+ "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
3350
+ ).run(expiredRow.unique_id);
3351
+ expiredCandidate = mapJobRow({
3352
+ ...expiredRow,
3353
+ status: "pending",
3354
+ claimed_at: null,
3355
+ });
3356
+ }
3357
+
3358
+ for (const requireVideo of [true, false]) {
3359
+ const pinned = findPinnedPending(requireVideo);
3360
+ if (pinned) {
3361
+ return claimRow(pinned);
3362
+ }
3363
+ if (expiredCandidate) {
3364
+ return claimRow({
3365
+ unique_id: expiredCandidate.uniqueId,
3366
+ nickname: expiredCandidate.nickname,
3367
+ });
3368
+ }
3369
+ const ranked = findPrioritizedPending(requireVideo);
3370
+ if (ranked) {
3371
+ return claimRow(ranked);
3372
+ }
3373
+ }
3374
+ // 尝试从 raw_jobs 毛料库补充任务(使用 createStore 时配置的 LLM 打分)
3375
+ // 使用锁防止多个请求同时触发 LLM refill
3376
+ if (refillLock) {
3377
+ // 已有 refill 在进行中,等待完成后重新尝试领取
3378
+ await refillLock;
3379
+ for (const requireVideo of [true, false]) {
3380
+ const pinned = findPinnedPending(requireVideo);
3381
+ if (pinned) {
3382
+ return claimRow(pinned);
3383
+ }
3384
+ const ranked = findPrioritizedPending(requireVideo);
3385
+ if (ranked) {
3386
+ return claimRow(ranked);
3387
+ }
3388
+ }
3389
+ return null;
3390
+ }
3391
+ const refillResult = (async () => {
3392
+ refillLock = Promise.resolve(); // 占位
3393
+ const result = refillJobsFromRaw(
3394
+ normalizedLocations.length ? normalizedLocations : null,
3395
+ 500,
3396
+ refillLlmConfig,
3397
+ );
3398
+ // refillJobsFromRaw 在 LLM 模式下返回 Promise
3399
+ if (result && typeof result.then === "function") {
3400
+ return result.finally(() => {
3401
+ refillLock = null;
3402
+ });
3403
+ }
3404
+ return result;
3405
+ })();
3406
+ if (refillResult && typeof refillResult.then === "function") {
3407
+ const awaited = await refillResult;
3408
+ if (awaited.moved > 0) {
3409
+ console.error(
3410
+ `[data-store] 从 raw_jobs 补充了 ${awaited.moved} 条任务到 jobs`,
3411
+ );
3412
+ for (const requireVideo of [true, false]) {
3413
+ const pinned = findPinnedPending(requireVideo);
3414
+ if (pinned) {
3415
+ return claimRow(pinned);
3416
+ }
3417
+ const ranked = findPrioritizedPending(requireVideo);
3418
+ if (ranked) {
3419
+ return claimRow(ranked);
3420
+ }
3421
+ }
3422
+ }
3423
+ } else if (refillResult.moved > 0) {
3424
+ console.error(
3425
+ `[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
3426
+ );
3427
+ for (const requireVideo of [true, false]) {
3428
+ const pinned = findPinnedPending(requireVideo);
3429
+ if (pinned) {
3430
+ return claimRow(pinned);
3431
+ }
3432
+ const ranked = findPrioritizedPending(requireVideo);
3433
+ if (ranked) {
3434
+ return claimRow(ranked);
3435
+ }
3436
+ }
3437
+ }
3438
+
3439
+ return null;
3440
+ }
3441
+
3442
+ if (!db) {
3443
+ const now = Date.now();
3444
+
3445
+ // 0. 该客户端有未过期的任务,续期返回
3446
+ const ongoing = data.find(
3447
+ (u) =>
3448
+ u.status === "processing" &&
3449
+ u.claimedBy === userId &&
3450
+ u.claimedAt &&
3451
+ now - u.claimedAt < expireMs,
3452
+ );
3453
+ if (ongoing) {
3454
+ ongoing.claimedAt = now;
3455
+ save();
3456
+ return {
3457
+ uniqueId: ongoing.uniqueId,
3458
+ nickname: ongoing.nickname,
3459
+ claimedAt: ongoing.claimedAt,
3460
+ claimedBy: userId,
3461
+ };
3462
+ }
3463
+
3464
+ // 按猜测国家梯队排序
3465
+ const tier1 = new Set(["PL", "NL", "BE"]);
3466
+ const tier2 = new Set(["DE", "FR", "IT", "IE", "ES"]);
3467
/**
 * Ranks a user by guessed country: 0 for tier-1 countries, 1 for tier-2,
 * 2 for anything else (including a missing location).
 *
 * @param {{guessedLocation?: string}} u
 * @returns {0|1|2}
 */
function locationTier(u) {
  const code = (u.guessedLocation || "").toUpperCase();
  if (tier1.has(code)) {
    return 0;
  }
  return tier2.has(code) ? 1 : 2;
}
3473
+
3474
+ // 国家过滤:如果指定了 locations,只保留 guessedLocation 在列表中的用户
3475
/**
 * Location predicate: with no requested locations every user passes;
 * otherwise the decision is delegated to isLocationInList.
 *
 * @param {{guessedLocation?: string}} u
 * @returns {*} true, or whatever isLocationInList returns.
 */
function locationFilter(u) {
  const unrestricted = !locations || locations.length === 0;
  return unrestricted || isLocationInList(u.guessedLocation, locations);
}
3479
+
3480
+ // 从候选列表中按优先级取第一个:pinned > 超时回收 > seed > ttSeller(仅登录) > follow > other
3481
/**
 * Chooses the next user to hand out, in priority order:
 * pinned > timed-out "processing" job > seed > unverified ttSeller
 * (logged-in clients only) > following/follower discoveries > anything.
 * Within each class, lower locationTier wins.
 *
 * Side effects: a recycled timed-out job is flipped back to "pending"
 * (its claimedAt removed, stats marked dirty), and the final fallback sorts
 * `candidates` in place.
 *
 * @param {object[]} candidates - Pending users eligible for this client.
 * @returns {object|null} The chosen user, or null when nothing is available.
 */
function pickCandidate(candidates) {
  const byTier = (a, b) => locationTier(a) - locationTier(b);
  const bestOf = (list) => list.sort(byTier)[0] || null;

  const pinned = candidates.find((u) => u.pinned);
  if (pinned) {
    return pinned;
  }

  // The timeout scan covers the whole store, not just `candidates`.
  const timedOut = data.find(
    (u) =>
      u.status === "processing" &&
      u.claimedAt &&
      now - u.claimedAt > expireMs,
  );
  if (timedOut) {
    timedOut.status = "pending";
    markStatsDirty();
    delete timedOut.claimedAt;
    return timedOut;
  }

  const seeded = bestOf(
    candidates.filter((u) => u.sources && u.sources.includes("seed")),
  );
  if (seeded) {
    return seeded;
  }

  // Anonymous clients skip the ttSeller priority class entirely.
  if (loggedIn) {
    const seller = bestOf(
      candidates.filter((u) => u.ttSeller === true && u.verified === false),
    );
    if (seller) {
      return seller;
    }
  }

  const viaFollow = bestOf(
    candidates.filter(
      (u) =>
        u.sources &&
        (u.sources.includes("following") || u.sources.includes("follower")),
    ),
  );
  if (viaFollow) {
    return viaFollow;
  }

  return bestOf(candidates);
}
3534
+
3535
+ // 先在有视频的 pending 用户中找;找不到再用全部 pending 用户兜底
3536
+ let pending = data.filter((u) => u.status === "pending");
3537
+ // 应用国家过滤
3538
+ if (locations && locations.length > 0) {
3539
+ pending = pending.filter(locationFilter);
3540
+ }
3541
+ // 未登录客户端不能领取 ttSeller 用户
3542
+ if (!loggedIn) {
3543
+ pending = pending.filter((u) => u.ttSeller !== true);
3544
+ }
3545
+ // status_code 过滤:只领取空值、0 或 209002 的任务
3546
+ pending = pending.filter(
3547
+ (u) =>
3548
+ u.statusCode == null ||
3549
+ u.statusCode === 0 ||
3550
+ (loggedIn && u.statusCode === 209002),
3551
+ );
3552
+ let hasVideo = pending.filter((u) => u.videoCount > 0);
3553
+ const next = pickCandidate(hasVideo) || pickCandidate(pending);
3554
+
3555
+ if (next) {
3556
+ next.status = "processing";
3557
+ markStatsDirty();
3558
+ next.claimedAt = now;
3559
+ next.claimedBy = userId;
3560
+ save();
3561
+ return {
3562
+ uniqueId: next.uniqueId,
3563
+ nickname: next.nickname,
3564
+ claimedAt: next.claimedAt,
3565
+ claimedBy: userId,
3566
+ };
3567
+ }
3568
+ return null;
3569
+ }
3570
+
3571
+ return null;
3572
+ }
3573
+
3574
/**
 * Read-only diagnostic that reports which job each claim stage would select,
 * together with the SQL and bound arguments used to find it. It issues only
 * SELECT statements.
 *
 * Without a SQLite connection only coarse in-memory counters are returned.
 *
 * @param {*} userId - Client identifier used for the "ongoing" lookup.
 * @param {number} [expireMs=300000] - Claim lifetime in milliseconds.
 * @param {string[]|null} [locations=null] - Optional country-code filter.
 * @param {boolean} [loggedIn=true] - Whether the client is logged in.
 * @returns {object} Diagnostic report (`path` is "db" or "memory").
 */
function debugClaimNextJob(
  userId,
  expireMs = 5 * 60 * 1000,
  locations = null,
  loggedIn = true,
) {
  if (!db) {
    return {
      path: "memory",
      userId,
      expireMs,
      loggedIn,
      totalUsers: data.length,
      processingUsers: data.filter((u) => u.status === "processing").length,
      pendingUsers: data.filter((u) => u.status === "pending").length,
    };
  }

  const now = Date.now();
  const info = { path: "db", userId, expireMs, loggedIn };

  // Stage 0: a still-valid claim already held by this client.
  const ongoingRow = db
    .prepare(
      `
      SELECT *
      FROM jobs
      WHERE status = 'processing'
        AND claimed_by = ?
        AND claimed_at IS NOT NULL
        AND ? - claimed_at < ?
      ORDER BY claimed_at DESC
      LIMIT 1
      `,
    )
    .get(userId, now, expireMs);
  info.ongoing = ongoingRow
    ? {
        uniqueId: ongoingRow.unique_id,
        claimedBy: ongoingRow.claimed_by,
        claimedAt: ongoingRow.claimed_at,
      }
    : null;

  const tier1 = new Set(["PL", "NL", "BE"]);
  const tier2 = new Set(["DE", "FR", "IT", "IE", "ES"]);
  const normalizedLocations = Array.isArray(locations)
    ? locations.map((loc) => String(loc).trim().toUpperCase()).filter(Boolean)
    : [];

  // Location groups in priority order: tier 1, tier 2, then the remainder
  // (the leftover requested countries, or "everything else" when the caller
  // did not restrict locations).
  const buildLocationGroups = () => {
    const include = (values) => ({ type: "include", values });
    if (normalizedLocations.length === 0) {
      return [
        include([...tier1]),
        include([...tier2]),
        { type: "exclude", values: [...tier1, ...tier2] },
      ];
    }
    const buckets = [
      normalizedLocations.filter((loc) => tier1.has(loc)),
      normalizedLocations.filter((loc) => tier2.has(loc)),
      normalizedLocations.filter((loc) => !tier1.has(loc) && !tier2.has(loc)),
    ];
    return buckets
      .filter((values) => values.length > 0)
      .map((values) => include(values));
  };

  const locationGroups = buildLocationGroups();
  info.locationGroups = locationGroups;

  const locationClause = (operator, count) =>
    `UPPER(COALESCE(guessed_location, '')) ${operator} (${Array.from(
      { length: count },
      () => "?",
    ).join(", ")})`;

  // Executes a LIMIT 1 lookup and keeps the SQL text and arguments for the report.
  const selectFirst = (where, args, orderBy) => {
    const sql = `
      SELECT *
      FROM jobs
      WHERE ${where.join(" AND ")}
      ORDER BY ${orderBy}
      LIMIT 1
    `;
    const row = db.prepare(sql).get(...args);
    return { row, sql, args };
  };

  const queryPendingOne = ({ requireVideo, group, filters = [] }) => {
    const where = ["status = 'pending'"];
    const args = [];
    if (loggedIn) {
      where.push(
        "(status_code IS NULL OR status_code = 0 OR status_code = 209002)",
      );
    } else {
      where.push("COALESCE(tt_seller, 0) != 1");
      where.push("(status_code IS NULL OR status_code = 0)");
    }
    if (requireVideo) {
      where.push("COALESCE(video_count, 0) > 0");
    }
    if (group) {
      const operator = group.type === "include" ? "IN" : "NOT IN";
      where.push(locationClause(operator, group.values.length));
      args.push(...group.values);
    }
    where.push(...filters);
    return selectFirst(
      where,
      args,
      "follower_count DESC, created_at ASC, unique_id ASC",
    );
  };

  // Multi-value include groups are probed one location at a time, in order.
  const queryPendingByGroup = ({ requireVideo, group, filters = [] }) => {
    const ordered = group?.type === "include" && group.values.length > 1;
    if (!ordered) {
      return queryPendingOne({ requireVideo, group, filters });
    }
    for (const location of group.values) {
      const attempt = queryPendingOne({
        requireVideo,
        group: { type: "include", values: [location] },
        filters,
      });
      if (attempt.row) {
        return attempt;
      }
    }
    return { row: null, sql: null, args: [] };
  };

  const findPinnedPending = (requireVideo) => {
    const where = ["status = 'pending'", "COALESCE(pinned, 0) = 1"];
    const args = [];
    if (!loggedIn) {
      where.push("COALESCE(tt_seller, 0) != 1");
    }
    if (requireVideo) {
      where.push("COALESCE(video_count, 0) > 0");
    }
    if (normalizedLocations.length > 0) {
      where.push(locationClause("IN", normalizedLocations.length));
      args.push(...normalizedLocations);
    }
    return selectFirst(where, args, "created_at ASC, unique_id ASC");
  };

  // Oldest claim that has outlived expireMs (reported only, never recycled).
  const expiredSql = `
    SELECT *
    FROM jobs
    WHERE status = 'processing'
      AND claimed_at IS NOT NULL
      AND ? - claimed_at > ?
    ORDER BY claimed_at ASC
    LIMIT 1
  `;
  const expiredRow = db.prepare(expiredSql).get(now, expireMs);
  info.expired = expiredRow
    ? {
        uniqueId: expiredRow.unique_id,
        claimedBy: expiredRow.claimed_by,
        claimedAt: expiredRow.claimed_at,
        diffMs: now - expiredRow.claimed_at,
      }
    : null;

  // Source-priority tiers, tried in order once no pinned row was found.
  const notPinned = "COALESCE(pinned, 0) = 0";
  const tiers = [
    {
      key: "seed",
      enabled: true,
      filters: [notPinned, `instr(COALESCE(sources, ''), '"seed"') > 0`],
    },
    {
      key: "seller",
      enabled: loggedIn,
      filters: [notPinned, "tt_seller = 1", "verified = 0"],
    },
    {
      key: "follow",
      enabled: true,
      filters: [
        notPinned,
        `(
          instr(COALESCE(sources, ''), '"following"') > 0
          OR instr(COALESCE(sources, ''), '"follower"') > 0
        )`,
      ],
    },
    { key: "other", enabled: true, filters: [notPinned] },
  ];

  // Returns the first tier/group combination that yields a row, if any.
  const findTierHit = (requireVideo) => {
    for (const tier of tiers) {
      if (!tier.enabled) continue;
      for (const group of locationGroups) {
        const found = queryPendingByGroup({
          requireVideo,
          group,
          filters: tier.filters,
        });
        if (found.row) {
          return {
            key: tier.key,
            detail: {
              uniqueId: found.row.unique_id,
              group,
              sql: found.sql,
              args: found.args,
            },
          };
        }
      }
    }
    return null;
  };

  info.requireVideoPasses = [];
  for (const requireVideo of [true, false]) {
    const pass = { requireVideo };
    const pinned = findPinnedPending(requireVideo);
    pass.pinned = pinned.row
      ? { uniqueId: pinned.row.unique_id, sql: pinned.sql, args: pinned.args }
      : null;

    if (!pass.pinned) {
      const hit = findTierHit(requireVideo);
      if (hit) {
        pass[hit.key] = hit.detail;
      }
    }
    info.requireVideoPasses.push(pass);
  }

  return info;
}
3870
+
3871
/**
 * Registers every user discovered while processing a job and returns the
 * ids that were not known before.
 *
 * Accepted entry shapes:
 *  - video / comment / guess authors: a handle string, or an object
 *    (`uniqueId`, and for comment/guess also `author`);
 *  - following / followers / recommended: a `[handle, displayName]` pair,
 *    an object with `handle` / `displayName`, or a bare handle string.
 *
 * Malformed entries (null, non-string handles, empty pairs) are skipped
 * instead of throwing, so one bad item cannot abort the whole commit.
 * When the same id appears several times, the first occurrence wins.
 *
 * @param {object} result - Job result payload reported by a client.
 * @returns {string[]} uniqueIds that were newly added.
 */
function processDiscoveredUsers(result) {
  const guessedLocation = result.guessedLocation || null;

  // Payload lists come from clients; anything that is not an array is ignored.
  const listOf = (value) => (Array.isArray(value) ? value : []);
  // Strips a leading "@"; non-string handles collapse to "" and get dropped.
  const cleanHandle = (value) =>
    typeof value === "string" ? value.replace(/^@/, "") : "";
  const locationOf = (entry) =>
    (entry && typeof entry === "object" && entry.guessedLocation) ||
    guessedLocation;

  const fromVideo = (entry) => {
    const bare = typeof entry === "string";
    return {
      uniqueId: cleanHandle(bare ? entry : entry?.uniqueId),
      nickname: bare ? null : entry?.nickname || null,
      locationCreated: bare ? null : entry?.locationCreated || null,
      guessedLocation: locationOf(entry),
      sources: ["video"],
    };
  };

  // Shared by comment and guess authors, which differ only in source tag.
  const fromAuthor = (source) => (entry) => {
    if (typeof entry === "string") {
      return {
        uniqueId: cleanHandle(entry),
        sources: [source],
        guessedLocation,
      };
    }
    return {
      uniqueId: cleanHandle(entry?.author || entry?.uniqueId),
      nickname: entry?.nickname || null,
      sources: [source],
      guessedLocation: locationOf(entry),
    };
  };

  // Shared by following / followers / recommended lists.
  const fromFollow = (source) => (entry) => {
    let handle;
    let name;
    if (Array.isArray(entry)) {
      [handle, name] = entry;
    } else if (typeof entry === "string") {
      handle = entry;
      name = null;
    } else {
      handle = entry?.handle;
      name = entry?.displayName || null;
    }
    return {
      uniqueId: cleanHandle(handle),
      nickname: name,
      sources: [source],
      guessedLocation: locationOf(entry),
    };
  };

  const discovered = [
    ...listOf(result.discoveredVideoAuthors).map(fromVideo),
    ...listOf(result.discoveredCommentAuthors).map(fromAuthor("comment")),
    ...listOf(result.discoveredGuessAuthors).map(fromAuthor("guess")),
    ...listOf(result.discoveredFollowing).map(fromFollow("following")),
    ...listOf(result.discoveredFollowers).map(fromFollow("follower")),
    ...listOf(result.discoveredRecommended).map(fromFollow("recommended")),
  ].filter((u) => u.uniqueId);

  // De-duplicate within this payload first, then check against the store.
  const seen = new Set();
  const newUsers = [];
  for (const candidate of discovered) {
    if (seen.has(candidate.uniqueId)) continue;
    seen.add(candidate.uniqueId);
    if (!hasUser(candidate.uniqueId)) {
      addUserToDb(candidate);
      addJobBaseToDb(candidate);
      newUsers.push(candidate.uniqueId);
    }
  }
  return newUsers;
}
3971
+
3972
/**
 * Applies a client-reported job result to a user record in place.
 *
 *  - `result.restricted`: status becomes "restricted"; profile fields from
 *    `result.userInfo` are merged and the record is marked processed.
 *  - `result.error`: status becomes "error" and the error is stored.
 *  - otherwise: status becomes "done"; profile fields, well-known counters
 *    and any remaining non-empty top-level result fields are merged, and
 *    `topRecentVideo` is flattened into topVideoPlayCount / topVideoHref.
 *
 * Only non-empty values (not undefined, null or "") overwrite existing
 * fields in the generic merges. Keys that could tamper with the record's
 * prototype chain (`__proto__`, `constructor`, `prototype`) are ignored,
 * because the payload originates from remote clients.
 *
 * Calls markStatsDirty() when the status actually changed.
 *
 * @param {object} user - Record to mutate.
 * @param {object} result - Result payload reported by the client.
 */
function updateUserFromResult(user, result) {
  const previousStatus = user.status;
  const forbiddenKeys = ["__proto__", "constructor", "prototype"];
  const profileSkips = ["uniqueId", "sources"];

  // Copies every non-empty own field of `source` onto the user record.
  const copyFilled = (source, skipped) => {
    for (const key of Object.keys(source)) {
      if (skipped.includes(key) || forbiddenKeys.includes(key)) continue;
      const value = source[key];
      if (value !== undefined && value !== null && value !== "") {
        user[key] = value;
      }
    }
  };
  // Adds a lifecycle tag to user.sources without duplicating it.
  const tagSource = (tag) => {
    user.sources = [...new Set([...(user.sources || []), tag])];
  };

  const info = result.userInfo;

  if (result.restricted) {
    user.status = "restricted";
    if (info) copyFilled(info, profileSkips);
    user.restricted = true;
    user.processed = true;
    user.processedAt = Date.now();
    tagSource("restricted");
  } else if (result.error) {
    user.status = "error";
    user.error = result.error;
    tagSource("error");
  } else {
    user.status = "done";
    user.processed = true;
    user.processedAt = Date.now();
    user.noVideo = result.noVideo || false;
    user.keepFollow = result.keepFollow || false;
    user.hasFollowData = result.hasFollowData || false;

    if (info) copyFilled(info, profileSkips);

    // Explicit assignments: `??` lets 0 / false / "" through where the
    // generic merge above would have skipped an empty string.
    user.followerCount = info?.followerCount ?? user.followerCount;
    user.videoCount = info?.videoCount ?? user.videoCount;
    user.nickname = info?.nickname || user.nickname;
    user.locationCreated = info?.locationCreated || user.locationCreated;
    user.ttSeller = info?.ttSeller ?? user.ttSeller;
    user.verified = info?.verified ?? user.verified;
    user.region = info?.region || user.region;
    user.signature = info?.signature ?? info?.bio ?? user.signature;
    user.bioLink = info?.bioLink ?? user.bioLink;
    user.followingCount = info?.followingCount ?? user.followingCount;
    user.heartCount = info?.heartCount ?? user.heartCount;
    if (info?.secUid) user.secUid = info.secUid;

    // Top-level result fields that are handled elsewhere or must never be
    // copied verbatim onto the record.
    const handledFields = [
      "restricted",
      "error",
      "userInfo",
      "discoveredVideoAuthors",
      "discoveredCommentAuthors",
      "discoveredGuessAuthors",
      "discoveredFollowing",
      "discoveredFollowers",
      "discoveredRecommended",
      "uniqueId",
      "sources",
      "topRecentVideo", // flattened separately below
    ];
    copyFilled(result, handledFields);

    // Flatten the topRecentVideo object into scalar fields.
    if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
      user.topVideoPlayCount = result.topRecentVideo.playCount || null;
      user.topVideoHref = result.topRecentVideo.href || null;
    }
    tagSource("processed");
  }

  if (user.status !== previousStatus) markStatsDirty();
}
4067
+
4068
/**
 * Stores the outcome of a claimed job and registers any users discovered
 * while processing it. The claim timestamp is released in both storage modes.
 *
 * @param {string} uniqueId - Id of the job/user being committed.
 * @param {object} result - Result payload reported by the client.
 * @returns {{saved: boolean, status?: string, newUsers?: string[],
 *   error?: string}}
 */
function commitJob(uniqueId, result) {
  const user = db ? getJob(uniqueId) : getUser(uniqueId);
  if (!user) {
    return { saved: false, error: "user not found" };
  }

  updateUserFromResult(user, result);

  if (!db) {
    // In-memory store: drop the claim marker and flush to disk.
    delete user.claimedAt;
    const newUsers = processDiscoveredUsers(result);
    save();
    return { saved: true, status: user.status, newUsers };
  }

  user.claimedAt = null;
  const newUsers = processDiscoveredUsers(result);
  const outcome = updateJobInfo(uniqueId, user, false);
  if (outcome.error) {
    return { saved: false, error: outcome.error };
  }
  return { saved: true, status: user.status, newUsers };
}
4093
+
4094
/**
 * Stores an exploration result for a user that may not exist yet: an
 * existing record is updated, otherwise a new one tagged "refresh-explore"
 * is created. Discovered users are registered in both cases.
 *
 * @param {string} uniqueId - Id of the explored user.
 * @param {object} result - Result payload reported by the client.
 * @returns {{saved: boolean, created?: boolean, status?: string,
 *   newUsers?: string[], error?: string}}
 */
function commitNewExplore(uniqueId, result) {
  const usingDb = Boolean(db);
  const existing = usingDb ? getJob(uniqueId) : getUser(uniqueId);

  if (existing) {
    updateUserFromResult(existing, result);
    if (usingDb) {
      const outcome = updateJobInfo(uniqueId, existing, false);
      if (outcome.error) {
        return { saved: false, error: outcome.error };
      }
    }
    const newUsers = processDiscoveredUsers(result);
    if (!usingDb) {
      save();
    }
    return { saved: true, created: false, status: existing.status, newUsers };
  }

  // NOTE(review): userInfo is spread after uniqueId, so a uniqueId inside
  // userInfo replaces the argument - confirm this is intended.
  const fresh = {
    uniqueId,
    ...(result.userInfo || {}),
    sources: ["refresh-explore"],
  };
  updateUserFromResult(fresh, result);
  if (usingDb) {
    addJob(fresh);
  } else {
    addUser(fresh, true);
  }
  const newUsers = processDiscoveredUsers(result);
  if (!usingDb) {
    save();
  }
  return { saved: true, created: true, status: fresh.status, newUsers };
}
4143
+
4144
/**
 * Returns a job to the "pending" state and clears every marker left by a
 * previous run (claim, processed flags, error, restricted, noVideo).
 *
 * @param {string} uniqueId - Id of the job/user to reset.
 * @returns {{saved: boolean, error?: string}}
 */
function resetJob(uniqueId) {
  const notFound = { saved: false, error: "user not found" };

  if (!db) {
    const user = getUser(uniqueId);
    if (!user) return notFound;
    user.status = "pending";
    markStatsDirty();
    // The in-memory record simply loses the run markers.
    for (const field of [
      "claimedAt",
      "processedAt",
      "processed",
      "error",
      "restricted",
      "noVideo",
    ]) {
      delete user[field];
    }
    save();
    return { saved: true };
  }

  const user = getJob(uniqueId);
  if (!user) return notFound;
  // The database row gets explicit neutral values for the same markers.
  Object.assign(user, {
    status: "pending",
    claimedAt: null,
    processedAt: null,
    processed: false,
    error: null,
    restricted: false,
    noVideo: false,
  });
  const outcome = updateJobInfo(uniqueId, user, false);
  if (outcome.error) {
    return { saved: false, error: outcome.error };
  }
  markStatsDirty();
  return { saved: true };
}
4174
+
4175
/**
 * Flips the `pinned` flag of a job/user and persists the change.
 *
 * @param {string} uniqueId - Id of the job/user.
 * @returns {{saved: boolean, pinned?: boolean, error?: string}}
 */
function togglePin(uniqueId) {
  const user = db ? getJob(uniqueId) : getUser(uniqueId);
  if (!user) {
    return { saved: false, error: "user not found" };
  }
  const pinned = !user.pinned;

  if (db) {
    // Only the changed column is sent to the database.
    const outcome = updateJobInfo(uniqueId, { pinned }, false);
    return outcome.error
      ? { saved: false, error: outcome.error }
      : { saved: true, pinned };
  }

  user.pinned = pinned;
  save();
  return { saved: true, pinned };
}
4191
+
4192
/**
 * Hands out the next unverified tt_seller user from the default target
 * countries whose data has not been refreshed within `maxAgeSeconds`.
 * Pinned users come first, then the least recently refreshed; users that
 * were never refreshed count as refreshed on 2016-01-01. The chosen user's
 * refresh time is stamped immediately so it is not handed out again.
 *
 * @param {*} userId - Requesting client (not used by the selection).
 * @param {number} [maxAgeSeconds=43200] - Minimum age of the last refresh.
 * @returns {{uniqueId: string, nickname: *, refreshTime: number}|null}
 */
function getNextRedoJob(userId, maxAgeSeconds = 43200) {
  const now = Date.now();
  const threshold = now - maxAgeSeconds * 1000;
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
  const targetLocations = DEFAULT_TARGET_LOCATIONS;

  if (db) {
    const placeholders = targetLocations.map(() => "?").join(",");
    const select = db.prepare(
      `
      SELECT *
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${placeholders})
        AND COALESCE(refresh_time, ?) < ?
      ORDER BY COALESCE(pinned, 0) DESC, COALESCE(refresh_time, ?) ASC
      LIMIT 1
      `,
    );
    const row = select.get(
      ...targetLocations,
      defaultTime,
      threshold,
      defaultTime,
    );
    if (!row) {
      return null;
    }
    db.prepare(
      "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
    ).run(now, now, row.unique_id);
    return { uniqueId: row.unique_id, nickname: row.nickname, refreshTime: now };
  }

  const lastRefresh = (u) => u.refreshTime || defaultTime;
  const pinRank = (u) => (u.pinned ? 1 : 0);

  // Target-country sellers awaiting verification whose refresh is overdue.
  const overdue = data.filter(
    (u) =>
      u.ttSeller &&
      u.verified === false &&
      targetLocations.includes(u.locationCreated) &&
      lastRefresh(u) < threshold,
  );
  if (overdue.length === 0) {
    return null;
  }

  // Pinned first, then oldest refresh first.
  overdue.sort((a, b) => {
    const pinGap = pinRank(b) - pinRank(a);
    return pinGap !== 0 ? pinGap : lastRefresh(a) - lastRefresh(b);
  });

  const [next] = overdue;
  next.refreshTime = now;
  save();
  return {
    uniqueId: next.uniqueId,
    nickname: next.nickname,
    refreshTime: next.refreshTime,
  };
}
4263
+
4264
/**
 * Stores the outcome of a refresh ("redo") job: stamps the refresh time,
 * merges non-empty profile fields from `result.userInfo`, flattens
 * `topRecentVideo`, and registers discovered users.
 *
 * The record is persisted in both storage modes. The in-memory path used to
 * return without calling save(), unlike every other in-memory mutator here,
 * so refreshed data stayed unsaved until some unrelated call flushed it.
 *
 * @param {string} uniqueId - Id of the refreshed job/user.
 * @param {object} result - Result payload reported by the client.
 * @returns {{saved: boolean, newUsers?: string[], error?: string}}
 */
function commitRedoJob(uniqueId, result) {
  const user = db ? getJob(uniqueId) : getUser(uniqueId);
  if (!user) {
    return { saved: false, error: "user not found" };
  }

  user.refreshTime = Date.now();

  const info = result.userInfo;
  if (info) {
    for (const key of Object.keys(info)) {
      // Identity and provenance are never taken from the client payload.
      if (key === "uniqueId" || key === "sources") continue;
      const value = info[key];
      if (value !== undefined && value !== null && value !== "") {
        user[key] = value;
      }
    }
  }

  // Flatten the topRecentVideo object into scalar fields.
  if (result.topRecentVideo && typeof result.topRecentVideo === "object") {
    user.topVideoPlayCount = result.topRecentVideo.playCount || null;
    user.topVideoHref = result.topRecentVideo.href || null;
  }

  const newUsers = processDiscoveredUsers(result);

  if (db) {
    const outcome = updateJobInfo(uniqueId, user, false);
    if (outcome.error) {
      return { saved: false, error: outcome.error };
    }
  } else {
    save();
  }
  return { saved: true, newUsers };
}
4315
+
4316
/**
 * Records an error reported by a client, keyed by `userId` in `clientErrors`.
 *
 * A first report creates the record. Later reports refresh the timestamp and
 * then either bump the captcha counters (keeping the first captcha
 * stage/message/stack) or overwrite the generic error fields and bump
 * `reportCount`.
 *
 * @param {string} userId - Key of the error record.
 * @param {string} errorType - `"captcha"` is tracked separately from other types.
 * @param {string} [errorMessage]
 * @param {string} [username] - Overwrites the stored name only when truthy.
 * @param {string} [stage]
 * @param {string} [errorStack]
 */
function reportClientError(
  userId,
  errorType,
  errorMessage,
  username,
  stage,
  errorStack,
) {
  const isCaptcha = errorType === "captcha";
  const message = errorMessage || "";
  const stack = errorStack || "";
  const where = stage || "";

  const record = clientErrors.get(userId);
  if (!record) {
    clientErrors.set(userId, {
      userId,
      errorType,
      errorMessage: message,
      errorStack: stack,
      username,
      stage: where,
      timestamp: Date.now(),
      reportCount: 1,
      captchaCount: isCaptcha ? 1 : 0,
      captchaStage: isCaptcha ? where : "",
      captchaMessage: isCaptcha ? message : "",
      captchaStack: isCaptcha ? stack : "",
    });
    return;
  }

  record.timestamp = Date.now();
  if (isCaptcha) {
    record.captchaCount = (record.captchaCount || 0) + 1;
    // Only the first captcha's details are kept.
    if (!record.captchaStage) record.captchaStage = where;
    if (!record.captchaMessage) record.captchaMessage = message;
    if (!record.captchaStack) record.captchaStack = stack;
  } else {
    record.errorType = errorType;
    record.errorMessage = message;
    record.errorStack = stack;
    record.stage = where;
    record.reportCount = (record.reportCount || 1) + 1;
  }
  if (username) record.username = username;
}
4358
+
4359
/**
 * Drops the stored error record for `userId`, if there is one.
 *
 * @param {string} userId - Key of the record in `clientErrors`.
 */
function deleteClientError(userId) {
  clientErrors.delete(userId);
}
4362
+
4363
/**
 * Lists every stored client error record.
 *
 * @returns {object[]} A new array holding the values of `clientErrors`.
 */
function getClientErrors() {
  return [...clientErrors.values()];
}
4366
+
4367
/**
 * Returns the entries of `clientLoginStatus` as a plain object.
 *
 * @returns {object} A fresh object built from the key/value entries.
 */
function getClientLoginStatus() {
  const statusByClient = Object.fromEntries(clientLoginStatus);
  return statusByClient;
}
4370
+
4371
/**
 * Registers a client heartbeat in `activeClients`.
 *
 * An unknown client is stored as a copy of `info` plus `lastSeen`. For a known
 * client only the provided fields are refreshed: `type`, `userId` and `ip`
 * when truthy, `port` whenever it is not `undefined`.
 *
 * @param {string} clientId
 * @param {{type?: string, userId?: string, ip?: string, port?: number}} info
 */
function trackClient(clientId, info) {
  const known = activeClients.get(clientId);
  if (!known) {
    activeClients.set(clientId, { ...info, lastSeen: Date.now() });
    return;
  }

  for (const field of ["type", "userId", "ip"]) {
    if (info[field]) known[field] = info[field];
  }
  // Port is compared against undefined so that 0 can still be stored.
  if (info.port !== undefined) known.port = info.port;
  known.lastSeen = Date.now();
}
4386
+
4387
/**
 * Evicts clients not seen for more than two minutes and lists the rest.
 *
 * @returns {Array<{clientId: string, type: string, ip: string, port: number,
 *   userId: string, lastSeen: number}>} One entry per remaining client, with
 *   defaults filled in for missing fields.
 */
function getActiveClients() {
  const now = Date.now();
  const stale = 2 * 60 * 1000;

  activeClients.forEach((info, id) => {
    if (now - info.lastSeen > stale) activeClients.delete(id);
  });

  const snapshot = [];
  for (const [clientId, info] of activeClients) {
    snapshot.push({
      clientId,
      type: info.type || "unknown",
      ip: info.ip || "",
      port: info.port || 0,
      userId: info.userId || "",
      lastSeen: info.lastSeen,
    });
  }
  return snapshot;
}
4402
+
4403
/**
 * Claims up to `limit` users that still need a profile update.
 *
 * A user is pending when its seller flag is empty and its update counter is
 * missing or <= 0. Users whose sources contain "tag" come first, then the
 * oldest `createdAt`. Every returned user has its update counter bumped by
 * one and its `updatedAt` refreshed, which is what marks it as claimed.
 *
 * @param {number|string} [limit] - Maximum number of tasks; falls back to 5
 *   and is clamped to at least 1.
 * @param {string[]|string} [countries] - Optional location filter. Either an
 *   array of codes or a single comma-separated string; matching is
 *   case-insensitive against the guessed location.
 * @returns {object[]} The claimed users (mapped job rows in SQLite mode).
 */
function getPendingUserUpdateTasks(limit, countries) {
  const maxTasks = Math.max(1, parseInt(limit, 10) || 5);

  // Accept "US,TH" as well as ["US", "TH"]; a bare string used to throw on .map.
  let requested = countries || [];
  if (typeof requested === "string") requested = requested.split(",");
  const targetCountries = requested.map((c) => String(c).trim().toUpperCase());
  const hasCountryFilter = targetCountries.length > 0;

  if (db) {
    let sql = `
      SELECT *
      FROM jobs_base
      WHERE COALESCE(tt_seller, '') = ''
        AND COALESCE(user_update_count, 0) <= 0
    `;
    const sqlParams = [];

    if (hasCountryFilter) {
      const placeholders = targetCountries.map(() => "?").join(", ");
      sql += ` AND UPPER(COALESCE(guessed_location, '')) IN (${placeholders})`;
      sqlParams.push(...targetCountries);
    }

    // Priority: rows whose sources contain "tag" first, then by created_at.
    sql += ` ORDER BY
      CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
      created_at ASC,
      unique_id ASC
      LIMIT ?`;
    sqlParams.push(maxTasks);

    const rows = db.prepare(sql).all(...sqlParams);
    if (rows.length === 0) return [];

    const now = Date.now();
    const bumpStmt = db.prepare(
      `
      UPDATE jobs_base
      SET user_update_count = COALESCE(user_update_count, 0) + 1,
          updated_at = ?
      WHERE unique_id = ?
    `,
    );
    const bumpTxn = db.transaction((items) => {
      for (const item of items) {
        bumpStmt.run(now, item.unique_id);
      }
    });
    bumpTxn(rows);

    // Rows were read before the bump, so mirror it on the returned objects.
    return rows.map((row) => {
      const mapped = mapJobRow(row);
      mapped.userUpdateCount = (mapped.userUpdateCount || 0) + 1;
      mapped.updatedAt = now;
      return mapped;
    });
  }

  const isUnset = (value) =>
    value === null || value === undefined || value === "";

  const pending = data
    .filter((u) => {
      if (!isUnset(u.ttSeller)) return false;
      const updateCount = u.userUpdateCount;
      const neverUpdated =
        updateCount === null || updateCount === undefined || updateCount <= 0;
      if (!neverUpdated) return false;
      if (!hasCountryFilter) return true;
      return targetCountries.includes((u.guessedLocation || "").toUpperCase());
    })
    .sort((a, b) => {
      // Priority: users whose sources contain "tag" first.
      const aIsTag = (a.sources || "").includes("tag");
      const bIsTag = (b.sources || "").includes("tag");
      if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
      return (a.createdAt || 0) - (b.createdAt || 0);
    })
    .slice(0, maxTasks);

  // Handing out a task counts as one update attempt.
  const now = Date.now();
  for (const u of pending) {
    u.userUpdateCount = (u.userUpdateCount || 0) + 1;
    u.updatedAt = now;
  }
  save();
  return pending;
}
4495
+
4496
/**
 * Merges freshly fetched profile fields into a user.
 *
 * In SQLite mode the work is delegated to `updateJobInfo`. In file mode every
 * non-empty field of `info` except `uniqueId` and `sources` is copied onto
 * the user, the update counter is bumped and the store is saved.
 *
 * @param {string} uniqueId
 * @param {object} info - Field/value pairs to merge.
 * @returns {object} `{ ok, userUpdateCount }` in file mode, `{ error }` when
 *   the user is unknown, or whatever `updateJobInfo` returns in SQLite mode.
 */
function updateUserInfo(uniqueId, info) {
  if (db) return updateJobInfo(uniqueId, info, true);

  const user = getUser(uniqueId);
  if (!user) return { error: "user not found" };

  const protectedKeys = ["uniqueId", "sources"];
  Object.keys(info).forEach((key) => {
    if (protectedKeys.includes(key)) return;
    const value = info[key];
    if (value === undefined || value === null || value === "") return;
    user[key] = value;
  });

  user.userUpdateCount = (user.userUpdateCount || 0) + 1;
  user.updatedAt = Date.now();
  save();
  return { ok: true, userUpdateCount: user.userUpdateCount };
}
4514
+
4515
/**
 * Sets the creation location of a user and stamps the modification time.
 *
 * NOTE(review): the file-mode path also bumps `userUpdateCount`, while the
 * SQLite path leaves `user_update_count` untouched - confirm which is intended.
 *
 * @param {string} uniqueId
 * @param {string} location - Value stored as the user's created-location.
 * @returns {{ok: true, location: string, modifiedAt: number}|{error: string}}
 */
function updateUserLocation(uniqueId, location) {
  if (!db) {
    const user = getUser(uniqueId);
    if (!user) return { error: "user not found" };
    user.locationCreated = location;
    user.modifiedAt = Date.now();
    user.updatedAt = Date.now();
    user.userUpdateCount = (user.userUpdateCount || 0) + 1;
    save();
    return { ok: true, location, modifiedAt: user.modifiedAt };
  }

  const lookup = db.prepare("SELECT * FROM jobs WHERE unique_id = ?");
  if (!lookup.get(uniqueId)) return { error: "user not found" };

  const stamp = Date.now();
  const update = db.prepare(
    "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
  );
  update.run(location, stamp, stamp, uniqueId);
  return { ok: true, location, modifiedAt: stamp };
}
4537
+
4538
/**
 * Moves one job from `jobs` to `raw_jobs`: the listed columns are copied and
 * the original row is deleted, both inside a single transaction.
 *
 * @param {string} uniqueId - Trimmed before use.
 * @returns {boolean} `false` without a database or with a blank id; `true`
 *   once the transaction has run (even if no row matched).
 */
function moveJobToRaw(uniqueId) {
  if (!db) return false;

  const safeId = String(uniqueId).trim();
  if (safeId === "") return false;

  const copyStmt = db.prepare(
    `
    INSERT OR REPLACE INTO raw_jobs (
      unique_id, nickname, status, sources, claimed_by, claimed_at,
      error, pinned, no_video, restricted, user_update_count,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location, modified_at,
      follower_count, following_count, heart_count, refresh_time,
      processed, processed_at, created_at, updated_at,
      region, signature, bio_link, sec_uid, status_code, latest_video_time,
      user_create_time
    )
    SELECT
      unique_id, nickname, status, sources, claimed_by, claimed_at,
      error, pinned, no_video, restricted, user_update_count,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location, modified_at,
      follower_count, following_count, heart_count, refresh_time,
      processed, processed_at, created_at, updated_at,
      region, signature, bio_link, sec_uid, status_code, latest_video_time,
      user_create_time
    FROM jobs WHERE unique_id = ?
  `,
  );
  const removeStmt = db.prepare("DELETE FROM jobs WHERE unique_id = ?");

  // Copy and delete must succeed or fail together.
  const runMove = db.transaction((id) => {
    copyStmt.run(id);
    removeStmt.run(id);
  });
  runMove(safeId);
  return true;
}
4575
+
4576
/**
 * Applies a batch of profile updates.
 *
 * SQLite mode: each item updates its `jobs_base` row through
 * `updateJobBaseInfo`; an item whose info is `{ error, statusCode }` only
 * records the status code. The row is then routed by its stored values:
 * a seller flag with `video_count > 0` moves it to `jobs`, anything else to
 * `raw_jobs`. Updates and moves run in ONE transaction, so a failure cannot
 * leave updated rows stranded in `jobs_base` or copied rows undeleted.
 *
 * File mode: the fields are merged onto the in-memory user (same
 * status-code-only rule); there is no table routing.
 *
 * @param {Array<{uniqueId: string, info: object}>} updates
 * @returns {Array<{uniqueId: string, ok?: true, userUpdateCount?: number,
 *   error?: string}>} One result per input item, in input order.
 */
function batchUpdateUserInfo(updates) {
  if (db) {
    // Columns copied when a row leaves jobs_base; shared by both targets.
    const moveColumns = `
      unique_id, nickname, status, sources, claimed_by, claimed_at,
      error, pinned, no_video, restricted, user_update_count,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location, modified_at,
      follower_count, following_count, heart_count, refresh_time,
      processed, processed_at, created_at, updated_at,
      region, signature, bio_link, sec_uid, status_code, latest_video_time,
      user_create_time
    `;
    // Keeps each IN (...) list well below SQLite's bound-parameter limit.
    const chunkSize = 500;

    // Copies the given jobs_base rows into targetTable, then deletes them.
    const moveFromBase = (targetTable, ids) => {
      for (let start = 0; start < ids.length; start += chunkSize) {
        const chunk = ids.slice(start, start + chunkSize);
        const placeholders = chunk.map(() => "?").join(",");
        db.prepare(
          `
          INSERT OR REPLACE INTO ${targetTable} (${moveColumns})
          SELECT ${moveColumns}
          FROM jobs_base WHERE unique_id IN (${placeholders})
        `,
        ).run(...chunk);
        db.prepare(
          `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
        ).run(...chunk);
      }
    };

    const results = [];
    const rawMoveList = [];
    const sellerMoveList = [];

    const txn = db.transaction((items) => {
      for (const item of items) {
        const uniqueId = item.uniqueId;
        const info = item.info;
        // { error: true, statusCode } means the fetch failed: store only the code.
        const payload =
          info && info.error && info.statusCode !== undefined
            ? { statusCode: info.statusCode }
            : info;
        const updateResult = updateJobBaseInfo(uniqueId, payload, true);

        if (updateResult.error) {
          results.push({ uniqueId, error: updateResult.error });
          continue;
        }

        // Route on the stored row, i.e. after the update has been applied.
        const row = getJobBaseRow(uniqueId);
        const ttSeller = row ? row.tt_seller : null;
        const videoCount = row ? row.video_count || 0 : 0;
        if (ttSeller && videoCount > 0) {
          sellerMoveList.push(uniqueId);
        } else {
          rawMoveList.push(uniqueId);
        }
        results.push({
          uniqueId,
          ok: true,
          userUpdateCount: updateResult.userUpdateCount,
        });
      }

      moveFromBase("jobs", sellerMoveList);
      moveFromBase("raw_jobs", rawMoveList);
    });
    txn(updates);

    return results;
  }

  const memResults = [];
  for (const item of updates) {
    const user = getUser(item.uniqueId);
    if (!user) {
      memResults.push({ uniqueId: item.uniqueId, error: "user not found" });
      continue;
    }

    const info = item.info;
    if (info && info.error && info.statusCode !== undefined) {
      // Failed fetch: only the status code is recorded.
      user.statusCode = info.statusCode;
    } else {
      // A missing info object merges nothing instead of aborting the batch.
      for (const [key, value] of Object.entries(info || {})) {
        if (key === "uniqueId" || key === "sources" || key === "error") {
          continue;
        }
        if (value !== undefined && value !== null && value !== "") {
          user[key] = value;
        }
      }
    }

    user.userUpdateCount = (user.userUpdateCount || 0) + 1;
    user.updatedAt = Date.now();
    memResults.push({
      uniqueId: item.uniqueId,
      ok: true,
      userUpdateCount: user.userUpdateCount,
    });
  }
  save();
  return memResults;
}
4739
+
4740
/**
 * Registers videos discovered for a user, skipping ids that are already known.
 *
 * Entries without an `id` are ignored in both storage modes; they are counted
 * neither as registered nor as skipped.
 *
 * @param {string} sourceUser - Stored as the author of every new video.
 * @param {Array<{id: string, href?: string, createTime?: number}>} videoList
 * @param {string} [locationCreated] - Stored as null when falsy.
 * @param {boolean} [ttSeller] - Seller flag copied onto each new video.
 * @returns {{registered: number, skipped: number}} `skipped` counts ids that
 *   were already registered (including repeats inside `videoList`).
 */
function registerVideos(sourceUser, videoList, locationCreated, ttSeller) {
  if (!Array.isArray(videoList) || videoList.length === 0) {
    return { registered: 0, skipped: 0 };
  }

  // Same pre-filter for both modes: null entries and id-less items are dropped.
  const candidates = videoList.filter((item) => item?.id);
  const now = Date.now();
  let registered = 0;
  let skipped = 0;

  if (db) {
    const insertStmt = db.prepare(`
      INSERT OR IGNORE INTO videos (
        id,
        href,
        author_unique_id,
        location_created,
        tt_seller,
        registered_at,
        user_update_count,
        create_time
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const txn = db.transaction((items) => {
      for (const item of items) {
        const result = insertStmt.run(
          item.id,
          item.href || null,
          sourceUser,
          locationCreated || null,
          ttSeller ? 1 : 0,
          now,
          0,
          item.createTime || null,
        );
        // INSERT OR IGNORE reports zero changes for an id that already exists.
        if (result.changes > 0) registered++;
        else skipped++;
      }
    });
    txn(candidates);
    return { registered, skipped };
  }

  const existingIds = new Set(videos.map((v) => v.id));
  for (const item of candidates) {
    if (existingIds.has(item.id)) {
      skipped++;
      continue;
    }
    videos.push({
      id: item.id,
      href: item.href,
      authorUniqueId: sourceUser,
      locationCreated: locationCreated || null,
      ttSeller: ttSeller || false,
      registeredAt: now,
      createTime: item.createTime || null,
    });
    existingIds.add(item.id);
    registered++;
  }

  saveVideos();
  return { registered, skipped };
}
4808
+
4809
/**
 * Returns every registered video.
 *
 * @returns {object[]} Mapped rows in SQLite mode; in file mode the live
 *   in-memory `videos` array itself (not a copy).
 */
function getVideos() {
  if (!db) return videos;
  const rows = getAllVideoRows();
  return rows.map(mapVideoRow);
}
4815
+
4816
/**
 * Looks up a single video by id.
 *
 * @param {string} videoId
 * @returns {object|null} `null` for a falsy id or, in file mode, an unknown
 *   id; in SQLite mode the result of `mapVideoRow` on the fetched row.
 */
function getVideo(videoId) {
  if (!videoId) return null;
  if (!db) {
    const match = videos.find((video) => video.id === videoId);
    return match || null;
  }
  const row = getVideoRow(videoId);
  return mapVideoRow(row);
}
4823
+
4824
/**
 * Returns one page of registered videos plus the total count.
 *
 * NOTE(review): the SQLite path orders by registered_at DESC, id DESC, while
 * the file path slices the array in its stored order - confirm whether the
 * two are meant to agree.
 *
 * @param {number|string} [limit] - Page size; defaults to 50, clamped to 1..100.
 * @param {number|string} [offset] - Start index; defaults to 0, never negative.
 * @returns {{total: number, limit: number, offset: number, videos: object[]}}
 */
function getVideosPage(limit, offset) {
  const requestedLimit = parseInt(limit) || 50;
  const safeLimit = Math.max(1, Math.min(100, requestedLimit));
  const requestedOffset = parseInt(offset) || 0;
  const safeOffset = Math.max(0, requestedOffset);

  if (!db) {
    const end = safeOffset + safeLimit;
    return {
      total: videos.length,
      limit: safeLimit,
      offset: safeOffset,
      videos: videos.slice(safeOffset, end),
    };
  }

  const pageStmt = db.prepare(
    `
    SELECT *
    FROM videos
    ORDER BY registered_at DESC, id DESC
    LIMIT ? OFFSET ?
  `,
  );
  const pageRows = pageStmt.all(safeLimit, safeOffset);
  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM videos").get();
  return {
    total,
    limit: safeLimit,
    offset: safeOffset,
    videos: pageRows.map(mapVideoRow),
  };
}
4855
+
4856
/**
 * Counts the registered videos.
 *
 * @returns {number} Row count of the `videos` table in SQLite mode, otherwise
 *   the length of the in-memory `videos` array.
 */
function getVideoCount() {
  if (!db) return videos.length;
  const { c } = db.prepare("SELECT COUNT(*) as c FROM videos").get();
  return c;
}
4862
+
4863
/**
 * Claims up to `limit` videos that still need comment processing.
 *
 * A video is pending while its update counter is missing or <= 0. Seller
 * videos come first, then the oldest registration. Each returned video has
 * its counter bumped by one, which is what marks it as claimed.
 *
 * @param {number|string} [limit] - Maximum number of tasks; falls back to 1
 *   and is clamped to at least 1 in both storage modes.
 * @returns {object[]} The claimed videos (mapped rows in SQLite mode).
 */
function getPendingCommentTasks(limit) {
  // Normalised once for both modes: the file path used to pass the raw value
  // to slice(), so an undefined limit claimed every pending video.
  const maxTasks = Math.max(1, parseInt(limit, 10) || 1);

  if (db) {
    const rows = db
      .prepare(
        `
        SELECT *
        FROM videos
        WHERE user_update_count IS NULL OR user_update_count <= 0
        ORDER BY tt_seller DESC, registered_at ASC
        LIMIT ?
      `,
      )
      .all(maxTasks);
    if (rows.length === 0) return [];

    const bumpStmt = db.prepare(
      `
      UPDATE videos
      SET user_update_count = COALESCE(user_update_count, 0) + 1
      WHERE id = ?
    `,
    );
    const bumpTxn = db.transaction((items) => {
      for (const item of items) bumpStmt.run(item.id);
    });
    bumpTxn(rows);

    // Rows were read before the bump, so mirror it on the returned objects.
    return rows.map((row) => {
      const mapped = mapVideoRow(row);
      mapped.userUpdateCount = (mapped.userUpdateCount || 0) + 1;
      return mapped;
    });
  }

  // filter() returns a new array, so sorting it leaves `videos` in place.
  const tasks = videos
    .filter((v) => (v.userUpdateCount || 0) <= 0)
    .sort((a, b) => {
      // Seller videos take priority.
      if (a.ttSeller && !b.ttSeller) return -1;
      if (!a.ttSeller && b.ttSeller) return 1;
      return (a.registeredAt || 0) - (b.registeredAt || 0);
    })
    .slice(0, maxTasks);

  for (const task of tasks) {
    task.userUpdateCount = (task.userUpdateCount || 0) + 1;
  }
  saveVideos();
  return tasks;
}
4913
+
4914
/**
 * Marks a comment task as committed by bumping the video's update counter.
 *
 * @param {string} videoId
 * @returns {{ok: true, userUpdateCount: number}|{ok: false, error: string}}
 *   The new counter value, or an error when the video is unknown.
 */
function commitCommentTask(videoId) {
  if (!db) {
    const entry = videos.find((v) => v.id === videoId);
    if (!entry) return { ok: false, error: "video not found" };
    entry.userUpdateCount = (entry.userUpdateCount || 0) + 1;
    saveVideos();
    return { ok: true, userUpdateCount: entry.userUpdateCount };
  }

  const row = getVideoRow(videoId);
  if (!row) return { ok: false, error: "video not found" };

  // The new value is computed from the row that was just read.
  const userUpdateCount = (row.user_update_count || 0) + 1;
  const bump = db.prepare(
    `
    UPDATE videos
    SET user_update_count = ?
    WHERE id = ?
  `,
  );
  bump.run(userUpdateCount, videoId);
  return { ok: true, userUpdateCount };
}
4935
+
4936
+ return {
4937
+ save,
4938
+ flushSave,
4939
+ getUser,
4940
+ hasUser,
4941
+ userExists,
4942
+ addUser,
4943
+ addRawUsers,
4944
+ getPendingUsers,
4945
+ getProcessedUsers,
4946
+ getAllUsers,
4947
+ getUserDbCount,
4948
+ getJobsCount,
4949
+ getRawJobsCount,
4950
+ getPendingJobsCount,
4951
+ getPendingJobsUserUpdateCount,
4952
+ getDashboardStats: getDashboardStatsFromDb,
4953
+ getPendingByCountry: getPendingByCountryFromDb,
4954
+ getUserUpdateByCountry: getUserUpdateByCountryFromDb,
4955
+ getAttachStuckByCountry: getAttachStuckByCountryFromDb,
4956
+ getRawByCountry: getRawByCountryFromDb,
4957
+ moveJobsToRawByCountry,
4958
+ restoreAttachStuckByCountry,
4959
+ resetPendingByCountry,
4960
+ restoreRawJobsByCountry,
4961
+ restoreRawJobById,
4962
+ restoreRawJobsByFilter,
4963
+ getUsersPage: getUsersPageFromDb,
4964
+ getRawJobsPage: getRawJobsPageFromDb,
4965
+ getTargetUsers: getTargetUsersFromDb,
4966
+ getTargetUsersByCountry: getTargetUsersByCountryFromDb,
4967
+ getStats,
4968
+ getStatusGroups,
4969
+ markGroupsDirty,
4970
+ refillJobsFromRaw,
4971
+ scoreJobLocation,
4972
+ scoreJobsBatch,
4973
+ claimNextJob,
4974
+ commitJob,
4975
+ commitNewExplore,
4976
+ resetJob,
4977
+ togglePin,
4978
+ getNextRedoJob,
4979
+ commitRedoJob,
4980
+ getPendingUserUpdateTasks,
4981
+ updateUserInfo,
4982
+ updateUserLocation,
4983
+ batchUpdateUserInfo,
4984
+ reportClientError,
4985
+ deleteClientError,
4986
+ getClientErrors,
4987
+ getClientLoginStatus,
4988
+ trackClient,
4989
+ getActiveClients,
4990
+ registerVideos,
4991
+ getVideo,
4992
+ getVideos,
4993
+ getVideosPage,
4994
+ getVideoCount,
4995
+ getPendingCommentTasks,
4996
+ commitCommentTask,
4997
+ debugClaimNextJob,
4998
+ stopBackup,
4999
+ backupDatabase, // 手动备份数据库
5000
+ rawQuery,
5001
+ getLlmSampleOffsets, // 获取 LLM 采样偏移量状态
5002
+ // Tag 发现与打分
5003
+ insertTag,
5004
+ getTagsByStatus,
5005
+ getTagsByCountry,
5006
+ getDeadTags,
5007
+ claimTag,
5008
+ reportTagScore,
5009
+ getAllTags,
5010
+ normalizeTags,
5011
+ clearTags,
5012
+ data,
5013
+ };
5014
+
5015
/**
 * Helper: returns the entries of `llmSampleOffsets` as a plain object.
 *
 * Declared after the factory's return statement; this works because function
 * declarations are hoisted.
 *
 * @returns {object} A fresh object built from the key/value entries.
 */
function getLlmSampleOffsets() {
  const offsets = Object.fromEntries(llmSampleOffsets);
  return offsets;
}
5019
+ }