tt-help-cli-ycl 1.3.93 → 1.3.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1580 +1,190 @@
1
- import fs from "fs";
2
- import path from "path";
3
- import Database from "better-sqlite3";
4
- import {
5
- isLocationInList,
6
- DEFAULT_TARGET_LOCATIONS,
7
- } from "../lib/target-locations.js";
8
-
9
- // SQLite 用户表(用于判重)
10
- let db = null;
11
- let dbPath = null;
12
-
13
- function normalizeDbFilePath(filePath) {
14
- if (!filePath) {
15
- throw new Error("db path is required");
16
- }
17
- const resolved = path.resolve(filePath);
18
- if (path.extname(resolved).toLowerCase() !== ".db") {
19
- throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
20
- }
21
- return resolved;
22
- }
23
-
24
- function resetDbConnection() {
25
- if (db) {
26
- db.close();
27
- db = null;
28
- }
29
- dbPath = null;
30
- }
31
-
32
- function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
33
- const merged = new Map();
34
-
35
- const tryLoad = (targetPath, label) => {
36
- if (!targetPath) return;
37
- if (!fs.existsSync(targetPath)) return;
38
- try {
39
- const parsed = JSON.parse(fs.readFileSync(targetPath, "utf-8"));
40
- if (!Array.isArray(parsed)) return;
41
- for (const item of parsed) {
42
- const uniqueId = item?.uniqueId || item?.unique_id;
43
- if (!uniqueId) continue;
44
- merged.set(uniqueId, {
45
- ...merged.get(uniqueId),
46
- ...item,
47
- uniqueId,
48
- });
49
- }
50
- } catch (e) {
51
- console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
52
- }
53
- };
54
-
55
- tryLoad(userFilePath, "result.json");
56
- tryLoad(doneFilePath, "result-done.json");
57
-
58
- return [...merged.values()];
59
- }
60
-
61
- function loadLegacyVideosFromFile(videoPath) {
62
- if (!videoPath) return [];
63
- if (!fs.existsSync(videoPath)) return [];
64
-
65
- try {
66
- const parsed = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
67
- return Array.isArray(parsed) ? parsed : [];
68
- } catch (e) {
69
- console.error(
70
- `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
71
- );
72
- return [];
73
- }
74
- }
75
-
76
- function initUserDb(filePath) {
77
- dbPath = normalizeDbFilePath(filePath);
78
- fs.mkdirSync(path.dirname(dbPath), { recursive: true });
79
- db = new Database(dbPath);
80
- db.pragma("journal_mode = WAL");
81
- db.exec(`
82
- CREATE TABLE IF NOT EXISTS users (
83
- unique_id TEXT PRIMARY KEY,
84
- tt_seller TEXT,
85
- verified INTEGER,
86
- location_created TEXT,
87
- created_at TEXT,
88
- updated_at TEXT
89
- )
90
- `);
91
- db.exec(`
92
- CREATE TABLE IF NOT EXISTS jobs (
93
- unique_id TEXT PRIMARY KEY,
94
- nickname TEXT,
95
- status TEXT DEFAULT 'pending',
96
- sources TEXT,
97
- claimed_by TEXT,
98
- claimed_at INTEGER,
99
- error TEXT,
100
- pinned INTEGER DEFAULT 0,
101
- no_video INTEGER DEFAULT 0,
102
- restricted INTEGER DEFAULT 0,
103
- user_update_count INTEGER DEFAULT 0,
104
- tt_seller INTEGER,
105
- verified INTEGER,
106
- video_count INTEGER DEFAULT 0,
107
- comment_count INTEGER DEFAULT 0,
108
- guessed_location TEXT,
109
- location_created TEXT,
110
- confirmed_location TEXT,
111
- modified_at INTEGER,
112
- follower_count INTEGER DEFAULT 0,
113
- following_count INTEGER DEFAULT 0,
114
- heart_count INTEGER DEFAULT 0,
115
- refresh_time INTEGER,
116
- processed INTEGER DEFAULT 0,
117
- processed_at INTEGER,
118
- created_at INTEGER,
119
- updated_at INTEGER,
120
- region TEXT,
121
- signature TEXT,
122
- sec_uid TEXT,
123
- status_code INTEGER
124
- )
125
- `);
126
-
127
- // 迁移:为已存在的 jobs 表添加 status_code 列
128
- const existingJobColumns = new Set(
129
- db
130
- .prepare("PRAGMA table_info(jobs)")
131
- .all()
132
- .map((c) => c.name),
133
- );
134
- if (!existingJobColumns.has("status_code")) {
135
- db.exec(`ALTER TABLE jobs ADD COLUMN status_code INTEGER`);
136
- }
137
- if (!existingJobColumns.has("latest_video_time")) {
138
- db.exec(`ALTER TABLE jobs ADD COLUMN latest_video_time INTEGER`);
139
- }
140
- if (!existingJobColumns.has("confirmed_location")) {
141
- db.exec(`ALTER TABLE jobs ADD COLUMN confirmed_location TEXT`);
142
- }
143
- if (!existingJobColumns.has("modified_at")) {
144
- db.exec(`ALTER TABLE jobs ADD COLUMN modified_at INTEGER`);
145
- }
146
- if (!existingJobColumns.has("bio_link")) {
147
- db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
148
- }
149
- if (!existingJobColumns.has("top_video_play_count")) {
150
- db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
151
- }
152
- if (!existingJobColumns.has("top_video_href")) {
153
- db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
154
- }
155
- if (!existingJobColumns.has("user_create_time")) {
156
- db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
157
- }
158
- db.exec(`
159
- CREATE TABLE IF NOT EXISTS jobs_base (
160
- unique_id TEXT PRIMARY KEY,
161
- nickname TEXT,
162
- status TEXT DEFAULT 'pending',
163
- sources TEXT,
164
- claimed_by TEXT,
165
- claimed_at INTEGER,
166
- error TEXT,
167
- pinned INTEGER DEFAULT 0,
168
- no_video INTEGER DEFAULT 0,
169
- restricted INTEGER DEFAULT 0,
170
- user_update_count INTEGER DEFAULT 0,
171
- tt_seller INTEGER,
172
- verified INTEGER,
173
- video_count INTEGER DEFAULT 0,
174
- comment_count INTEGER DEFAULT 0,
175
- guessed_location TEXT,
176
- location_created TEXT,
177
- confirmed_location TEXT,
178
- modified_at INTEGER,
179
- follower_count INTEGER DEFAULT 0,
180
- following_count INTEGER DEFAULT 0,
181
- heart_count INTEGER DEFAULT 0,
182
- refresh_time INTEGER,
183
- processed INTEGER DEFAULT 0,
184
- processed_at INTEGER,
185
- created_at INTEGER,
186
- updated_at INTEGER,
187
- region TEXT,
188
- signature TEXT,
189
- sec_uid TEXT,
190
- status_code INTEGER,
191
- latest_video_time INTEGER,
192
- bio_link TEXT
193
- )
194
- `);
195
-
196
- // 迁移:为已存在的 jobs_base 表补全列
197
- const existingJobBaseColumns = new Set(
198
- db
199
- .prepare("PRAGMA table_info(jobs_base)")
200
- .all()
201
- .map((c) => c.name),
202
- );
203
- if (!existingJobBaseColumns.has("status_code")) {
204
- db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
205
- }
206
- if (!existingJobBaseColumns.has("latest_video_time")) {
207
- db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
208
- }
209
- if (!existingJobBaseColumns.has("confirmed_location")) {
210
- db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
211
- }
212
- if (!existingJobBaseColumns.has("modified_at")) {
213
- db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
214
- }
215
- if (!existingJobBaseColumns.has("bio_link")) {
216
- db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
217
- }
218
- if (!existingJobBaseColumns.has("user_create_time")) {
219
- db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
220
- }
221
- db.exec(`
222
- CREATE TABLE IF NOT EXISTS raw_jobs (
223
- unique_id TEXT PRIMARY KEY,
224
- nickname TEXT,
225
- status TEXT DEFAULT 'pending',
226
- sources TEXT,
227
- claimed_by TEXT,
228
- claimed_at INTEGER,
229
- error TEXT,
230
- pinned INTEGER DEFAULT 0,
231
- no_video INTEGER DEFAULT 0,
232
- restricted INTEGER DEFAULT 0,
233
- user_update_count INTEGER DEFAULT 0,
234
- tt_seller INTEGER,
235
- verified INTEGER,
236
- video_count INTEGER DEFAULT 0,
237
- comment_count INTEGER DEFAULT 0,
238
- guessed_location TEXT,
239
- location_created TEXT,
240
- confirmed_location TEXT,
241
- modified_at INTEGER,
242
- follower_count INTEGER DEFAULT 0,
243
- following_count INTEGER DEFAULT 0,
244
- heart_count INTEGER DEFAULT 0,
245
- refresh_time INTEGER,
246
- processed INTEGER DEFAULT 0,
247
- processed_at INTEGER,
248
- created_at INTEGER,
249
- updated_at INTEGER,
250
- region TEXT,
251
- signature TEXT,
252
- sec_uid TEXT,
253
- status_code INTEGER,
254
- latest_video_time INTEGER
255
- )
256
- `);
257
-
258
- // 迁移:为已存在的 raw_jobs 表添加 status_code 列
259
- const existingRawJobColumns = new Set(
260
- db
261
- .prepare("PRAGMA table_info(raw_jobs)")
262
- .all()
263
- .map((c) => c.name),
264
- );
265
- if (!existingRawJobColumns.has("status_code")) {
266
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN status_code INTEGER`);
267
- }
268
- if (!existingRawJobColumns.has("latest_video_time")) {
269
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN latest_video_time INTEGER`);
270
- }
271
- if (!existingRawJobColumns.has("confirmed_location")) {
272
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN confirmed_location TEXT`);
273
- }
274
- if (!existingRawJobColumns.has("modified_at")) {
275
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN modified_at INTEGER`);
276
- }
277
- if (!existingRawJobColumns.has("bio_link")) {
278
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
279
- }
280
- if (!existingRawJobColumns.has("user_create_time")) {
281
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
282
- }
283
- db.exec(`
284
- CREATE TABLE IF NOT EXISTS videos (
285
- id TEXT PRIMARY KEY,
286
- href TEXT,
287
- author_unique_id TEXT,
288
- location_created TEXT,
289
- tt_seller INTEGER DEFAULT 0,
290
- registered_at INTEGER,
291
- user_update_count INTEGER DEFAULT 0,
292
- play_count INTEGER,
293
- digg_count INTEGER,
294
- comment_count INTEGER,
295
- share_count INTEGER,
296
- collect_count INTEGER,
297
- stats_updated_at INTEGER,
298
- create_time INTEGER
299
- )
300
- `);
301
- db.exec(`
302
- CREATE INDEX IF NOT EXISTS idx_jobs_status_video
303
- ON jobs(status, video_count DESC)
304
- `);
305
- db.exec(`
306
- CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
307
- ON jobs(claimed_by, status, claimed_at)
308
- `);
309
- db.exec(`
310
- CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
311
- ON jobs(status, claimed_at)
312
- `);
313
- db.exec(`
314
- CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
315
- ON jobs(tt_seller, verified, location_created, refresh_time)
316
- `);
317
- db.exec(`
318
- CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
319
- ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
320
- `);
321
- db.exec(`
322
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
323
- ON jobs(created_at ASC, unique_id ASC)
324
- WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
325
- `);
326
- db.exec(`
327
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
328
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
329
- WHERE status = 'pending'
330
- AND COALESCE(pinned, 0) = 0
331
- AND tt_seller = 1
332
- AND verified = 0
333
- `);
334
- db.exec(`
335
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
336
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
337
- WHERE status = 'pending'
338
- AND COALESCE(pinned, 0) = 0
339
- AND (
340
- instr(COALESCE(sources, ''), '"following"') > 0
341
- OR instr(COALESCE(sources, ''), '"follower"') > 0
342
- )
343
- `);
344
- db.exec(`
345
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
346
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
347
- WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
348
- `);
349
- db.exec(`
350
- CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
351
- ON jobs(created_at ASC, unique_id ASC)
352
- WHERE (tt_seller IS NULL OR tt_seller = '')
353
- AND (user_update_count IS NULL OR user_update_count <= 0)
354
- `);
355
- db.exec(`
356
- CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
357
- ON jobs(created_at ASC, unique_id ASC)
358
- WHERE COALESCE(tt_seller, '') = ''
359
- AND COALESCE(user_update_count, 0) <= 0
360
- `);
361
- db.exec(`
362
- CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
363
- ON videos(user_update_count, tt_seller DESC, registered_at ASC)
364
- `);
365
- db.exec(`
366
- CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
367
- ON videos(tt_seller DESC, registered_at ASC, id)
368
- WHERE user_update_count IS NULL OR user_update_count <= 0
369
- `);
370
-
371
- const existingVideoColumns = new Set(
372
- db
373
- .prepare("PRAGMA table_info(videos)")
374
- .all()
375
- .map((column) => column.name),
376
- );
377
- const requiredVideoColumns = {
378
- play_count: "INTEGER",
379
- digg_count: "INTEGER",
380
- comment_count: "INTEGER",
381
- share_count: "INTEGER",
382
- collect_count: "INTEGER",
383
- stats_updated_at: "INTEGER",
384
- };
385
- for (const [column, type] of Object.entries(requiredVideoColumns)) {
386
- if (!existingVideoColumns.has(column)) {
387
- db.exec(`ALTER TABLE videos ADD COLUMN ${column} ${type}`);
388
- }
389
- }
390
-
391
- // 迁移:videos 表添加 create_time 列
392
- if (!existingVideoColumns.has("create_time")) {
393
- db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
394
- }
395
-
396
- // tags 表:标签发现与打分系统
397
- db.exec(`
398
- CREATE TABLE IF NOT EXISTS tags (
399
- id INTEGER PRIMARY KEY AUTOINCREMENT,
400
- tag TEXT NOT NULL UNIQUE,
401
- status TEXT NOT NULL DEFAULT 'new',
402
- score REAL NOT NULL DEFAULT 0,
403
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
404
- scored_at TEXT,
405
- score_count INTEGER NOT NULL DEFAULT 0,
406
- countries TEXT NOT NULL DEFAULT '[]',
407
- matched_countries TEXT DEFAULT '[]',
408
- total_posts INTEGER DEFAULT 0,
409
- author_count INTEGER DEFAULT 0,
410
- matched_authors INTEGER DEFAULT 0,
411
- pushed_users INTEGER DEFAULT 0,
412
- source TEXT NOT NULL DEFAULT 'llm',
413
- user_prompt TEXT,
414
- last_error TEXT
415
- )
416
- `);
417
- db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
418
- db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
419
-
420
- const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
421
- console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
422
- }
423
-
424
- export function importLegacyJsonToDb({
425
- dbFilePath,
426
- usersFilePath,
427
- doneFilePath,
428
- videosFilePath,
429
- }) {
430
- resetDbConnection();
431
- initUserDb(dbFilePath);
432
-
433
- const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
434
- const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
435
-
436
- const beforeUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
437
- const beforeJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
438
- const beforeVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
439
-
440
- const insertUserStmt = db.prepare(`
441
- INSERT OR IGNORE INTO users (unique_id) VALUES (?)
442
- `);
443
- const insertVideoStmt = db.prepare(`
444
- INSERT OR IGNORE INTO videos (
445
- id,
446
- href,
447
- author_unique_id,
448
- location_created,
449
- tt_seller,
450
- registered_at,
451
- user_update_count,
452
- create_time
453
- )
454
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
455
- `);
456
-
457
- const importUsersTxn = db.transaction((items) => {
458
- for (const item of items) {
459
- const uniqueId = item.uniqueId || item.unique_id;
460
- if (!uniqueId) continue;
461
- insertUserStmt.run(uniqueId);
462
- addJobToDb({ ...item, uniqueId });
463
- }
464
- });
465
-
466
- const importVideosTxn = db.transaction((items) => {
467
- for (const item of items) {
468
- if (!item?.id) continue;
469
- insertVideoStmt.run(
470
- item.id,
471
- item.href || null,
472
- item.authorUniqueId || item.author_unique_id || null,
473
- item.locationCreated || item.location_created || null,
474
- item.ttSeller ? 1 : 0,
475
- item.registeredAt || item.registered_at || Date.now(),
476
- item.userUpdateCount || item.user_update_count || 0,
477
- item.createTime || item.create_time || null,
478
- );
479
- }
480
- });
481
-
482
- importUsersTxn(legacyUsers);
483
- importVideosTxn(legacyVideos);
484
-
485
- const afterUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
486
- const afterJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
487
- const afterVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
488
-
489
- return {
490
- dbPath,
491
- usersImported: afterUsers - beforeUsers,
492
- jobsImported: afterJobs - beforeJobs,
493
- videosImported: afterVideos - beforeVideos,
494
- totalUsers: afterUsers,
495
- totalJobs: afterJobs,
496
- totalVideos: afterVideos,
497
- };
498
- }
499
-
500
- export function closeStoreDb() {
501
- resetDbConnection();
502
- }
503
-
504
- function hasUserInDb(uid) {
505
- if (!db) return false;
506
- const row = db.prepare("SELECT 1 FROM users WHERE unique_id = ?").get(uid);
507
- return !!row;
508
- }
509
-
510
- function addUserToDb(user) {
511
- if (!db) return;
512
- db.prepare(
513
- `
514
- INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
515
- VALUES (?, ?, ?, ?, ?, ?)
516
- `,
517
- ).run(
518
- user.uniqueId,
519
- user.ttSeller === undefined ||
520
- user.ttSeller === null ||
521
- user.ttSeller === ""
522
- ? null
523
- : user.ttSeller
524
- ? 1
525
- : 0,
526
- user.verified === undefined ||
527
- user.verified === null ||
528
- user.verified === ""
529
- ? null
530
- : user.verified
531
- ? 1
532
- : 0,
533
- user.locationCreated || null,
534
- new Date().toISOString(),
535
- new Date().toISOString(),
536
- );
537
- }
538
-
539
- function addJobToDb(user) {
540
- if (!db) return;
541
- const now = Date.now();
542
- db.prepare(
543
- `
544
- INSERT OR IGNORE INTO jobs (
545
- unique_id,
546
- nickname,
547
- status,
548
- sources,
549
- claimed_by,
550
- claimed_at,
551
- error,
552
- pinned,
553
- no_video,
554
- restricted,
555
- user_update_count,
556
- tt_seller,
557
- verified,
558
- video_count,
559
- comment_count,
560
- guessed_location,
561
- location_created,
562
- follower_count,
563
- following_count,
564
- heart_count,
565
- refresh_time,
566
- processed,
567
- processed_at,
568
- created_at,
569
- updated_at,
570
- region,
571
- signature,
572
- bio_link,
573
- sec_uid
574
- )
575
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
576
- `,
577
- ).run(
578
- user.uniqueId,
579
- user.nickname || null,
580
- user.status || inferStatus(user),
581
- JSON.stringify(
582
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
583
- ),
584
- user.claimedBy || null,
585
- user.claimedAt || null,
586
- user.error || null,
587
- user.pinned ? 1 : 0,
588
- user.noVideo ? 1 : 0,
589
- user.restricted ? 1 : 0,
590
- user.userUpdateCount || 0,
591
- user.ttSeller === undefined ||
592
- user.ttSeller === null ||
593
- user.ttSeller === ""
594
- ? null
595
- : user.ttSeller
596
- ? 1
597
- : 0,
598
- user.verified === undefined ||
599
- user.verified === null ||
600
- user.verified === ""
601
- ? null
602
- : user.verified
603
- ? 1
604
- : 0,
605
- user.videoCount || 0,
606
- user.commentCount || 0,
607
- user.guessedLocation || null,
608
- user.locationCreated || null,
609
- user.followerCount || 0,
610
- user.followingCount || 0,
611
- user.heartCount || 0,
612
- user.refreshTime || null,
613
- user.processed ? 1 : 0,
614
- user.processedAt || null,
615
- user.createdAt || now,
616
- user.updatedAt || now,
617
- user.region || null,
618
- user.signature || null,
619
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
620
- user.secUid || null,
621
- );
622
- }
623
-
624
- function getUserDbCount() {
625
- if (!db) return 0;
626
- return db.prepare("SELECT COUNT(*) as c FROM users").get().c;
627
- }
628
-
629
- function getJobsCount() {
630
- if (!db) return 0;
631
- return db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
632
- }
633
-
634
- function getPendingJobsCount() {
635
- if (!db) return 0;
636
- return db
637
- .prepare("SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'")
638
- .get().c;
639
- }
640
-
641
- function getPendingJobsUserUpdateCount() {
642
- if (!db) return 0;
643
- return db
644
- .prepare(
645
- `
646
- SELECT COUNT(*) as c
647
- FROM jobs
648
- WHERE COALESCE(tt_seller, '') = ''
649
- AND COALESCE(user_update_count, 0) <= 0
650
- `,
651
- )
652
- .get().c;
653
- }
654
-
655
- function getRawJobsCount() {
656
- if (!db) return 0;
657
- return db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get().c;
658
- }
659
-
660
- function getDashboardStatsFromDb(targetLocations = []) {
661
- if (!db) return null;
662
-
663
- const targetPlaceholders = targetLocations.map(() => "?").join(", ");
664
- const targetParams = targetLocations.length ? targetLocations : [];
665
-
666
- // 合并所有 jobs 表的聚合统计为单次扫描
667
- const aggregateRow = db
668
- .prepare(
669
- `
670
- SELECT
671
- COUNT(*) as total,
672
- SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
673
- SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
674
- SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
675
- SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
676
- SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
677
- SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${
678
- targetLocations.length
679
- ? `AND location_created IN (${targetPlaceholders})`
680
- : "AND 1 = 0"
681
- } THEN 1 ELSE 0 END) as targetUsers,
682
- SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
683
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
684
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
685
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
686
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
687
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
688
- SUM(CASE
689
- WHEN status != 'done'
690
- AND instr(COALESCE(sources, ''), '"video"') = 0
691
- AND instr(COALESCE(sources, ''), '"comment"') = 0
692
- AND instr(COALESCE(sources, ''), '"guess"') = 0
693
- AND instr(COALESCE(sources, ''), '"following"') = 0
694
- AND instr(COALESCE(sources, ''), '"follower"') = 0
695
- THEN 1 ELSE 0 END) as seed
696
- FROM jobs
697
- `,
698
- )
699
- .get(...targetParams);
700
-
701
- // userUpdateTasks 单独从 jobs_base 统计
702
- const userUpdateTasksRow = db
703
- .prepare(
704
- `
705
- SELECT COUNT(*) as userUpdateTasks
706
- FROM jobs_base
707
- WHERE COALESCE(tt_seller, '') = ''
708
- AND COALESCE(user_update_count, 0) <= 0
709
- `,
710
- )
711
- .get();
712
-
713
- // countryStats 和 targetCountryStats 需要 GROUP BY,保留为独立查询
714
- const countryStats = db
715
- .prepare(
716
- `
717
- SELECT
718
- COALESCE(location_created, '未知') as country,
719
- COUNT(*) as count,
720
- SUM(CASE
721
- WHEN tt_seller = 1 AND verified = 0 ${
722
- targetLocations.length
723
- ? `AND location_created IN (${targetPlaceholders})`
724
- : "AND 1 = 0"
725
- }
726
- THEN 1 ELSE 0 END) as targetCount
727
- FROM jobs
728
- WHERE status = 'done'
729
- GROUP BY COALESCE(location_created, '未知')
730
- ORDER BY count DESC
731
- `,
732
- )
733
- .all(...targetParams);
734
-
735
- const targetCountryStats = targetLocations.length
736
- ? db
737
- .prepare(
738
- `
739
- SELECT location_created as country, COUNT(*) as count
740
- FROM jobs
741
- WHERE tt_seller = 1
742
- AND verified = 0
743
- AND location_created IN (${targetPlaceholders})
744
- GROUP BY location_created
745
- ORDER BY count DESC
746
- `,
747
- )
748
- .all(...targetLocations)
749
- : [];
750
-
751
- const jobsBaseCount = db
752
- .prepare("SELECT COUNT(*) as total FROM jobs_base")
753
- .get().total;
754
-
755
- return {
756
- totalUsers: aggregateRow.total,
757
- rawJobs: getRawJobsCount(),
758
- dbTotalUsers: getUserDbCount(),
759
- jobsTotal: aggregateRow.total,
760
- jobsBaseTotal: jobsBaseCount,
761
- jobsPending: aggregateRow.pending,
762
- processedUsers: aggregateRow.done,
763
- pendingUsers: aggregateRow.pending,
764
- processingUsers: aggregateRow.processing,
765
- restrictedUsers: aggregateRow.restricted,
766
- errorUsers: aggregateRow.error,
767
- targetUsers: aggregateRow.targetUsers,
768
- userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
769
- targetCountryStats,
770
- countryStats,
771
- sourceStats: {
772
- seed: aggregateRow.seed || 0,
773
- video: aggregateRow.video || 0,
774
- comment: aggregateRow.comment || 0,
775
- guess: aggregateRow.guess || 0,
776
- following: aggregateRow.following || 0,
777
- follower: aggregateRow.follower || 0,
778
- processed: aggregateRow.done,
779
- restricted: aggregateRow.restricted,
780
- error: aggregateRow.error,
781
- noVideo: aggregateRow.noVideo || 0,
782
- },
783
- };
784
- }
785
-
786
- function getPendingByCountryFromDb() {
787
- if (!db) return [];
788
-
789
- // 按 guessed_location 分组统计待处理任务
790
- const rows = db
791
- .prepare(
792
- `
793
- SELECT
794
- COALESCE(guessed_location, '未知') as country,
795
- COUNT(*) as count
796
- FROM jobs
797
- WHERE status = 'pending'
798
- GROUP BY COALESCE(guessed_location, '未知')
799
- ORDER BY count DESC
800
- `,
801
- )
802
- .all();
803
-
804
- return rows;
805
- }
806
-
807
- function getUserUpdateByCountryFromDb() {
808
- if (!db) return [];
809
-
810
- // 按 guessed_location 分组统计待补资料任务
811
- const rows = db
812
- .prepare(
813
- `
814
- SELECT
815
- COALESCE(guessed_location, '未知') as country,
816
- COUNT(*) as count
817
- FROM jobs_base
818
- WHERE tt_seller IS NULL
819
- AND COALESCE(user_update_count, 0) <= 0
820
- GROUP BY COALESCE(guessed_location, '未知')
821
- ORDER BY count DESC
822
- `,
823
- )
824
- .all();
825
-
826
- return rows;
827
- }
828
-
829
- function getAttachStuckByCountryFromDb() {
830
- if (!db) return [];
831
-
832
- return db
833
- .prepare(
834
- `
835
- SELECT
836
- COALESCE(guessed_location, '未知') as country,
837
- COUNT(*) as count
838
- FROM jobs_base
839
- WHERE tt_seller IS NULL
840
- AND COALESCE(user_update_count, 0) = 1
841
- GROUP BY COALESCE(guessed_location, '未知')
842
- ORDER BY count DESC
843
- `,
844
- )
845
- .all();
846
- }
847
-
848
- function restoreAttachStuckByCountry(country) {
849
- if (!db) {
850
- return { restored: 0, country, error: "db not ready" };
851
- }
852
-
853
- const normalizedCountry = String(country == null ? "未知" : country).trim();
854
- if (!normalizedCountry) {
855
- return {
856
- restored: 0,
857
- country: normalizedCountry,
858
- error: "country is required",
859
- };
860
- }
861
-
862
- const whereSql = `
863
- COALESCE(tt_seller, '') = ''
864
- AND COALESCE(user_update_count, 0) = 1
865
- AND COALESCE(guessed_location, '未知') = ?
866
- `;
867
- const count =
868
- db
869
- .prepare(
870
- `
871
- SELECT COUNT(*) as c
872
- FROM jobs_base
873
- WHERE ${whereSql}
874
- `,
875
- )
876
- .get(normalizedCountry)?.c || 0;
877
-
878
- if (!count) {
879
- return { restored: 0, country: normalizedCountry };
880
- }
881
-
882
- db.prepare(
883
- `
884
- UPDATE jobs_base
885
- SET user_update_count = 0,
886
- updated_at = ?,
887
- claimed_by = NULL,
888
- claimed_at = NULL
889
- WHERE ${whereSql}
890
- `,
891
- ).run(Date.now(), normalizedCountry);
892
-
893
- return { restored: count, country: normalizedCountry };
894
- }
895
-
896
- function resetPendingByCountry(country) {
897
- if (!db) {
898
- return { reset: 0, country, error: "db not ready" };
899
- }
900
-
901
- const normalizedCountry = String(country == null ? "未知" : country).trim();
902
- if (!normalizedCountry) {
903
- return {
904
- reset: 0,
905
- country: normalizedCountry,
906
- error: "country is required",
907
- };
908
- }
909
-
910
- const whereSql = `
911
- status = 'pending'
912
- AND COALESCE(guessed_location, '未知') = ?
913
- `;
914
- const count =
915
- db
916
- .prepare(
917
- `
918
- SELECT COUNT(*) as c
919
- FROM jobs
920
- WHERE ${whereSql}
921
- `,
922
- )
923
- .get(normalizedCountry)?.c || 0;
924
-
925
- if (!count) {
926
- return { reset: 0, country: normalizedCountry };
927
- }
928
-
929
- db.prepare(
930
- `
931
- UPDATE jobs
932
- SET user_update_count = 0,
933
- updated_at = ?,
934
- claimed_by = NULL,
935
- claimed_at = NULL
936
- WHERE ${whereSql}
937
- `,
938
- ).run(Date.now(), normalizedCountry);
939
-
940
- return { reset: count, country: normalizedCountry };
941
- }
942
-
943
- function getRawByCountryFromDb() {
944
- if (!db) return [];
945
-
946
- return db
947
- .prepare(
948
- `
949
- SELECT
950
- COALESCE(guessed_location, '未知') as country,
951
- COUNT(*) as count
952
- FROM raw_jobs
953
- GROUP BY COALESCE(guessed_location, '未知')
954
- ORDER BY count DESC
955
- `,
956
- )
957
- .all();
958
- }
959
-
960
- function moveJobsToRawByCountry(scope, country) {
961
- if (!db) {
962
- return { moved: 0, scope, country, error: "db not ready" };
963
- }
964
-
965
- const normalizedScope = String(scope || "").trim();
966
- const normalizedCountry = String(country == null ? "未知" : country).trim();
967
- if (!normalizedCountry) {
968
- return {
969
- moved: 0,
970
- scope: normalizedScope,
971
- country: normalizedCountry,
972
- error: "country is required",
973
- };
974
- }
975
-
976
- // pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
977
- // userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
978
- let sourceTable = "";
979
- let scopeWhere = "";
980
- let columns = "";
981
-
982
- if (normalizedScope === "pending") {
983
- sourceTable = "jobs";
984
- scopeWhere = `status = 'pending'`;
985
- columns = `
986
- unique_id, nickname, status, sources, claimed_by, claimed_at,
987
- error, pinned, no_video, restricted, user_update_count,
988
- tt_seller, verified, video_count, comment_count,
989
- guessed_location, location_created, follower_count,
990
- following_count, heart_count, refresh_time, processed,
991
- processed_at, created_at, updated_at, region, signature,
992
- sec_uid, latest_video_time, user_create_time
993
- `;
994
- } else if (normalizedScope === "userUpdate") {
995
- sourceTable = "jobs_base";
996
- scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
997
- columns = `
998
- unique_id, nickname, status, sources, claimed_by, claimed_at,
999
- error, pinned, no_video, restricted, user_update_count,
1000
- tt_seller, verified, video_count, comment_count,
1001
- guessed_location, location_created, follower_count,
1002
- following_count, heart_count, refresh_time, processed,
1003
- processed_at, created_at, updated_at, region, signature,
1004
- sec_uid, latest_video_time, user_create_time
1005
- `;
1006
- } else {
1007
- return {
1008
- moved: 0,
1009
- scope: normalizedScope,
1010
- country: normalizedCountry,
1011
- error: "unsupported scope",
1012
- };
1013
- }
1014
-
1015
- const whereSql = `
1016
- ${scopeWhere}
1017
- AND COALESCE(guessed_location, '未知') = ?
1018
- `;
1019
- const count =
1020
- db
1021
- .prepare(
1022
- `
1023
- SELECT COUNT(*) as c
1024
- FROM ${sourceTable}
1025
- WHERE ${whereSql}
1026
- `,
1027
- )
1028
- .get(normalizedCountry)?.c || 0;
1029
-
1030
- if (!count) {
1031
- return { moved: 0, scope: normalizedScope, country: normalizedCountry };
1032
- }
1033
-
1034
- const moveTxn = db.transaction((targetCountry) => {
1035
- db.prepare(
1036
- `
1037
- INSERT OR REPLACE INTO raw_jobs (
1038
- ${columns}
1039
- )
1040
- SELECT
1041
- ${columns}
1042
- FROM ${sourceTable}
1043
- WHERE ${whereSql}
1044
- `,
1045
- ).run(targetCountry);
1046
-
1047
- db.prepare(
1048
- `
1049
- DELETE FROM ${sourceTable}
1050
- WHERE ${whereSql}
1051
- `,
1052
- ).run(targetCountry);
1053
- });
1054
-
1055
- moveTxn(normalizedCountry);
1056
- return { moved: count, scope: normalizedScope, country: normalizedCountry };
1057
- }
1058
-
1059
/**
 * Move every raw_jobs row whose guessed location equals `country` back into
 * the jobs table, then delete those rows from raw_jobs.
 *
 * Rows with a NULL guessed_location are matched through the placeholder
 * "未知"; a null/undefined `country` argument is mapped to that placeholder.
 *
 * @param {string|null|undefined} country - Guessed location to restore.
 * @returns {{restored: number, country: string, error?: string}}
 *   `restored` is the number of matching raw_jobs rows counted before the move.
 */
function restoreRawJobsByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // One column list shared by INSERT and SELECT so the two can never drift.
  // bio_link is included to match restoreRawJobById / restoreRawJobsByFilter;
  // leaving it out made a by-country restore silently drop the stored link.
  const columns = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "claimed_by",
    "claimed_at",
    "error",
    "pinned",
    "no_video",
    "restricted",
    "user_update_count",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "follower_count",
    "following_count",
    "heart_count",
    "refresh_time",
    "processed",
    "processed_at",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "bio_link",
    "sec_uid",
  ].join(", ");

  const whereSql = `COALESCE(guessed_location, '未知') = ?`;
  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(normalizedCountry)?.c || 0;

  if (!count) {
    return { restored: 0, country: normalizedCountry };
  }

  // Copy and delete inside one transaction so a failure cannot lose rows.
  const restoreTxn = db.transaction((targetCountry) => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (${columns})
      SELECT ${columns}
      FROM raw_jobs
      WHERE ${whereSql}
    `,
    ).run(targetCountry);

    db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(targetCountry);
  });

  restoreTxn(normalizedCountry);
  return { restored: count, country: normalizedCountry };
}
1167
-
1168
/**
 * Move a single raw_jobs row back into the jobs table and delete it from
 * raw_jobs.
 *
 * @param {string|number|null|undefined} uniqueId - unique_id of the row.
 *   It is stringified and trimmed; null/undefined/blank values are rejected.
 * @returns {{restored: number, uniqueId: *, error?: string}}
 *   `restored` is 1 when a matching row existed, otherwise 0.
 */
function restoreRawJobById(uniqueId) {
  if (!db) {
    return { restored: 0, uniqueId, error: "db not ready" };
  }

  // String(undefined) / String(null) would yield "undefined" / "null" and
  // slip past the emptiness check, so map nullish ids to "" first.
  const safeId = uniqueId == null ? "" : String(uniqueId).trim();
  if (!safeId) {
    return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
  }

  const exists =
    db
      .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
      .get(safeId)?.c || 0;

  if (!exists) {
    return { restored: 0, uniqueId: safeId };
  }

  const columns = `
        unique_id, nickname, status, sources, claimed_by, claimed_at, error,
        pinned, no_video, restricted, user_update_count, tt_seller, verified,
        video_count, comment_count, guessed_location, location_created,
        follower_count, following_count, heart_count, refresh_time,
        processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
  `;

  // Copy and delete inside one transaction so a failure cannot lose the row.
  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (${columns})
      SELECT ${columns}
      FROM raw_jobs WHERE unique_id = ?
    `,
    ).run(safeId);

    db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
  });

  restoreTxn();
  return { restored: 1, uniqueId: safeId };
}
1213
-
1214
- function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
1215
- if (!db) {
1216
- return { restored: 0, error: "db not ready" };
1217
- }
1218
-
1219
- const where = [];
1220
- const args = [];
1221
-
1222
- if (search) {
1223
- where.push(
1224
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1225
- );
1226
- const likeVal = `%${search.toLowerCase()}%`;
1227
- args.push(likeVal, likeVal);
1228
- }
1229
-
1230
- if (location) {
1231
- where.push("COALESCE(guessed_location, '未知') = ?");
1232
- args.push(location);
1233
- }
1234
-
1235
- if (hasVideo) {
1236
- where.push("COALESCE(video_count, 0) > 0");
1237
- }
1238
-
1239
- if (hasFollower) {
1240
- where.push("COALESCE(follower_count, 0) > 0");
1241
- }
1
+ /**
2
+ * 数据存储主模块 — createStore() 编排器
3
+ *
4
+ * 本文件是数据存储的入口点,负责编排各子模块:
5
+ * - db-schema.js: 建表、迁移、全局连接管理
6
+ * - db-columns.js: 共享列名常量和 SQL 生成
7
+ * - db-crud.js: 基础 CRUD(增删改查、行映射)
8
+ * - db-stats.js: 仪表盘统计、按国家分组
9
+ * - db-raw-jobs.js: raw_jobs 移入/恢复
10
+ * - db-tags.js: Tag 发现与打分
11
+ * - llm-scoring.js: LLM 国家匹配度打分
12
+ *
13
+ * createStore() 保留为运行时编排器,管理:
14
+ * - 任务认领/提交(claimNextJob/commitJob)
15
+ * - 客户端追踪、视频管理、备份
16
+ * - 内存索引、stats 缓存
17
+ */
1242
18
 
1243
- if (where.length === 0) {
1244
- return { restored: 0, error: "at least one filter is required" };
1245
- }
1246
-
1247
- const whereSql = where.join(" AND ");
1248
-
1249
- const count =
1250
- db
1251
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
1252
- .get(...args)?.c || 0;
1253
-
1254
- if (!count) {
1255
- return { restored: 0 };
1256
- }
1257
-
1258
- const restoreTxn = db.transaction(() => {
1259
- db.prepare(
1260
- `
1261
- INSERT OR REPLACE INTO jobs (
1262
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1263
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1264
- video_count, comment_count, guessed_location, location_created,
1265
- follower_count, following_count, heart_count, refresh_time,
1266
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1267
- )
1268
- SELECT
1269
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1270
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1271
- video_count, comment_count, guessed_location, location_created,
1272
- follower_count, following_count, heart_count, refresh_time,
1273
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1274
- FROM raw_jobs WHERE ${whereSql}
1275
- `,
1276
- ).run(...args);
19
+ import fs from "fs";
20
+ import path from "path";
21
+ import Database from "better-sqlite3";
22
+ import {
23
+ isLocationInList,
24
+ DEFAULT_TARGET_LOCATIONS,
25
+ } from "../lib/target-locations.js";
1277
26
 
1278
- db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
1279
- });
27
+ // Schema 与连接管理
28
+ import {
29
+ getDb,
30
+ getDbPath,
31
+ initDb,
32
+ resetDbConnection,
33
+ loadLegacyUsersFromFiles,
34
+ loadLegacyVideosFromFile,
35
+ } from "./db-schema.js";
36
+
37
+ // CRUD 操作
38
+ import {
39
+ snakeToCamel,
40
+ camelToSnake,
41
+ normalizeJobValue,
42
+ mapJobRow,
43
+ mapVideoRow,
44
+ inferStatus,
45
+ hasUserInDb,
46
+ addUserToDb,
47
+ addJobToDb,
48
+ addJobBaseToDb,
49
+ addJob,
50
+ getJobRow,
51
+ getJobBaseRow,
52
+ getJob,
53
+ getAllJobs,
54
+ getVideoRow,
55
+ getAllVideoRows,
56
+ updateJobInfo,
57
+ updateJobBaseInfo,
58
+ getUserDbCount,
59
+ getJobsCount,
60
+ getPendingJobsCount,
61
+ getPendingJobsUserUpdateCount,
62
+ getRawJobsCount,
63
+ } from "./db-crud.js";
64
+
65
+ // 统计查询
66
+ import {
67
+ getDashboardStatsFromDb,
68
+ getPendingByCountryFromDb,
69
+ getUserUpdateByCountryFromDb,
70
+ getAttachStuckByCountryFromDb,
71
+ getRawByCountryFromDb,
72
+ restoreAttachStuckByCountry,
73
+ resetPendingByCountry,
74
+ } from "./db-stats.js";
75
+
76
+ // Raw Jobs 管理
77
+ import {
78
+ moveJobsToRawByCountry,
79
+ restoreRawJobsByCountry,
80
+ restoreRawJobById,
81
+ restoreRawJobsByFilter,
82
+ getRawJobsPageFromDb,
83
+ } from "./db-raw-jobs.js";
84
+
85
+ // Tag CRUD
86
+ import {
87
+ insertTag,
88
+ getTagsByStatus,
89
+ getTagsByCountry,
90
+ getDeadTags,
91
+ claimTag,
92
+ reportTagScore,
93
+ getAllTags,
94
+ rawQuery,
95
+ normalizeTags,
96
+ clearTags,
97
+ } from "./db-tags.js";
98
+
99
+ // LLM 打分
100
+ import {
101
+ scoreJobLocation,
102
+ scoreJobsBatch,
103
+ createLlmOffsetStore,
104
+ } from "./llm-scoring.js";
1280
105
 
1281
- restoreTxn();
1282
- return { restored: count };
1283
- }
106
+ // ===== 薄包装函数(保持外部 API 不变)=====
1284
107
 
1285
- function getRawJobsPageFromDb({
1286
- search,
1287
- location,
1288
- limit,
1289
- offset,
1290
- hasVideo,
1291
- hasFollower,
108
+ /**
109
+ * 导入历史 JSON 数据到 SQLite
110
+ */
111
+ export function importLegacyJsonToDb({
112
+ dbFilePath,
113
+ usersFilePath,
114
+ doneFilePath,
115
+ videosFilePath,
1292
116
  }) {
1293
- if (!db) return null;
1294
-
1295
- const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1296
- const safeOffset = Math.max(0, parseInt(offset) || 0);
1297
- const where = [];
1298
- const args = [];
1299
-
1300
- if (search) {
1301
- where.push(
1302
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1303
- );
1304
- const pattern = `%${String(search).toLowerCase()}%`;
1305
- args.push(pattern, pattern);
1306
- }
1307
- if (location) {
1308
- where.push("COALESCE(guessed_location, '未知') = ?");
1309
- args.push(location);
1310
- }
1311
- if (hasVideo) {
1312
- where.push("COALESCE(video_count, 0) > 0");
1313
- }
1314
- if (hasFollower) {
1315
- where.push("COALESCE(follower_count, 0) > 0");
1316
- }
1317
-
1318
- const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
1319
- const total = db
1320
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
1321
- .get(...args).c;
1322
-
1323
- const rows = db
1324
- .prepare(
1325
- `
1326
- SELECT *
1327
- FROM raw_jobs
1328
- ${whereSql}
1329
- ORDER BY created_at DESC, unique_id ASC
1330
- LIMIT ? OFFSET ?
1331
- `,
1332
- )
1333
- .all(...args, safeLimit, safeOffset);
1334
-
1335
- return {
1336
- total,
1337
- limit: safeLimit,
1338
- offset: safeOffset,
1339
- users: rows.map(mapJobRow),
1340
- };
1341
- }
1342
-
1343
- // ====== Tag 发现与打分 CRUD ======
1344
-
1345
/**
 * Insert a tag into the tags table unless it already exists
 * (INSERT OR IGNORE).
 *
 * The tag is trimmed, stripped of leading "#" characters and lower-cased
 * before storage; results shorter than two characters are rejected.
 *
 * @param {*} tag - Raw tag text; non-strings are stringified, nullish is invalid.
 * @param {Array} [countries] - Stored as JSON; a nullish value is stored as "[]".
 * @param {string} [source="llm"] - Value written to the source column.
 * @returns {{inserted: boolean, tag?: string, error?: string}}
 */
function insertTag(tag, countries, source = "llm") {
  if (!db) return { inserted: false, error: "db not ready" };

  // Trim BEFORE stripping "#": otherwise " #foo" keeps its prefix because the
  // anchor ^#+ does not match behind leading whitespace. String() keeps a
  // non-string tag from throwing outside the try block.
  const normalized = String(tag ?? "")
    .trim()
    .replace(/^#+/, "")
    .trim()
    .toLowerCase();
  if (normalized.length < 2) {
    return { inserted: false, error: "invalid tag" };
  }

  try {
    const result = db
      .prepare(
        `
        INSERT OR IGNORE INTO tags (tag, countries, source)
        VALUES (?, ?, ?)
      `,
      )
      // JSON.stringify(undefined) is undefined, which cannot be bound.
      .run(normalized, JSON.stringify(countries ?? []), source);
    return { inserted: result.changes > 0, tag: normalized };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}
1366
-
1367
/**
 * Decode a JSON list column of the tags table.
 * Empty, malformed or non-array content yields [] so that one corrupt row
 * cannot abort a whole listing.
 */
function parseTagList(json) {
  if (!json) return [];
  try {
    const parsed = JSON.parse(json);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}

/** Return a copy of a tags row with its two JSON list columns decoded. */
function hydrateTagRow(row) {
  return {
    ...row,
    countries: parseTagList(row.countries),
    matched_countries: parseTagList(row.matched_countries),
  };
}

/**
 * List tags in a given status, lowest score first, then oldest first.
 *
 * @param {string} status - Value matched against tags.status.
 * @param {number} [limit=100] - Maximum number of rows.
 * @returns {object[]} Rows with `countries` / `matched_countries` as arrays;
 *   [] when the database is not open.
 */
function getTagsByStatus(status, limit = 100) {
  if (!db) return [];
  return db
    .prepare(
      `
      SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
    `,
    )
    .all(status, limit)
    .map(hydrateTagRow);
}

/**
 * List non-dead tags for a country, highest score first.
 *
 * @param {string} country - Must appear in the row's `countries` list.
 * @param {number} [minScore=0] - Minimum score (inclusive).
 * @returns {object[]} Matching rows; [] when the database is not open.
 */
function getTagsByCountry(country, minScore = 0) {
  if (!db) return [];
  // countries is stored as JSON text, so the country filter runs in JS.
  return db
    .prepare(
      `
      SELECT * FROM tags WHERE status != 'dead'
      ORDER BY score DESC
    `,
    )
    .all()
    .map(hydrateTagRow)
    .filter((r) => r.countries.includes(country) && r.score >= minScore);
}

/**
 * List dead tags for a country, lowest score first.
 *
 * @param {string} country - Must appear in the row's `countries` list.
 * @returns {object[]} Matching rows; [] when the database is not open.
 */
function getDeadTags(country) {
  if (!db) return [];
  return db
    .prepare(
      `
      SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
    `,
    )
    .all()
    .map(hydrateTagRow)
    .filter((r) => r.countries.includes(country));
}
117
+ resetDbConnection();
118
+ initDb(dbFilePath);
1420
119
 
1421
/**
 * Claim a tag for scoring by flipping its status from 'new' to 'scoring'.
 *
 * The status check and the update happen in one UPDATE statement, so only a
 * tag that is still 'new' can be claimed.
 *
 * @param {string} tag - Tag to claim.
 * @returns {{ok: boolean, tag?: string, error?: string}}
 */
function claimTag(tag) {
  if (!db) return { ok: false, error: "db not ready" };

  const { changes } = db
    .prepare(
      "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
    )
    .run(tag);

  if (changes !== 0) {
    return { ok: true, tag };
  }

  // Nothing was updated: distinguish a missing tag from one in another status.
  const current = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
  const reason = current
    ? `tag status is ${current.status}, already claimed`
    : "tag not found";
  return { ok: false, error: reason };
}
120
+ const db = getDb();
121
+ const dbPath = getDbPath();
1437
122
 
1438
/**
 * Write scoring results for a tag.
 *
 * Each column is updated through COALESCE(?, column), so a field left
 * null/undefined keeps its stored value. `scored_at` is always set to the
 * current ISO timestamp and `score_count` is incremented by one.
 *
 * @param {string} tag - Tag whose row is updated.
 * @param {object} fields - Optional keys: score, status, totalPosts,
 *   authorCount, matchedAuthors, matchedCountries (stored as JSON),
 *   pushedUsers, error (written to last_error).
 * @returns {{ok: boolean, tag?: string, error?: string}}
 *   `ok` is false when no row matched or the statement threw.
 */
function reportTagScore(tag, fields) {
  if (!db) return { ok: false, error: "db not ready" };

  const {
    score,
    status,
    totalPosts,
    authorCount,
    matchedAuthors,
    matchedCountries,
    pushedUsers,
    error,
  } = fields;

  // Positional bindings, in the order of the placeholders below.
  const bindings = [
    score ?? null,
    status ?? null,
    totalPosts ?? null,
    authorCount ?? null,
    matchedAuthors ?? null,
    matchedCountries ? JSON.stringify(matchedCountries) : null,
    pushedUsers ?? null,
    error ?? null,
    new Date().toISOString(),
    tag,
  ];

  try {
    const statement = db.prepare(
      `
      UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?
    `,
    );
    const outcome = statement.run(...bindings);
    return { ok: outcome.changes > 0, tag };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
123
+ const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
124
+ const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
1490
125
 
1491
/**
 * List tags ordered by score (highest first), then newest first.
 *
 * @param {number} [limit=200] - Maximum number of rows.
 * @returns {object[]} Rows with `countries` / `matched_countries` decoded to
 *   arrays; [] when the database is not open.
 */
function getAllTags(limit = 200) {
  if (!db) return [];

  // Malformed or non-array JSON degrades to [] instead of throwing, so one
  // corrupt row cannot break the whole listing.
  const toList = (json) => {
    if (!json) return [];
    try {
      const parsed = JSON.parse(json);
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  };

  return db
    .prepare(
      `
      SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
    `,
    )
    .all(limit)
    .map((r) => ({
      ...r,
      countries: toList(r.countries),
      matched_countries: toList(r.matched_countries),
    }));
}
126
+ const beforeUsers = getDb()
127
+ .prepare("SELECT COUNT(*) as c FROM users")
128
+ .get().c;
129
+ const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
130
+ const beforeVideos = getDb()
131
+ .prepare("SELECT COUNT(*) as c FROM videos")
132
+ .get().c;
1506
133
 
1507
- // 调试接口:直接执行 SQL 查询,返回原始数据
1508
/**
 * Debug helper: run an arbitrary SQL statement and return its rows.
 *
 * NOTE(review): the SQL text is executed as given, so this must only be
 * reachable from trusted callers — confirm how it is exposed.
 *
 * @param {string} sql - Statement text; values should go through `params`.
 * @param {Array} [params=[]] - Positional bind values.
 * @returns {{rows: object[]}|{error: string}}
 */
function rawQuery(sql, params = []) {
  if (!db) return { error: "db not ready" };

  let rows;
  try {
    const statement = db.prepare(sql);
    rows = statement.all(...params);
  } catch (e) {
    return { error: e.message };
  }
  return { rows };
}
134
+ const insertUserStmt = getDb().prepare(
135
+ `INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
136
+ );
137
+ const insertVideoStmt = getDb().prepare(
138
+ `INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
139
+ );
1517
140
 
1518
- // 清理 tags 表中以 # 开头的脏数据
1519
- function normalizeTags() {
1520
- if (!db) return { ok: false, error: "db not ready" };
1521
- const dirtyRows = db
1522
- .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
1523
- .all();
1524
- const fixed = [];
1525
- const merged = [];
1526
- const skipped = [];
1527
-
1528
- for (const row of dirtyRows) {
1529
- const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
1530
- if (!cleanTag || cleanTag.length < 2) {
1531
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1532
- skipped.push({
1533
- dirty: row.tag,
1534
- reason: "empty after normalize, deleted",
1535
- });
1536
- continue;
141
+ const importUsersTxn = getDb().transaction((items) => {
142
+ for (const item of items) {
143
+ const uniqueId = item.uniqueId || item.unique_id;
144
+ if (!uniqueId) continue;
145
+ insertUserStmt.run(uniqueId);
146
+ addJobToDb({ ...item, uniqueId });
1537
147
  }
148
+ });
1538
149
 
1539
- // 检查 cleanTag 是否已存在
1540
- const existing = db
1541
- .prepare("SELECT * FROM tags WHERE tag = ?")
1542
- .get(cleanTag);
1543
- if (existing) {
1544
- // 合并:保留已有 clean 版本,合并 countries
1545
- const oldCountries = JSON.parse(row.countries || "[]");
1546
- const existCountries = JSON.parse(existing.countries || "[]");
1547
- const mergedCountries = [
1548
- ...new Set([...existCountries, ...oldCountries]),
1549
- ];
1550
- db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
1551
- JSON.stringify(mergedCountries),
1552
- cleanTag,
150
+ const importVideosTxn = getDb().transaction((items) => {
151
+ for (const item of items) {
152
+ if (!item?.id) continue;
153
+ insertVideoStmt.run(
154
+ item.id,
155
+ item.href || null,
156
+ item.authorUniqueId || item.author_unique_id || null,
157
+ item.locationCreated || item.location_created || null,
158
+ item.ttSeller ? 1 : 0,
159
+ item.registeredAt || item.registered_at || Date.now(),
160
+ item.userUpdateCount || item.user_update_count || 0,
161
+ item.createTime || item.create_time || null,
1553
162
  );
1554
- // 删除脏数据
1555
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1556
- merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1557
- } else {
1558
- // 直接重命名
1559
- db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
1560
- fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1561
163
  }
1562
- }
164
+ });
165
+
166
+ importUsersTxn(legacyUsers);
167
+ importVideosTxn(legacyVideos);
168
+
169
+ const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
170
+ const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
171
+ const afterVideos = getDb()
172
+ .prepare("SELECT COUNT(*) as c FROM videos")
173
+ .get().c;
1563
174
 
1564
175
  return {
1565
- ok: true,
1566
- fixed: fixed.length,
1567
- merged: merged.length,
1568
- skipped: skipped.length,
1569
- details: { fixed, merged, skipped },
176
+ dbPath: getDbPath(),
177
+ usersImported: afterUsers - beforeUsers,
178
+ jobsImported: afterJobs - beforeJobs,
179
+ videosImported: afterVideos - beforeVideos,
180
+ totalUsers: afterUsers,
181
+ totalJobs: afterJobs,
182
+ totalVideos: afterVideos,
1570
183
  };
1571
184
  }
1572
185
 
1573
- function clearTags() {
1574
- if (!db) return { ok: false, error: "db not ready" };
1575
- const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
1576
- db.exec("DELETE FROM tags");
1577
- return { ok: true, deleted: count };
186
+ export function closeStoreDb() {
187
+ resetDbConnection();
1578
188
  }
1579
189
 
1580
190
  function getUsersPageFromDb({
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
1587
197
  offset,
1588
198
  targetLocations = [],
1589
199
  }) {
1590
- if (!db) return null;
200
+ if (!getDb()) return null;
1591
201
 
1592
202
  const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1593
203
  const safeOffset = Math.max(0, parseInt(offset) || 0);
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
1636
246
  if (cachedCount && Date.now() - cachedCount.time < 5000) {
1637
247
  total = cachedCount.c;
1638
248
  } else {
1639
- total = db
249
+ total = getDb()
1640
250
  .prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
1641
251
  .get(...args).c;
1642
252
  getUsersPageFromDb._countCache.set(cacheKey, {
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
1646
256
  }
1647
257
 
1648
258
  // 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
1649
- const rows = db
259
+ const rows = getDb()
1650
260
  .prepare(
1651
261
  `
1652
262
  SELECT
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
1688
298
  }
1689
299
 
1690
300
  function getTargetUsersFromDb(targetLocations = []) {
1691
- if (!db) return null;
301
+ if (!getDb()) return null;
1692
302
  if (!targetLocations.length) {
1693
303
  return { total: 0, users: [] };
1694
304
  }
1695
305
 
1696
306
  const placeholders = targetLocations.map(() => "?").join(", ");
1697
- const rows = db
307
+ const rows = getDb()
1698
308
  .prepare(
1699
309
  `
1700
310
  SELECT
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
1703
313
  location_created, latest_video_time, refresh_time,
1704
314
  guessed_location, pinned, processed_at, video_count,
1705
315
  no_video, claimed_by, claimed_at, created_at, updated_at
316
+ FROM jobs
317
+ WHERE tt_seller = 1
1706
318
  AND verified = 0
1707
319
  AND location_created IN (${placeholders})
1708
320
  ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
1718
330
  }
1719
331
 
1720
332
  function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1721
- if (!db) return null;
333
+ if (!getDb()) return null;
1722
334
  if (!targetLocations.length) {
1723
335
  return { countries: [] };
1724
336
  }
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1735
347
 
1736
348
  // 摘要模式:只返回各国统计数,不返回用户数据
1737
349
  if (summaryOnly) {
1738
- const statsRows = db
350
+ const statsRows = getDb()
1739
351
  .prepare(
1740
352
  `
1741
353
  SELECT location_created as country, COUNT(*) as count
@@ -1803,397 +415,79 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1803
415
  /SELECT[^FROM]*FROM/,
1804
416
  "SELECT COUNT(*) as cnt FROM",
1805
417
  );
1806
- const total = db.prepare(countSql).get(...params)?.cnt || 0;
418
+ const total =
419
+ getDb()
420
+ .prepare(countSql)
421
+ .get(...params)?.cnt || 0;
1807
422
 
1808
423
  sql += ` LIMIT ? OFFSET ?`;
1809
424
  const safeLimit = Math.min(Math.floor(limit), 10000);
1810
425
  const safeOffset = Math.max(Math.floor(offset), 0);
1811
426
 
1812
- const rows = db
1813
- .prepare(sql)
1814
- .all(...params, safeLimit, safeOffset)
1815
- .map(mapJobRow);
1816
-
1817
- return {
1818
- total,
1819
- limit: safeLimit,
1820
- offset: safeOffset,
1821
- users: rows,
1822
- };
1823
- }
1824
-
1825
- const rows = db
1826
- .prepare(
1827
- `
1828
- SELECT
1829
- unique_id,
1830
- nickname,
1831
- follower_count,
1832
- video_count,
1833
- tt_seller,
1834
- verified,
1835
- location_created,
1836
- confirmed_location,
1837
- modified_at,
1838
- latest_video_time,
1839
- refresh_time,
1840
- status,
1841
- sources
1842
- FROM jobs
1843
- WHERE tt_seller = 1
1844
- AND verified = 0
1845
- AND location_created IN (${placeholders})
1846
- ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
1847
- `,
1848
- )
1849
- .all(...targetLocations)
1850
- .map(mapJobRow);
1851
-
1852
- const countryMap = new Map();
1853
- for (const row of rows) {
1854
- const country = row.locationCreated || "未知";
1855
- if (!countryMap.has(country)) {
1856
- countryMap.set(country, []);
1857
- }
1858
- countryMap.get(country).push(row);
1859
- }
1860
-
1861
- const countries = [];
1862
- for (const [country, users] of countryMap) {
1863
- countries.push({
1864
- country,
1865
- count: users.length,
1866
- users,
1867
- });
1868
- }
1869
-
1870
- return {
1871
- total: rows.length,
1872
- countries,
1873
- };
1874
- }
1875
-
1876
/**
 * Convert a snake_case key to camelCase.
 * Only an underscore followed by a lowercase ASCII letter is rewritten.
 *
 * @param {string} key
 * @returns {string}
 */
function snakeToCamel(key) {
  const upperAfterUnderscore = (_whole, letter) => letter.toUpperCase();
  return key.replace(/_([a-z])/g, upperAfterUnderscore);
}
1879
-
1880
/**
 * Convert a camelCase key to snake_case.
 * Every uppercase ASCII letter becomes "_" plus its lowercase form.
 *
 * @param {string} key
 * @returns {string}
 */
function camelToSnake(key) {
  const underscoreLower = (upper) => "_" + upper.toLowerCase();
  return key.replace(/[A-Z]/g, underscoreLower);
}
1883
-
1884
// jobs columns that normalizeJobValue stores as 0/1 and mapJobRow reads back
// as booleans.
const jobBooleanColumns = new Set(
  "pinned no_video restricted processed tt_seller verified error".split(" "),
);

// videos columns that mapVideoRow reads back as booleans.
const videoBooleanColumns = new Set(["tt_seller"]);

// Columns the update functions accept; keys that map to any other column are
// skipped.
const writableJobColumns = new Set(
  [
    ["nickname", "status", "sources", "claimed_by", "claimed_at", "error"],
    ["pinned", "no_video", "restricted", "user_update_count"],
    ["tt_seller", "verified", "video_count", "comment_count"],
    ["guessed_location", "location_created", "confirmed_location"],
    ["modified_at", "follower_count", "following_count", "heart_count"],
    ["refresh_time", "processed", "processed_at", "updated_at"],
    ["region", "signature", "bio_link", "sec_uid", "status_code"],
    ["latest_video_time", "top_video_play_count", "top_video_href"],
    ["user_create_time"],
  ].flat(),
);
1932
-
1933
/**
 * Convert a JS value into the form stored in a jobs column.
 *
 * - null/undefined            -> null
 * - "sources"                 -> JSON array text, de-duplicated ("[]" for non-arrays)
 * - boolean columns           -> 1 or 0
 * - any other object or array -> JSON text
 * - everything else           -> unchanged
 *
 * @param {string} column - snake_case column name.
 * @param {*} value
 * @returns {*}
 */
function normalizeJobValue(column, value) {
  if (value == null) return null;

  if (column === "sources") {
    const unique = Array.isArray(value) ? [...new Set(value)] : [];
    return JSON.stringify(unique);
  }

  if (jobBooleanColumns.has(column)) {
    return value ? 1 : 0;
  }

  // Defensive: objects and arrays are serialized before being bound.
  return typeof value === "object" ? JSON.stringify(value) : value;
}
1946
-
1947
/**
 * Turn a jobs row into the camelCase object used by callers.
 *
 * `sources` is decoded from JSON ([] when empty or malformed), boolean
 * columns become true/false (null stays null), all other values pass through.
 *
 * @param {object|null|undefined} row
 * @returns {object|undefined} undefined when `row` is falsy.
 */
function mapJobRow(row) {
  if (!row) return undefined;

  const decodeSources = (raw) => {
    if (!raw) return [];
    try {
      return JSON.parse(raw);
    } catch {
      return [];
    }
  };

  const job = {};
  Object.keys(row).forEach((column) => {
    const raw = row[column];
    const property = snakeToCamel(column);
    if (column === "sources") {
      job[property] = decodeSources(raw);
    } else if (jobBooleanColumns.has(column)) {
      job[property] = raw == null ? null : Boolean(raw);
    } else {
      job[property] = raw;
    }
  });
  return job;
}
1968
-
1969
/**
 * Fetch the raw jobs row for a unique_id.
 *
 * @param {string} uniqueId
 * @returns {object|null|undefined} null when the database is not open,
 *   otherwise whatever the statement's get() returns.
 */
function getJobRow(uniqueId) {
  return db
    ? db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId)
    : null;
}
1973
-
1974
/**
 * Fetch the raw jobs_base row for a unique_id.
 *
 * @param {string} uniqueId
 * @returns {object|null|undefined} null when the database is not open,
 *   otherwise whatever the statement's get() returns.
 */
function getJobBaseRow(uniqueId) {
  if (!db) return null;
  const lookup = db.prepare("SELECT * FROM jobs_base WHERE unique_id = ?");
  return lookup.get(uniqueId);
}
1980
-
1981
/**
 * Fetch a job by unique_id and map it through mapJobRow.
 *
 * @param {string} uniqueId
 * @returns {object|undefined} Result of mapJobRow for the fetched row.
 */
function getJob(uniqueId) {
  const row = getJobRow(uniqueId);
  return mapJobRow(row);
}
1984
-
1985
/**
 * Load every jobs row, mapped through mapJobRow.
 *
 * @returns {object[]} [] when the database is not open.
 */
function getAllJobs() {
  if (!db) return [];
  const rows = db.prepare("SELECT * FROM jobs").all();
  return rows.map(mapJobRow);
}
1989
-
1990
/**
 * Turn a videos row into a camelCase object.
 *
 * Boolean columns become true/false (null stays null); all other values pass
 * through unchanged.
 *
 * @param {object|null|undefined} row
 * @returns {object|undefined} undefined when `row` is falsy.
 */
function mapVideoRow(row) {
  if (!row) return undefined;

  const pairs = Object.entries(row).map(([column, raw]) => {
    const converted =
      videoBooleanColumns.has(column) && raw != null ? Boolean(raw) : raw;
    // raw == null on a boolean column must yield null, not undefined.
    const value = videoBooleanColumns.has(column) && raw == null ? null : converted;
    return [snakeToCamel(column), value];
  });
  return Object.fromEntries(pairs);
}
2003
-
2004
/**
 * Fetch the raw videos row for a video id.
 *
 * @param {string} videoId
 * @returns {object|null|undefined} null when the database is not open,
 *   otherwise whatever the statement's get() returns.
 */
function getVideoRow(videoId) {
  return db ? db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId) : null;
}
2008
-
2009
/**
 * Load every videos row as returned by the database (not mapped).
 *
 * @returns {object[]} [] when the database is not open.
 */
function getAllVideoRows() {
  if (db) {
    return db.prepare("SELECT * FROM videos").all();
  }
  return [];
}
2013
-
2014
/**
 * Update writable columns of an existing jobs row.
 *
 * Keys of `info` are converted to snake_case; `bio` is stored as `signature`
 * and `createTime` as `user_create_time`. Keys outside writableJobColumns,
 * the id keys, and undefined/"" values are skipped. `updated_at` is always
 * set to the current time.
 *
 * @param {string} uniqueId - Row to update.
 * @param {object} [info] - camelCase (or snake_case) fields to write.
 * @param {boolean} [incrementCount=true] - Bump user_update_count by one.
 * @returns {{ok: true, userUpdateCount: number}|{error: string}}
 */
function updateJobInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };

  const existing = getJobRow(uniqueId);
  if (!existing) return { error: "user not found" };

  // Field aliases: bio -> signature, createTime -> user_create_time.
  const aliases = new Map([
    ["bio", "signature"],
    ["create_time", "user_create_time"],
  ]);

  const nextValues = {};
  Object.entries(info || {}).forEach(([key, value]) => {
    const isIdKey = key === "uniqueId" || key === "unique_id";
    if (isIdKey || value === undefined || value === "") return;

    const snake = camelToSnake(key);
    const column = aliases.get(snake) ?? snake;
    if (writableJobColumns.has(column)) {
      nextValues[column] = normalizeJobValue(column, value);
    }
  });

  nextValues.updated_at = Date.now();
  if (incrementCount) {
    nextValues.user_update_count = (existing.user_update_count || 0) + 1;
  }

  // updated_at is always present, so there is always something to write.
  const columns = Object.keys(nextValues);
  const assignments = columns.map((column) => `${column} = ?`).join(", ");
  const values = columns.map((column) => nextValues[column]);
  db.prepare(`UPDATE jobs SET ${assignments} WHERE unique_id = ?`).run(
    ...values,
    uniqueId,
  );

  return {
    ok: true,
    userUpdateCount:
      nextValues.user_update_count ?? existing.user_update_count ?? 0,
  };
}
2051
-
2052
/**
 * Derive a job status from its flags.
 * Precedence: restricted, then error, then processed; otherwise "pending".
 *
 * @param {object} u - Object carrying restricted / error / processed flags.
 * @returns {"restricted"|"error"|"done"|"pending"}
 */
function inferStatus(u) {
  const rules = [
    ["restricted", "restricted"],
    ["error", "error"],
    ["processed", "done"],
  ];
  const matched = rules.find(([flag]) => u[flag]);
  return matched ? matched[1] : "pending";
}
427
+ const rows = getDb()
428
+ .prepare(sql)
429
+ .all(...params, safeLimit, safeOffset)
430
+ .map(mapJobRow);
2058
431
 
2059
- function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
2060
- if (!db) return { error: "db not initialized" };
2061
- const existing = getJobBaseRow(uniqueId);
2062
- if (!existing) return { error: "user not found" };
2063
-
2064
- const nextValues = {};
2065
- for (const [key, value] of Object.entries(info || {})) {
2066
- if (key === "uniqueId" || key === "unique_id") continue;
2067
- if (value === undefined || value === "") continue;
2068
- let column = camelToSnake(key);
2069
- // 字段别名:bio → signature, createTime → user_create_time
2070
- if (column === "bio") column = "signature";
2071
- if (column === "create_time") column = "user_create_time";
2072
- if (!writableJobColumns.has(column)) continue;
2073
- nextValues[column] = normalizeJobValue(column, value);
432
+ return {
433
+ total,
434
+ limit: safeLimit,
435
+ offset: safeOffset,
436
+ users: rows,
437
+ };
2074
438
  }
2075
439
 
2076
- nextValues.updated_at = Date.now();
2077
- if (incrementCount) {
2078
- nextValues.user_update_count = (existing.user_update_count || 0) + 1;
440
+ const rows = getDb()
441
+ .prepare(
442
+ `
443
+ SELECT
444
+ unique_id,
445
+ nickname,
446
+ follower_count,
447
+ video_count,
448
+ tt_seller,
449
+ verified,
450
+ location_created,
451
+ confirmed_location,
452
+ modified_at,
453
+ latest_video_time,
454
+ refresh_time,
455
+ status,
456
+ sources
457
+ FROM jobs
458
+ WHERE tt_seller = 1
459
+ AND verified = 0
460
+ AND location_created IN (${placeholders})
461
+ ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
462
+ `,
463
+ )
464
+ .all(...targetLocations)
465
+ .map(mapJobRow);
466
+
467
+ const countryMap = new Map();
468
+ for (const row of rows) {
469
+ const country = row.locationCreated || "未知";
470
+ if (!countryMap.has(country)) {
471
+ countryMap.set(country, []);
472
+ }
473
+ countryMap.get(country).push(row);
2079
474
  }
2080
475
 
2081
- const columns = Object.keys(nextValues);
2082
- if (columns.length > 0) {
2083
- const sql = `UPDATE jobs_base SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
2084
- db.prepare(sql).run(
2085
- ...columns.map((column) => nextValues[column]),
2086
- uniqueId,
2087
- );
476
+ const countries = [];
477
+ for (const [country, users] of countryMap) {
478
+ countries.push({
479
+ country,
480
+ count: users.length,
481
+ users,
482
+ });
2088
483
  }
2089
484
 
2090
485
  return {
2091
- ok: true,
2092
- userUpdateCount:
2093
- nextValues.user_update_count ?? existing.user_update_count ?? 0,
486
+ total: rows.length,
487
+ countries,
2094
488
  };
2095
489
  }
2096
490
 
2097
- function addJobBaseToDb(user) {
2098
- if (!db) return;
2099
- const now = Date.now();
2100
- db.prepare(
2101
- `
2102
- INSERT OR IGNORE INTO jobs_base (
2103
- unique_id,
2104
- nickname,
2105
- status,
2106
- sources,
2107
- claimed_by,
2108
- claimed_at,
2109
- error,
2110
- pinned,
2111
- no_video,
2112
- restricted,
2113
- user_update_count,
2114
- tt_seller,
2115
- verified,
2116
- video_count,
2117
- comment_count,
2118
- guessed_location,
2119
- location_created,
2120
- follower_count,
2121
- following_count,
2122
- heart_count,
2123
- refresh_time,
2124
- processed,
2125
- processed_at,
2126
- created_at,
2127
- updated_at,
2128
- region,
2129
- signature,
2130
- bio_link,
2131
- sec_uid
2132
- )
2133
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2134
- `,
2135
- ).run(
2136
- user.uniqueId,
2137
- user.nickname || null,
2138
- user.status || inferStatus(user),
2139
- JSON.stringify(
2140
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
2141
- ),
2142
- user.claimedBy || null,
2143
- user.claimedAt || null,
2144
- user.error || null,
2145
- user.pinned ? 1 : 0,
2146
- user.noVideo ? 1 : 0,
2147
- user.restricted ? 1 : 0,
2148
- user.userUpdateCount || 0,
2149
- user.ttSeller === undefined ||
2150
- user.ttSeller === null ||
2151
- user.ttSeller === ""
2152
- ? null
2153
- : user.ttSeller
2154
- ? 1
2155
- : 0,
2156
- user.verified === undefined ||
2157
- user.verified === null ||
2158
- user.verified === ""
2159
- ? null
2160
- : user.verified
2161
- ? 1
2162
- : 0,
2163
- user.videoCount || 0,
2164
- user.commentCount || 0,
2165
- user.guessedLocation || null,
2166
- user.locationCreated || null,
2167
- user.followerCount || 0,
2168
- user.followingCount || 0,
2169
- user.heartCount || 0,
2170
- user.refreshTime || null,
2171
- user.processed ? 1 : 0,
2172
- user.processedAt || null,
2173
- user.createdAt || now,
2174
- user.updatedAt || now,
2175
- user.region || null,
2176
- user.signature || null,
2177
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
2178
- user.secUid || null,
2179
- );
2180
- }
2181
-
2182
/**
 * Register a user and its job.
 *
 * With an open database, missing `status`, `createdAt` and `updatedAt` are
 * filled in on the passed object itself, then addUserToDb and addJobToDb run
 * inside one transaction. Without a database only addUserToDb is called.
 *
 * @param {object} user - camelCase job object (mutated as described above).
 * @returns {void}
 */
function addJob(user) {
  if (db) {
    if (!user.status) user.status = inferStatus(user);
    if (!user.createdAt) user.createdAt = Date.now();
    if (!user.updatedAt) user.updatedAt = user.createdAt;

    // Both writes succeed or fail together.
    const persist = db.transaction((job) => {
      addUserToDb(job);
      addJobToDb(job);
    });
    persist(user);
    return;
  }

  addUserToDb(user);
}
2196
-
2197
491
  export function createStore(filePath, options = {}) {
2198
492
  if (!filePath) {
2199
493
  throw new Error("createStore requires an explicit .db path");
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
2219
513
  let refillLock = null; // Promise | null
2220
514
  // LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
2221
515
  // 格式: { "ES": 300, "PL": 500, "NL": 400 }
2222
- let llmSampleOffsets = new Map();
516
+ const offsetStore = createLlmOffsetStore();
2223
517
  if (filePath) {
2224
518
  // 初始化 SQLite 用户表(用于判重)
2225
- initUserDb(filePath);
519
+ initDb(filePath);
2226
520
  // 从数据库恢复偏移量
2227
- loadLlmSampleOffsets();
2228
- }
2229
-
2230
- /**
2231
- * 从数据库加载 LLM 采样偏移量
2232
- */
2233
- function loadLlmSampleOffsets() {
2234
- try {
2235
- const row = db
2236
- .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
2237
- .get();
2238
- if (row && row.offsets) {
2239
- const parsed = JSON.parse(row.offsets);
2240
- if (parsed && typeof parsed === "object") {
2241
- Object.entries(parsed).forEach(([k, v]) => {
2242
- llmSampleOffsets.set(k, v);
2243
- });
2244
- console.error(
2245
- `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
2246
- llmSampleOffsets.entries(),
2247
- )
2248
- .map(([k, v]) => `${k}:${v}`)
2249
- .join(", ")}`,
2250
- );
2251
- }
2252
- }
2253
- } catch (e) {
2254
- // 表不存在或解析失败,使用空偏移量
2255
- console.error(
2256
- `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
2257
- );
2258
- }
2259
- }
2260
-
2261
- /**
2262
- * 将 LLM 采样偏移量持久化到数据库
2263
- */
2264
- function saveLlmSampleOffsets() {
2265
- try {
2266
- const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
2267
- // 表不存在则创建
2268
- db.prepare(
2269
- `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
2270
- ).run();
2271
- // 插入或更新
2272
- db.prepare(
2273
- `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
2274
- ).run(offsetsJson);
2275
- } catch (e) {
2276
- console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
2277
- }
521
+ offsetStore.load();
2278
522
  }
2279
523
 
2280
524
  // stats 缓存
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
2287
531
  }
2288
532
 
2289
533
  function computeStatsInternal() {
2290
- if (db) {
534
+ if (getDb()) {
2291
535
  const total = getJobsCount();
2292
536
  const statusCounts = {
2293
537
  pending: 0,
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
2296
540
  error: 0,
2297
541
  restricted: 0,
2298
542
  };
2299
- const rows = db
543
+ const rows = getDb()
2300
544
  .prepare(
2301
545
  `
2302
546
  SELECT status, COUNT(*) as count
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
2372
616
  }
2373
617
 
2374
618
  function rebuildStatusGroups() {
2375
- if (db) {
619
+ if (getDb()) {
2376
620
  statusGroups = {
2377
621
  pending: [],
2378
622
  processing: [],
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
2436
680
 
2437
681
  function flushSave() {
2438
682
  // 数据库模式:先保存 LLM 偏移量,再备份数据库
2439
- if (db && dbPath) {
683
+ if (getDb() && getDbPath()) {
2440
684
  try {
2441
- saveLlmSampleOffsets();
685
+ offsetStore.save();
2442
686
  } catch (e) {
2443
687
  console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
2444
688
  }
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
2452
696
  * @returns {string|null} 备份文件路径,失败返回 null
2453
697
  */
2454
698
  function backupDatabase(maxBackups = 3) {
2455
- if (!db || !dbPath) {
699
+ if (!getDb() || !getDbPath()) {
2456
700
  console.error("[data-store] 数据库未初始化,跳过备份");
2457
701
  return null;
2458
702
  }
@@ -2464,17 +708,16 @@ export function createStore(filePath, options = {}) {
2464
708
  .toISOString()
2465
709
  .replace(/[-:T.]/g, "")
2466
710
  .slice(0, 15); // YYYYMMDDHHmmss
2467
- const baseName = path.basename(dbPath, ".db");
711
+ const baseName = path.basename(getDbPath(), ".db");
2468
712
  const backupName = `${baseName}-${timestamp}.db`;
2469
- const backupDir = path.dirname(dbPath);
713
+ const backupDir = path.dirname(getDbPath());
2470
714
  const backupPath = path.join(backupDir, backupName);
2471
715
 
2472
716
  console.error(`[data-store] 正在备份数据库: ${backupName}`);
2473
717
 
2474
- // 使用 better-sqlite3 backup API(原子性备份,安全可靠)
2475
- const backupDb = new Database(backupPath);
2476
- db.backup("main", backupDb, "main");
2477
- backupDb.close();
718
+ // WAL checkpoint 确保所有数据落盘,再同步复制文件
719
+ getDb().exec("PRAGMA wal_checkpoint(TRUNCATE)");
720
+ fs.copyFileSync(getDbPath(), backupPath);
2478
721
 
2479
722
  // 验证备份文件大小
2480
723
  const stat = fs.statSync(backupPath);
@@ -2523,7 +766,7 @@ export function createStore(filePath, options = {}) {
2523
766
 
2524
767
  function stopBackup() {
2525
768
  // 退出时执行备份
2526
- if (db && dbPath) {
769
+ if (getDb() && getDbPath()) {
2527
770
  backupDatabase();
2528
771
  }
2529
772
  }
@@ -2531,7 +774,7 @@ export function createStore(filePath, options = {}) {
2531
774
  function getUser(uid) {
2532
775
  const idx = uidIndex.get(uid);
2533
776
  if (idx !== undefined) return data[idx];
2534
- if (db) return getJob(uid);
777
+ if (getDb()) return getJob(uid);
2535
778
  return undefined;
2536
779
  }
2537
780
 
@@ -2549,12 +792,25 @@ export function createStore(filePath, options = {}) {
2549
792
 
2550
793
  function addUser(user, append) {
2551
794
  const memoryIdx = uidIndex.get(user.uniqueId);
2552
- if (db && memoryIdx === undefined) {
795
+ if (getDb() && memoryIdx === undefined) {
2553
796
  // 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
2554
797
  if (hasUserInDb(user.uniqueId)) {
2555
798
  return;
2556
799
  }
2557
- addJob(user);
800
+ const now = Date.now();
801
+ const writeTxn = getDb().transaction((job) => {
802
+ addUserToDb({
803
+ ...job,
804
+ createdAt: job.createdAt || now,
805
+ updatedAt: job.updatedAt || now,
806
+ });
807
+ addJobBaseToDb({
808
+ ...job,
809
+ createdAt: job.createdAt || now,
810
+ updatedAt: job.updatedAt || now,
811
+ });
812
+ });
813
+ writeTxn(user);
2558
814
  return;
2559
815
  }
2560
816
 
@@ -2616,7 +872,7 @@ export function createStore(filePath, options = {}) {
2616
872
  createdAt: now,
2617
873
  updatedAt: now,
2618
874
  };
2619
- const writeTxn = db.transaction((job) => {
875
+ const writeTxn = getDb().transaction((job) => {
2620
876
  addUserToDb(job);
2621
877
  addJobBaseToDb(job);
2622
878
  });
@@ -2628,195 +884,26 @@ export function createStore(filePath, options = {}) {
2628
884
  }
2629
885
 
2630
886
  function getPendingUsers() {
2631
- if (db) {
887
+ if (getDb()) {
2632
888
  return getAllJobs().filter((u) => u.status === "pending");
2633
889
  }
2634
890
  return data.filter((u) => u.status === "pending");
2635
891
  }
2636
892
 
2637
893
  function getProcessedUsers() {
2638
- if (db) {
894
+ if (getDb()) {
2639
895
  return getAllJobs().filter((u) => u.status === "done");
2640
896
  }
2641
897
  return data.filter((u) => u.status === "done");
2642
898
  }
2643
899
 
2644
900
  function getAllUsers() {
2645
- if (db) {
901
+ if (getDb()) {
2646
902
  return getAllJobs();
2647
903
  }
2648
904
  return data;
2649
905
  }
2650
906
 
2651
- /**
2652
- * 使用 LLM 对单个 job 的国家匹配度打分(0-100)
2653
- * @param {Object} job - raw_jobs 中的一条记录
2654
- * @param {string[]} targetLocations - 目标国家列表
2655
- * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
2656
- */
2657
- async function scoreJobLocation(job, targetLocations) {
2658
- const { fetch: undiciFetch } = await import("undici");
2659
-
2660
- const prompt = `
2661
- 你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
2662
-
2663
- 目标国家列表: ${targetLocations.join(", ")}
2664
-
2665
- 重要:
2666
- - 用户只要来自上述**任意一个**国家就算匹配。
2667
- - guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
2668
- - 请综合用户名、昵称、签名、位置等信息做判断。
2669
-
2670
- 用户信息:
2671
- - 用户名: ${job.unique_id || "未知"}
2672
- - 昵称: ${job.nickname || "未知"}
2673
- - 签名: ${job.signature || "未知"}
2674
- - 地区: ${job.region || "未知"}
2675
- - 猜测国家(参考): ${job.guessed_location || "未知"}
2676
- - 位置信息: ${job.location_created || "未知"}
2677
- - 主页链接: ${job.bio_link || "未知"}
2678
-
2679
- 返回 JSON(仅返回 JSON,无其他内容):
2680
- {"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
2681
-
2682
- Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
2683
- `;
2684
-
2685
- try {
2686
- const apiKey = process.env.APIKEY || "";
2687
- const response = await undiciFetch(
2688
- "http://82.156.52.214:18000/v1/chat/completions",
2689
- {
2690
- method: "POST",
2691
- headers: {
2692
- "Content-Type": "application/json",
2693
- Authorization: `Bearer ${apiKey}`,
2694
- },
2695
- body: JSON.stringify({
2696
- model: "zc-fast",
2697
- messages: [{ role: "user", content: prompt }],
2698
- max_tokens: 512,
2699
- temperature: 0.1,
2700
- }),
2701
- },
2702
- );
2703
-
2704
- const result = await response.json();
2705
- const content = result.choices?.[0]?.message?.content || "";
2706
-
2707
- // 解析 JSON 响应(多层容错)
2708
- let parsed = null;
2709
-
2710
- // 尝试 1: 直接解析
2711
- try {
2712
- parsed = JSON.parse(content);
2713
- } catch {
2714
- // 尝试 2: 提取 {} 包裹的内容
2715
- const match = content.match(/\{[\s\S]*\}/);
2716
- if (match) {
2717
- try {
2718
- parsed = JSON.parse(match[0]);
2719
- } catch {
2720
- // 尝试 3: 清理常见问题后解析
2721
- const cleaned = match[0]
2722
- .replace(/"/g, '"') // 弯引号 → 直引号
2723
- .replace(/\s+/g, " ") // 多余空白
2724
- .trim();
2725
- try {
2726
- parsed = JSON.parse(cleaned);
2727
- } catch {
2728
- // 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
2729
- const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
2730
- if (scoreMatch) {
2731
- let reason = "解析降级";
2732
- // 找 "reason": 的位置,取到最后一个 } 前的内容
2733
- const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
2734
- if (reasonKeyPos !== -1) {
2735
- const afterKey = content.substring(reasonKeyPos);
2736
- const colonPos = afterKey.indexOf(":");
2737
- const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
2738
- const rawValue = afterKey.substring(valueStart);
2739
- // 取到原始 content 最后一个 } 前
2740
- const lastBrace = content.lastIndexOf("}");
2741
- const reasonEnd = lastBrace - reasonKeyPos - valueStart;
2742
- if (reasonEnd > 0) {
2743
- reason = rawValue.substring(0, reasonEnd).trim();
2744
- // 去掉首尾的引号
2745
- if (reason.startsWith('"')) reason = reason.substring(1);
2746
- if (reason.endsWith('"'))
2747
- reason = reason.substring(0, reason.length - 1);
2748
- }
2749
- }
2750
- parsed = {
2751
- score: parseInt(scoreMatch[1]) || 50,
2752
- reason,
2753
- };
2754
- }
2755
- }
2756
- }
2757
- }
2758
-
2759
- // 尝试 5: 如果以上都失败,用更宽松的正则提取
2760
- if (!parsed) {
2761
- const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
2762
- const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
2763
- if (scoreMatch) {
2764
- parsed = {
2765
- score: parseInt(scoreMatch[1]) || 50,
2766
- reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
2767
- };
2768
- }
2769
- }
2770
- }
2771
-
2772
- if (parsed && typeof parsed.score === "number") {
2773
- return {
2774
- uniqueId: job.unique_id,
2775
- score: Math.max(0, Math.min(100, parsed.score)),
2776
- reason: parsed.reason || "",
2777
- };
2778
- }
2779
-
2780
- // 所有解析都失败,返回默认分
2781
- console.error(
2782
- `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
2783
- );
2784
- return {
2785
- uniqueId: job.unique_id,
2786
- score: 50,
2787
- reason: "LLM 响应解析失败,使用默认分",
2788
- };
2789
- } catch (e) {
2790
- console.error(
2791
- `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
2792
- );
2793
- return {
2794
- uniqueId: job.unique_id,
2795
- score: 50,
2796
- reason: `LLM 调用异常: ${e.message}`,
2797
- };
2798
- }
2799
- }
2800
-
2801
- /**
2802
- * 批量对 jobs 进行 LLM 国家匹配度打分
2803
- * @param {Object[]} jobs - raw_jobs 记录数组
2804
- * @param {string[]} targetLocations - 目标国家列表
2805
- * @param {number} batchSize - 每批处理数量(并发),默认 10
2806
- * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
2807
- */
2808
- async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
2809
- const results = [];
2810
- for (let i = 0; i < jobs.length; i += batchSize) {
2811
- const batch = jobs.slice(i, i + batchSize);
2812
- const batchResults = await Promise.all(
2813
- batch.map((job) => scoreJobLocation(job, targetLocations)),
2814
- );
2815
- results.push(...batchResults);
2816
- }
2817
- return results;
2818
- }
2819
-
2820
907
  /**
2821
908
  * 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
2822
909
  * @param {string[]} locations - 目标国家列表(null 表示不限制)
@@ -2828,8 +915,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2828
915
  * @returns {{ moved: number }} 实际移动的数量
2829
916
  */
2830
917
  function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
2831
- if (!db) {
2832
- return { moved: 0, error: "db not ready" };
918
+ if (!getDb()) {
919
+ return { moved: 0, error: "getDb() not ready" };
2833
920
  }
2834
921
 
2835
922
  const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
@@ -2860,7 +947,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2860
947
 
2861
948
  // 统计符合条件的数量
2862
949
  const count =
2863
- db
950
+ getDb()
2864
951
  .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
2865
952
  .get(...args)?.c || 0;
2866
953
 
@@ -2868,156 +955,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2868
955
  return { moved: 0 };
2869
956
  }
2870
957
 
2871
- // 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
958
+ // 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
2872
959
  if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
2873
- const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
2874
- const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
2875
- const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // tag 最少合格数
2876
- const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
2877
-
2878
- // 打印当前偏移量状态
2879
- const offsetSummary = Array.from(llmSampleOffsets.entries())
2880
- .map(([k, v]) => `${k}:${v}`)
2881
- .join(", ");
960
+ const llmTotal = options.llmTotal ?? 200; // 总条数
961
+ const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
962
+ const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
963
+ const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
964
+ const llmMinScore = options.llmMinScore ?? 60;
965
+ const llmSampleSize = options.llmSampleSize ?? 100;
966
+ const maxBatches = options.llmMaxBatches ?? 10;
967
+
2882
968
  console.error(
2883
- `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},tag 最少 ${llmMinTagReturn},非 tag 最少 ${llmMinNonTagReturn}`,
969
+ `[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
2884
970
  );
2885
- if (offsetSummary) {
2886
- console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
2887
- }
2888
971
 
2889
972
  // 返回 Promise,调用方需要 await
2890
973
  return (async () => {
2891
- const allTagQualified = []; // tag 合格列表(直接合格)
2892
- const allNonTagQualified = []; // 非 tag 合格列表(LLM 打分合格)
974
+ const allTagQualified = [];
975
+ const allNonTagQualified = [];
2893
976
  const allScores = [];
2894
977
 
2895
- // 按猜测国家分组处理,每个国家使用独立的偏移量
2896
- const locationGroups = normalizedLocations;
2897
- let totalBatches = 0;
978
+ // ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
979
+ let tagOffset = offsetStore.get("_tag") || 0;
980
+ const tagGlobalCount =
981
+ getDb()
982
+ .prepare(
983
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
984
+ )
985
+ .get(...args)?.c || 0;
2898
986
 
2899
- for (const location of locationGroups) {
2900
- // 获取该国家上次的偏移量
2901
- let offset = llmSampleOffsets.get(location) || 0;
987
+ if (tagOffset >= tagGlobalCount) {
988
+ tagOffset = 0;
989
+ offsetStore.set("_tag", 0);
990
+ }
2902
991
 
2903
- // 查询该国家的总数量
2904
- const locationCountSql = `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?`;
2905
- const locationArgs = [...args, location];
2906
- const locationCount =
2907
- db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
992
+ console.error(
993
+ `[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
994
+ );
2908
995
 
2909
- if (locationCount === 0) {
2910
- console.error(
2911
- `[data-store] 国家 ${location}: raw_jobs 中无数据,跳过`,
996
+ while (
997
+ allTagQualified.length < llmTagLimit &&
998
+ tagOffset < tagGlobalCount
999
+ ) {
1000
+ const batch = getDb()
1001
+ .prepare(
1002
+ `
1003
+ SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
1004
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
1005
+ LIMIT ? OFFSET ?
1006
+ `,
1007
+ )
1008
+ .all(
1009
+ ...args,
1010
+ Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
1011
+ tagOffset,
2912
1012
  );
1013
+
1014
+ if (!batch.length) break;
1015
+
1016
+ allTagQualified.push(...batch.map((s) => s.unique_id));
1017
+ tagOffset += batch.length;
1018
+
1019
+ console.error(
1020
+ `[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
1021
+ );
1022
+ }
1023
+
1024
+ offsetStore.set("_tag", tagOffset);
1025
+
1026
+ // ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
1027
+ for (const location of normalizedLocations) {
1028
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
1029
+
1030
+ const nonTagOffsetKey = `${location}:nonTag`;
1031
+ let offset = offsetStore.get(nonTagOffsetKey) || 0;
1032
+
1033
+ const locationArgs = [...args, location];
1034
+ const nonTagCount =
1035
+ getDb()
1036
+ .prepare(
1037
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
1038
+ )
1039
+ .get(...locationArgs)?.c || 0;
1040
+
1041
+ if (nonTagCount === 0) {
1042
+ console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
2913
1043
  continue;
2914
1044
  }
2915
1045
 
2916
- // 如果偏移量超过总数,重置为 0(一轮结束,重新开始)
2917
- if (offset >= locationCount) {
1046
+ if (offset >= nonTagCount) {
2918
1047
  offset = 0;
2919
- llmSampleOffsets.set(location, 0);
1048
+ offsetStore.set(nonTagOffsetKey, 0);
2920
1049
  }
2921
1050
 
2922
1051
  console.error(
2923
- `[data-store] 国家 ${location}: 共 ${locationCount} 条,从偏移量 ${offset} 开始`,
1052
+ `[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
2924
1053
  );
2925
1054
 
2926
1055
  for (let batch = 0; batch < maxBatches; batch++) {
2927
- const remaining = locationCount - offset;
2928
- if (remaining <= 0) break;
1056
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
2929
1057
 
2930
- const sampleLimit = Math.min(llmSampleSize, remaining);
2931
- const samples = db
1058
+ const samples = getDb()
2932
1059
  .prepare(
2933
1060
  `
2934
1061
  SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
2935
- ORDER BY
2936
- CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
2937
- COALESCE(video_count, 0) DESC, created_at DESC
1062
+ AND (sources NOT LIKE '%tag%' OR sources IS NULL)
1063
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
2938
1064
  LIMIT ? OFFSET ?
2939
1065
  `,
2940
1066
  )
2941
- .all(...locationArgs, sampleLimit, offset);
1067
+ .all(...locationArgs, llmSampleSize, offset);
2942
1068
 
2943
- if (samples.length === 0) break;
1069
+ if (!samples.length) break;
2944
1070
 
2945
- // 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
2946
- const tagSamples = samples.filter((s) =>
2947
- (s.sources || "").includes("tag"),
1071
+ const scores = await scoreJobsBatch(
1072
+ samples,
1073
+ DEFAULT_TARGET_LOCATIONS,
2948
1074
  );
2949
- const nonTagSamples = samples.filter(
2950
- (s) => !(s.sources || "").includes("tag"),
2951
- );
2952
-
2953
- // tag 来源直接加入合格列表
2954
- if (tagSamples.length > 0) {
2955
- allTagQualified.push(...tagSamples.map((s) => s.unique_id));
2956
- console.error(
2957
- `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2958
- );
2959
- }
2960
-
2961
- // 非 tag 来源走 LLM 打分
2962
- let batchQualified = [];
2963
- let scores = [];
2964
- if (nonTagSamples.length > 0) {
2965
- scores = await scoreJobsBatch(
2966
- nonTagSamples,
2967
- DEFAULT_TARGET_LOCATIONS,
2968
- );
2969
- batchQualified = scores.filter((s) => s.score >= llmMinScore);
2970
- allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
2971
- }
2972
-
1075
+ const qualified = scores.filter((s) => s.score >= llmMinScore);
1076
+ allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
2973
1077
  allScores.push(...scores);
2974
1078
 
2975
- totalBatches++;
2976
- const totalQualified = allTagQualified.length + allNonTagQualified.length;
1079
+ offset += samples.length;
1080
+ offsetStore.set(nonTagOffsetKey, offset);
1081
+
2977
1082
  console.error(
2978
- `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,tag 合格 ${allTagQualified.length},非 tag 合格 ${allNonTagQualified.length},累计 ${totalQualified} 条`,
1083
+ `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
2979
1084
  );
2980
-
2981
- // 更新偏移量记忆
2982
- offset += samples.length;
2983
- llmSampleOffsets.set(location, offset);
2984
-
2985
- // 检查是否两个类型都达到阈值,都达到才停止
2986
- const tagReached = allTagQualified.length >= llmMinTagReturn;
2987
- const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
2988
- if (tagReached && nonTagReached) {
2989
- console.error(
2990
- `[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
2991
- );
2992
- break;
2993
- }
2994
1085
  }
2995
-
2996
- // 检查是否两个类型都达到阈值,都达到才停止所有国家采样
2997
- const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
2998
- const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
2999
- if (tagReachedGlobal && nonTagReachedGlobal) break;
3000
1086
  }
3001
1087
 
3002
- // 最终合格列表:tag 优先 + 非 tag 按分数排序
3003
- // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3004
- const tagMaxCount = Math.floor(safeLimit * 0.7);
3005
- const tagCount = Math.min(allTagQualified.length, tagMaxCount);
3006
- const nonTagMaxCount = safeLimit - tagCount;
3007
-
3008
- const nonTagQualifiedScores = allScores
3009
- .filter((s) => s.score >= llmMinScore)
3010
- .sort((a, b) => b.score - a.score);
3011
- const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
3012
-
3013
- const qualified = [
3014
- ...allTagQualified.slice(0, tagCount),
3015
- ...finalNonTagQualified,
3016
- ];
1088
+ // ===== 最终结果 =====
1089
+ const qualified = [...allTagQualified, ...allNonTagQualified];
3017
1090
 
3018
1091
  if (!qualified.length) {
3019
1092
  console.error(
3020
- `[data-store] LLM 打分后无符合条件的任务(阈值: ${llmMinScore},共采样 ${allScores.length} 条)`,
1093
+ `[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
3021
1094
  );
3022
1095
  return {
3023
1096
  moved: 0,
@@ -3027,11 +1100,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3027
1100
  };
3028
1101
  }
3029
1102
 
1103
+ console.error(
1104
+ `[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
1105
+ );
1106
+
3030
1107
  // 移动符合条件的记录
3031
1108
  const placeholders = qualified.map(() => "?").join(", ");
3032
- const moveTxn = db.transaction(() => {
3033
- db.prepare(
3034
- `
1109
+ const moveTxn = getDb().transaction(() => {
1110
+ getDb()
1111
+ .prepare(
1112
+ `
3035
1113
  INSERT OR IGNORE INTO jobs (
3036
1114
  unique_id, nickname, status, sources, pinned,
3037
1115
  tt_seller, verified, video_count, comment_count,
@@ -3050,41 +1128,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3050
1128
  FROM raw_jobs
3051
1129
  WHERE unique_id IN (${placeholders})
3052
1130
  `,
3053
- ).run(...qualified);
1131
+ )
1132
+ .run(...qualified);
3054
1133
 
3055
- db.prepare(
3056
- `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
3057
- ).run(...qualified);
1134
+ getDb()
1135
+ .prepare(
1136
+ `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
1137
+ )
1138
+ .run(...qualified);
3058
1139
  });
3059
-
3060
1140
  moveTxn();
3061
1141
  markStatsDirty();
3062
1142
 
3063
1143
  // 持久化偏移量到数据库
3064
- saveLlmSampleOffsets();
1144
+ offsetStore.save();
3065
1145
 
3066
1146
  // 打印最终偏移量状态
3067
- const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
1147
+ const finalOffsetSummary = Array.from(offsetStore.entries())
3068
1148
  .map(([k, v]) => `${k}:${v}`)
3069
1149
  .join(", ");
3070
- console.error(
3071
- `[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
3072
- );
3073
1150
  console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
3074
- const scoresDetail = allScores.map((s) => s);
1151
+
3075
1152
  return {
3076
1153
  moved: qualified.length,
3077
1154
  scored: allScores.length,
3078
1155
  qualified: qualified.length,
3079
- scores: scoresDetail,
1156
+ scores: allScores,
3080
1157
  };
3081
1158
  })();
3082
1159
  }
3083
1160
 
3084
1161
  // 常规移动:INSERT + DELETE 事务
3085
- const moveTxn = db.transaction(() => {
3086
- db.prepare(
3087
- `
1162
+ const moveTxn = getDb().transaction(() => {
1163
+ getDb()
1164
+ .prepare(
1165
+ `
3088
1166
  INSERT OR IGNORE INTO jobs (
3089
1167
  unique_id, nickname, status, sources, pinned,
3090
1168
  tt_seller, verified, video_count, comment_count,
@@ -3107,11 +1185,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3107
1185
  COALESCE(video_count, 0) DESC, created_at DESC
3108
1186
  LIMIT ?
3109
1187
  `,
3110
- ).run(...args, safeLimit);
1188
+ )
1189
+ .run(...args, safeLimit);
3111
1190
 
3112
1191
  // 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
3113
- db.prepare(
3114
- `
1192
+ getDb()
1193
+ .prepare(
1194
+ `
3115
1195
  DELETE FROM raw_jobs
3116
1196
  WHERE unique_id IN (
3117
1197
  SELECT unique_id FROM raw_jobs
@@ -3122,7 +1202,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3122
1202
  LIMIT ?
3123
1203
  )
3124
1204
  `,
3125
- ).run(...args, safeLimit);
1205
+ )
1206
+ .run(...args, safeLimit);
3126
1207
  });
3127
1208
 
3128
1209
  moveTxn();
@@ -3140,9 +1221,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3140
1221
  ) {
3141
1222
  // 记录客户端登录状态
3142
1223
  clientLoginStatus.set(userId, !!loggedIn);
3143
- if (db) {
1224
+ if (getDb()) {
3144
1225
  const now = Date.now();
3145
- const ongoingRow = db
1226
+ const ongoingRow = getDb()
3146
1227
  .prepare(
3147
1228
  `
3148
1229
  SELECT *
@@ -3157,10 +1238,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3157
1238
  )
3158
1239
  .get(userId, now, expireMs);
3159
1240
  if (ongoingRow) {
3160
- db.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?").run(
3161
- now,
3162
- ongoingRow.unique_id,
3163
- );
1241
+ getDb()
1242
+ .prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
1243
+ .run(now, ongoingRow.unique_id);
3164
1244
  return {
3165
1245
  uniqueId: ongoingRow.unique_id,
3166
1246
  nickname: ongoingRow.nickname,
@@ -3242,7 +1322,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3242
1322
  for (const filter of filters) {
3243
1323
  where.push(filter);
3244
1324
  }
3245
- return db
1325
+ return getDb()
3246
1326
  .prepare(
3247
1327
  `
3248
1328
  SELECT *
@@ -3285,7 +1365,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3285
1365
  );
3286
1366
  args.push(...normalizedLocations);
3287
1367
  }
3288
- return db
1368
+ return getDb()
3289
1369
  .prepare(
3290
1370
  `
3291
1371
  SELECT *
@@ -3355,9 +1435,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3355
1435
 
3356
1436
  function claimRow(row) {
3357
1437
  if (!row) return null;
3358
- db.prepare(
3359
- "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
3360
- ).run(now, userId, row.unique_id);
1438
+ getDb()
1439
+ .prepare(
1440
+ "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
1441
+ )
1442
+ .run(now, userId, row.unique_id);
3361
1443
  markStatsDirty();
3362
1444
  return {
3363
1445
  uniqueId: row.unique_id,
@@ -3367,7 +1449,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3367
1449
  };
3368
1450
  }
3369
1451
 
3370
- const expiredRow = db
1452
+ const expiredRow = getDb()
3371
1453
  .prepare(
3372
1454
  `
3373
1455
  SELECT *
@@ -3382,9 +1464,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3382
1464
  .get(now, expireMs);
3383
1465
  let expiredCandidate = null;
3384
1466
  if (expiredRow) {
3385
- db.prepare(
3386
- "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
3387
- ).run(expiredRow.unique_id);
1467
+ getDb()
1468
+ .prepare(
1469
+ "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
1470
+ )
1471
+ .run(expiredRow.unique_id);
3388
1472
  expiredCandidate = mapJobRow({
3389
1473
  ...expiredRow,
3390
1474
  status: "pending",
@@ -3476,7 +1560,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3476
1560
  return null;
3477
1561
  }
3478
1562
 
3479
- if (!db) {
1563
+ if (!getDb()) {
3480
1564
  const now = Date.now();
3481
1565
 
3482
1566
  // 0. 该客户端有未过期的任务,续期返回
@@ -3614,16 +1698,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3614
1698
  locations = null,
3615
1699
  loggedIn = true,
3616
1700
  ) {
3617
- if (db) {
1701
+ if (getDb()) {
3618
1702
  const now = Date.now();
3619
1703
  const info = {
3620
- path: "db",
1704
+ path: "getDb()",
3621
1705
  userId,
3622
1706
  expireMs,
3623
1707
  loggedIn,
3624
1708
  };
3625
1709
 
3626
- const ongoingRow = db
1710
+ const ongoingRow = getDb()
3627
1711
  .prepare(
3628
1712
  `
3629
1713
  SELECT *
@@ -3723,7 +1807,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3723
1807
  ORDER BY follower_count DESC, created_at ASC, unique_id ASC
3724
1808
  LIMIT 1
3725
1809
  `;
3726
- const row = db.prepare(sql).get(...args);
1810
+ const row = getDb()
1811
+ .prepare(sql)
1812
+ .get(...args);
3727
1813
  return { row, sql, args };
3728
1814
  }
3729
1815
 
@@ -3764,7 +1850,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3764
1850
  ORDER BY created_at ASC, unique_id ASC
3765
1851
  LIMIT 1
3766
1852
  `;
3767
- const row = db.prepare(sql).get(...args);
1853
+ const row = getDb()
1854
+ .prepare(sql)
1855
+ .get(...args);
3768
1856
  return { row, sql, args };
3769
1857
  }
3770
1858
 
@@ -3777,7 +1865,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3777
1865
  ORDER BY claimed_at ASC
3778
1866
  LIMIT 1
3779
1867
  `;
3780
- const expiredRow = db.prepare(expiredSql).get(now, expireMs);
1868
+ const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
3781
1869
  info.expired = expiredRow
3782
1870
  ? {
3783
1871
  uniqueId: expiredRow.unique_id,
@@ -4103,7 +2191,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4103
2191
  }
4104
2192
 
4105
2193
  function commitJob(uniqueId, result) {
4106
- if (db) {
2194
+ if (getDb()) {
4107
2195
  const user = getJob(uniqueId);
4108
2196
  if (!user) return { saved: false, error: "user not found" };
4109
2197
 
@@ -4129,7 +2217,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4129
2217
  }
4130
2218
 
4131
2219
  function commitNewExplore(uniqueId, result) {
4132
- if (db) {
2220
+ if (getDb()) {
4133
2221
  const existing = getJob(uniqueId);
4134
2222
  if (existing) {
4135
2223
  updateUserFromResult(existing, result);
@@ -4179,7 +2267,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4179
2267
  }
4180
2268
 
4181
2269
  function resetJob(uniqueId) {
4182
- if (db) {
2270
+ if (getDb()) {
4183
2271
  const user = getJob(uniqueId);
4184
2272
  if (!user) return { saved: false, error: "user not found" };
4185
2273
  user.status = "pending";
@@ -4210,7 +2298,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4210
2298
  }
4211
2299
 
4212
2300
  function togglePin(uniqueId) {
4213
- if (db) {
2301
+ if (getDb()) {
4214
2302
  const user = getJob(uniqueId);
4215
2303
  if (!user) return { saved: false, error: "user not found" };
4216
2304
  const nextPinned = !user.pinned;
@@ -4227,13 +2315,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4227
2315
  }
4228
2316
 
4229
2317
  function getNextRedoJob(userId, maxAgeSeconds = 43200) {
4230
- if (db) {
2318
+ if (getDb()) {
4231
2319
  const now = Date.now();
4232
2320
  const threshold = now - maxAgeSeconds * 1000;
4233
2321
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
4234
2322
  const targetLocations = DEFAULT_TARGET_LOCATIONS;
4235
2323
  const placeholders = targetLocations.map(() => "?").join(",");
4236
- const row = db
2324
+ const row = getDb()
4237
2325
  .prepare(
4238
2326
  `
4239
2327
  SELECT *
@@ -4248,9 +2336,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4248
2336
  )
4249
2337
  .get(...targetLocations, defaultTime, threshold, defaultTime);
4250
2338
  if (!row) return null;
4251
- db.prepare(
4252
- "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
4253
- ).run(now, now, row.unique_id);
2339
+ getDb()
2340
+ .prepare(
2341
+ "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
2342
+ )
2343
+ .run(now, now, row.unique_id);
4254
2344
  return {
4255
2345
  uniqueId: row.unique_id,
4256
2346
  nickname: row.nickname,
@@ -4299,7 +2389,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4299
2389
  }
4300
2390
 
4301
2391
  function commitRedoJob(uniqueId, result) {
4302
- if (db) {
2392
+ if (getDb()) {
4303
2393
  const user = getJob(uniqueId);
4304
2394
  if (!user) return { saved: false, error: "user not found" };
4305
2395
  user.refreshTime = Date.now();
@@ -4443,13 +2533,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4443
2533
  : [];
4444
2534
  const hasCountryFilter = targetCountries.length > 0;
4445
2535
 
4446
- if (db) {
2536
+ if (getDb()) {
4447
2537
  const l = Math.max(1, parseInt(limit) || 5);
4448
2538
 
4449
2539
  let sql = `
4450
2540
  SELECT *
4451
2541
  FROM jobs_base
4452
- WHERE COALESCE(tt_seller, '') = ''
2542
+ WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
4453
2543
  AND COALESCE(user_update_count, 0) <= 0
4454
2544
  `;
4455
2545
  const sqlParams = [];
@@ -4460,18 +2550,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4460
2550
  sqlParams.push(...targetCountries);
4461
2551
  }
4462
2552
 
4463
- // 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
2553
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余(最新任务优先)
4464
2554
  sql += ` ORDER BY
2555
+ CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
4465
2556
  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
4466
- created_at ASC,
4467
- unique_id ASC
2557
+ created_at DESC,
2558
+ unique_id DESC
4468
2559
  LIMIT ?`;
4469
2560
  sqlParams.push(l);
4470
2561
 
4471
- const rows = db.prepare(sql).all(...sqlParams);
2562
+ const rows = getDb()
2563
+ .prepare(sql)
2564
+ .all(...sqlParams);
4472
2565
  if (rows.length === 0) return [];
4473
2566
  const now = Date.now();
4474
- const bumpStmt = db.prepare(
2567
+ const bumpStmt = getDb().prepare(
4475
2568
  `
4476
2569
  UPDATE jobs_base
4477
2570
  SET user_update_count = COALESCE(user_update_count, 0) + 1,
@@ -4479,7 +2572,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4479
2572
  WHERE unique_id = ?
4480
2573
  `,
4481
2574
  );
4482
- const bumpTxn = db.transaction((items) => {
2575
+ const bumpTxn = getDb().transaction((items) => {
4483
2576
  for (const item of items) {
4484
2577
  bumpStmt.run(now, item.unique_id);
4485
2578
  }
@@ -4497,9 +2590,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4497
2590
  const pending = data
4498
2591
  .filter((u) => {
4499
2592
  const updateCount = u.userUpdateCount;
4500
- const ttSellerEmpty =
4501
- u.ttSeller === null || u.ttSeller === undefined || u.ttSeller === "";
4502
- if (!ttSellerEmpty) return false;
2593
+ // ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
2594
+ const ttSellerEligible =
2595
+ u.ttSeller === null ||
2596
+ u.ttSeller === undefined ||
2597
+ u.ttSeller === "" ||
2598
+ u.ttSeller === 1;
2599
+ if (!ttSellerEligible) return false;
4503
2600
  if (
4504
2601
  updateCount === null ||
4505
2602
  updateCount === undefined ||
@@ -4514,7 +2611,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4514
2611
  return false;
4515
2612
  })
4516
2613
  .sort((a, b) => {
4517
- // 优先级:sources 包含 "tag" 的任务优先
2614
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
2615
+ const aIsSeller = a.ttSeller === 1 ? 0 : 1;
2616
+ const bIsSeller = b.ttSeller === 1 ? 0 : 1;
2617
+ if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
4518
2618
  const aIsTag = (a.sources || "").includes("tag");
4519
2619
  const bIsTag = (b.sources || "").includes("tag");
4520
2620
  if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
@@ -4531,7 +2631,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4531
2631
  }
4532
2632
 
4533
2633
  function updateUserInfo(uniqueId, info) {
4534
- if (db) {
2634
+ if (getDb()) {
4535
2635
  return updateJobInfo(uniqueId, info, true);
4536
2636
  }
4537
2637
 
@@ -4550,15 +2650,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4550
2650
  }
4551
2651
 
4552
2652
  function updateUserLocation(uniqueId, location) {
4553
- if (db) {
4554
- const existing = db
2653
+ if (getDb()) {
2654
+ const existing = getDb()
4555
2655
  .prepare("SELECT * FROM jobs WHERE unique_id = ?")
4556
2656
  .get(uniqueId);
4557
2657
  if (!existing) return { error: "user not found" };
4558
2658
  const now = Date.now();
4559
- db.prepare(
4560
- "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
4561
- ).run(location, now, now, uniqueId);
2659
+ getDb()
2660
+ .prepare(
2661
+ "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
2662
+ )
2663
+ .run(location, now, now, uniqueId);
4562
2664
  return { ok: true, location, modifiedAt: now };
4563
2665
  }
4564
2666
 
@@ -4574,13 +2676,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4574
2676
 
4575
2677
  // 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
4576
2678
  function moveJobToRaw(uniqueId) {
4577
- if (!db) return false;
2679
+ if (!getDb()) return false;
4578
2680
  const safeId = String(uniqueId).trim();
4579
2681
  if (!safeId) return false;
4580
2682
 
4581
- const moveSingleTxn = db.transaction(() => {
4582
- db.prepare(
4583
- `
2683
+ const moveSingleTxn = getDb().transaction(() => {
2684
+ getDb()
2685
+ .prepare(
2686
+ `
4584
2687
  INSERT OR REPLACE INTO raw_jobs (
4585
2688
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4586
2689
  error, pinned, no_video, restricted, user_update_count,
@@ -4602,21 +2705,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4602
2705
  user_create_time
4603
2706
  FROM jobs WHERE unique_id = ?
4604
2707
  `,
4605
- ).run(safeId);
2708
+ )
2709
+ .run(safeId);
4606
2710
 
4607
- db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
2711
+ getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
4608
2712
  });
4609
2713
  moveSingleTxn();
4610
2714
  return true;
4611
2715
  }
4612
2716
 
4613
2717
  function batchUpdateUserInfo(updates) {
4614
- if (db) {
2718
+ if (getDb()) {
4615
2719
  const results = [];
4616
2720
  const rawMoveList = [];
4617
2721
  const sellerMoveList = [];
4618
2722
 
4619
- const txn = db.transaction((items) => {
2723
+ const txn = getDb().transaction((items) => {
4620
2724
  items.forEach((item) => {
4621
2725
  const uniqueId = item.uniqueId;
4622
2726
  // 处理 { error: true, statusCode: xxx } 的情况
@@ -4668,8 +2772,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4668
2772
  // 批量移动商家用户到 jobs
4669
2773
  if (sellerMoveList.length > 0) {
4670
2774
  const placeholders = sellerMoveList.map(() => "?").join(",");
4671
- db.prepare(
4672
- `
2775
+ getDb()
2776
+ .prepare(
2777
+ `
4673
2778
  INSERT OR REPLACE INTO jobs (
4674
2779
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4675
2780
  error, pinned, no_video, restricted, user_update_count,
@@ -4691,18 +2796,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4691
2796
  user_create_time
4692
2797
  FROM jobs_base WHERE unique_id IN (${placeholders})
4693
2798
  `,
4694
- ).run(...sellerMoveList);
2799
+ )
2800
+ .run(...sellerMoveList);
4695
2801
 
4696
- db.prepare(
4697
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4698
- ).run(...sellerMoveList);
2802
+ getDb()
2803
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2804
+ .run(...sellerMoveList);
4699
2805
  }
4700
2806
 
4701
2807
  // 批量移动非商家用户到 raw_jobs
4702
2808
  if (rawMoveList.length > 0) {
4703
2809
  const placeholders = rawMoveList.map(() => "?").join(",");
4704
- db.prepare(
4705
- `
2810
+ getDb()
2811
+ .prepare(
2812
+ `
4706
2813
  INSERT OR REPLACE INTO raw_jobs (
4707
2814
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4708
2815
  error, pinned, no_video, restricted, user_update_count,
@@ -4724,11 +2831,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4724
2831
  user_create_time
4725
2832
  FROM jobs_base WHERE unique_id IN (${placeholders})
4726
2833
  `,
4727
- ).run(...rawMoveList);
2834
+ )
2835
+ .run(...rawMoveList);
4728
2836
 
4729
- db.prepare(
4730
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4731
- ).run(...rawMoveList);
2837
+ getDb()
2838
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2839
+ .run(...rawMoveList);
4732
2840
  }
4733
2841
 
4734
2842
  // 清理内部标记
@@ -4780,8 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4780
2888
  return { registered: 0, skipped: 0 };
4781
2889
  }
4782
2890
 
4783
- if (db) {
4784
- const insertStmt = db.prepare(`
2891
+ if (getDb()) {
2892
+ const insertStmt = getDb().prepare(`
4785
2893
  INSERT OR IGNORE INTO videos (
4786
2894
  id,
4787
2895
  href,
@@ -4797,7 +2905,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4797
2905
  let registered = 0;
4798
2906
  let skipped = 0;
4799
2907
  const now = Date.now();
4800
- const txn = db.transaction((items) => {
2908
+ const txn = getDb().transaction((items) => {
4801
2909
  for (const item of items) {
4802
2910
  const result = insertStmt.run(
4803
2911
  item.id,
@@ -4844,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4844
2952
  }
4845
2953
 
4846
2954
  function getVideos() {
4847
- if (db) {
2955
+ if (getDb()) {
4848
2956
  return getAllVideoRows().map(mapVideoRow);
4849
2957
  }
4850
2958
  return videos;
@@ -4852,7 +2960,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4852
2960
 
4853
2961
  function getVideo(videoId) {
4854
2962
  if (!videoId) return null;
4855
- if (db) {
2963
+ if (getDb()) {
4856
2964
  return mapVideoRow(getVideoRow(videoId));
4857
2965
  }
4858
2966
  return videos.find((video) => video.id === videoId) || null;
@@ -4862,8 +2970,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4862
2970
  const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
4863
2971
  const safeOffset = Math.max(0, parseInt(offset) || 0);
4864
2972
 
4865
- if (db) {
4866
- const rows = db
2973
+ if (getDb()) {
2974
+ const rows = getDb()
4867
2975
  .prepare(
4868
2976
  `
4869
2977
  SELECT *
@@ -4873,7 +2981,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4873
2981
  `,
4874
2982
  )
4875
2983
  .all(safeLimit, safeOffset);
4876
- const total = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
2984
+ const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4877
2985
  return {
4878
2986
  total,
4879
2987
  limit: safeLimit,
@@ -4891,16 +2999,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4891
2999
  }
4892
3000
 
4893
3001
  function getVideoCount() {
4894
- if (db) {
4895
- return db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
3002
+ if (getDb()) {
3003
+ return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4896
3004
  }
4897
3005
  return videos.length;
4898
3006
  }
4899
3007
 
4900
3008
  function getPendingCommentTasks(limit) {
4901
- if (db) {
3009
+ if (getDb()) {
4902
3010
  const l = Math.max(1, parseInt(limit) || 1);
4903
- const rows = db
3011
+ const rows = getDb()
4904
3012
  .prepare(
4905
3013
  `
4906
3014
  SELECT *
@@ -4912,14 +3020,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4912
3020
  )
4913
3021
  .all(l);
4914
3022
  if (rows.length === 0) return [];
4915
- const bumpStmt = db.prepare(
3023
+ const bumpStmt = getDb().prepare(
4916
3024
  `
4917
3025
  UPDATE videos
4918
3026
  SET user_update_count = COALESCE(user_update_count, 0) + 1
4919
3027
  WHERE id = ?
4920
3028
  `,
4921
3029
  );
4922
- const bumpTxn = db.transaction((items) => {
3030
+ const bumpTxn = getDb().transaction((items) => {
4923
3031
  for (const item of items) bumpStmt.run(item.id);
4924
3032
  });
4925
3033
  bumpTxn(rows);
@@ -4949,17 +3057,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4949
3057
  }
4950
3058
 
4951
3059
  function commitCommentTask(videoId) {
4952
- if (db) {
3060
+ if (getDb()) {
4953
3061
  const video = getVideoRow(videoId);
4954
3062
  if (!video) return { ok: false, error: "video not found" };
4955
3063
  const nextCount = (video.user_update_count || 0) + 1;
4956
- db.prepare(
4957
- `
3064
+ getDb()
3065
+ .prepare(
3066
+ `
4958
3067
  UPDATE videos
4959
3068
  SET user_update_count = ?
4960
3069
  WHERE id = ?
4961
3070
  `,
4962
- ).run(nextCount, videoId);
3071
+ )
3072
+ .run(nextCount, videoId);
4963
3073
  return { ok: true, userUpdateCount: nextCount };
4964
3074
  }
4965
3075
 
@@ -5024,6 +3134,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5024
3134
  getClientLoginStatus,
5025
3135
  trackClient,
5026
3136
  getActiveClients,
3137
+ moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
5027
3138
  registerVideos,
5028
3139
  getVideo,
5029
3140
  getVideos,
@@ -5051,6 +3162,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5051
3162
 
5052
3163
  // 辅助函数:获取 LLM 采样偏移量
5053
3164
  function getLlmSampleOffsets() {
5054
- return Object.fromEntries(llmSampleOffsets);
3165
+ return Object.fromEntries(offsetStore.entries());
3166
+ }
3167
+
3168
+ // ===== 将 jobs/raw_jobs 中商家用户移动到 jobs_base =====
3169
+
3170
/**
 * Move seller rows (tt_seller = 1 AND video_count = 0) from `jobs` and
 * `raw_jobs` into `jobs_base`, reset user_update_count to 0 for every such
 * row in `jobs_base`, then delete the moved rows from the source tables.
 * tt_seller = 1 is kept so later queries can prioritise these rows.
 *
 * The insert / reset / delete steps run inside one transaction, so a failure
 * in any step rolls everything back instead of leaving rows duplicated
 * between the source tables and `jobs_base`.
 *
 * @returns {{ok: true, fromJobs: number, fromRawJobs: number,
 *            totalInserted: number, resetCount: number,
 *            availableInBase: number} | {ok: false, error: string}}
 *   Counts on success; `{ ok: false, error }` when the database is not ready
 *   or any step fails.
 */
function moveSellerJobsToBase() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const COLUMNS = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "claimed_by",
    "claimed_at",
    "error",
    "pinned",
    "no_video",
    "restricted",
    "user_update_count",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "confirmed_location",
    "modified_at",
    "follower_count",
    "following_count",
    "heart_count",
    "refresh_time",
    "processed",
    "processed_at",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "sec_uid",
    "status_code",
    "latest_video_time",
    "bio_link",
    // Copied by the other table-to-table moves in this file; without it the
    // value would be lost when a row is moved into jobs_base.
    "user_create_time",
  ];
  const cols = COLUMNS.join(",");
  const insertSql = `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM `;
  const condition = "WHERE tt_seller = 1 AND video_count = 0";

  let fromJobs = 0;
  let fromRawJobs = 0;
  let resetCount = 0;
  // Tracks which step is running so a failure can be reported precisely.
  let stage = "insert";

  const moveTxn = db.transaction(() => {
    // 1. jobs -> jobs_base, 2. raw_jobs -> jobs_base
    stage = "insert";
    fromJobs = db.prepare(`${insertSql}jobs ${condition}`).run().changes;
    fromRawJobs = db.prepare(`${insertSql}raw_jobs ${condition}`).run().changes;

    // 3. Reset user_update_count for the rows just moved and for matching
    //    rows that were already sitting in jobs_base. The affected-row count
    //    comes from the object returned by run(), not from the statement.
    stage = "reset";
    resetCount = db
      .prepare(
        `UPDATE jobs_base
         SET user_update_count = 0
         WHERE video_count = 0
           AND tt_seller = 1`,
      )
      .run().changes;

    // 4. Remove the source rows. Rows skipped by INSERT OR IGNORE (presumably
    //    because the unique_id already exists in jobs_base) are deleted too,
    //    so the existing jobs_base copy is the one that survives.
    stage = "delete";
    db.prepare(`DELETE FROM jobs ${condition}`).run();
    db.prepare(`DELETE FROM raw_jobs ${condition}`).run();
  });

  try {
    moveTxn();
  } catch (e) {
    if (stage === "insert") return { ok: false, error: e.message };
    return {
      ok: false,
      error: `${stage} failed: ${e.message} (transaction rolled back)`,
    };
  }

  // 5. Verification: count jobs_base rows that match the seller condition and
  //    have not been handed out yet (user_update_count <= 0).
  let available = 0;
  try {
    const row = db
      .prepare(
        `SELECT COUNT(*) as total FROM jobs_base
         WHERE tt_seller = 1
           AND COALESCE(user_update_count, 0) <= 0
           AND video_count = 0`,
      )
      .get();
    available = row.total;
  } catch {
    // Best effort: the move itself already committed, so a failed count must
    // not turn a successful move into an error; report 0 instead.
  }

  return {
    ok: true,
    fromJobs,
    fromRawJobs,
    totalInserted: fromJobs + fromRawJobs,
    resetCount,
    availableInBase: available,
  };
}
5056
3287
  }