tt-help-cli-ycl 1.3.93 → 1.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1580 +1,190 @@
1
- import fs from "fs";
2
- import path from "path";
3
- import Database from "better-sqlite3";
4
- import {
5
- isLocationInList,
6
- DEFAULT_TARGET_LOCATIONS,
7
- } from "../lib/target-locations.js";
8
-
9
- // SQLite users table (used for de-duplication)
10
- let db = null;
11
- let dbPath = null;
12
-
13
/**
 * Resolve a database file path to an absolute path and check its extension.
 *
 * @param {string} filePath - Path to the SQLite file; must end in `.db` (case-insensitive).
 * @returns {string} The absolute path produced by `path.resolve`.
 * @throws {Error} When `filePath` is falsy or does not have a `.db` extension.
 */
function normalizeDbFilePath(filePath) {
  if (!filePath) {
    throw new Error("db path is required");
  }

  const absolutePath = path.resolve(filePath);
  const extension = path.extname(absolutePath).toLowerCase();
  if (extension === ".db") {
    return absolutePath;
  }

  // The message echoes the caller's original (unresolved) input.
  throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
}
23
-
24
/**
 * Close the module-level database connection (if one is open) and forget
 * the remembered database path.
 */
function resetDbConnection() {
  const openConnection = db;
  if (openConnection) {
    openConnection.close();
    db = null;
  }
  dbPath = null;
}
31
-
32
/**
 * Read the legacy user JSON files and merge their records by unique id.
 *
 * Each file is expected to hold a JSON array. Records are keyed by
 * `uniqueId` (falling back to `unique_id`); records without an id are
 * skipped. When the same id appears more than once, later fields overwrite
 * earlier ones, and the done-file is read after the user-file.
 *
 * Missing paths, missing files and non-array payloads are ignored. A read or
 * parse failure is logged and does not abort the other file.
 *
 * @param {string} [userFilePath] - Path of the legacy `result.json`.
 * @param {string} [doneFilePath] - Path of the legacy `result-done.json`.
 * @returns {object[]} Merged records, each carrying a `uniqueId` property.
 */
function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
  const usersById = new Map();
  const legacySources = [
    [userFilePath, "result.json"],
    [doneFilePath, "result-done.json"],
  ];

  for (const [sourcePath, label] of legacySources) {
    if (!sourcePath || !fs.existsSync(sourcePath)) {
      continue;
    }

    try {
      const records = JSON.parse(fs.readFileSync(sourcePath, "utf-8"));
      if (!Array.isArray(records)) {
        continue;
      }

      records.forEach((record) => {
        const uniqueId = record?.uniqueId || record?.unique_id;
        if (uniqueId) {
          const previous = usersById.get(uniqueId);
          usersById.set(uniqueId, { ...previous, ...record, uniqueId });
        }
      });
    } catch (e) {
      console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
    }
  }

  return Array.from(usersById.values());
}
60
-
61
/**
 * Read the legacy videos JSON file.
 *
 * @param {string} [videoPath] - Path of the legacy `result-videos.json`.
 * @returns {Array} The parsed array, or an empty array when the path is
 *   falsy, the file does not exist, the payload is not an array, or
 *   reading/parsing fails (the failure is logged).
 */
function loadLegacyVideosFromFile(videoPath) {
  const fileAvailable = Boolean(videoPath) && fs.existsSync(videoPath);
  if (!fileAvailable) {
    return [];
  }

  let payload;
  try {
    payload = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
  } catch (e) {
    console.error(
      `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
    );
    return [];
  }

  if (Array.isArray(payload)) {
    return payload;
  }
  return [];
}
75
-
76
/**
 * Open (creating if necessary) the SQLite database at `filePath`, store the
 * connection in the module-level `db` / `dbPath`, and bring the schema up to
 * date.
 *
 * Steps, in order: create the `users`, `jobs`, `jobs_base`, `raw_jobs` and
 * `videos` tables if absent (adding any columns that older databases lack),
 * create the job/video indexes, add missing `videos` columns, create the
 * `tags` table with its indexes, then log the current `users` row count.
 *
 * @param {string} filePath - Database path; validated by `normalizeDbFilePath`.
 * @throws {Error} Whatever `normalizeDbFilePath`, the filesystem or the
 *   database driver throws.
 */
function initUserDb(filePath) {
  // Column definitions shared by jobs / jobs_base / raw_jobs, in table order.
  const sharedJobColumns = [
    "unique_id TEXT PRIMARY KEY",
    "nickname TEXT",
    "status TEXT DEFAULT 'pending'",
    "sources TEXT",
    "claimed_by TEXT",
    "claimed_at INTEGER",
    "error TEXT",
    "pinned INTEGER DEFAULT 0",
    "no_video INTEGER DEFAULT 0",
    "restricted INTEGER DEFAULT 0",
    "user_update_count INTEGER DEFAULT 0",
    "tt_seller INTEGER",
    "verified INTEGER",
    "video_count INTEGER DEFAULT 0",
    "comment_count INTEGER DEFAULT 0",
    "guessed_location TEXT",
    "location_created TEXT",
    "confirmed_location TEXT",
    "modified_at INTEGER",
    "follower_count INTEGER DEFAULT 0",
    "following_count INTEGER DEFAULT 0",
    "heart_count INTEGER DEFAULT 0",
    "refresh_time INTEGER",
    "processed INTEGER DEFAULT 0",
    "processed_at INTEGER",
    "created_at INTEGER",
    "updated_at INTEGER",
    "region TEXT",
    "signature TEXT",
    "sec_uid TEXT",
    "status_code INTEGER",
  ];

  // Create one of the job-shaped tables: shared columns followed by extras.
  const createJobTable = (table, extraColumns = []) => {
    const columnSql = [...sharedJobColumns, ...extraColumns].join(",\n        ");
    db.exec(`
      CREATE TABLE IF NOT EXISTS ${table} (
        ${columnSql}
      )
    `);
  };

  // Migration helper: add each listed column that the existing table lacks.
  // The column set is read once, before any ALTER, and columns are added in
  // list order.
  const addMissingColumns = (table, columns) => {
    const existing = new Set(
      db
        .prepare(`PRAGMA table_info(${table})`)
        .all()
        .map((column) => column.name),
    );
    for (const [name, type] of columns) {
      if (!existing.has(name)) {
        db.exec(`ALTER TABLE ${table} ADD COLUMN ${name} ${type}`);
      }
    }
  };

  dbPath = normalizeDbFilePath(filePath);
  fs.mkdirSync(path.dirname(dbPath), { recursive: true });
  db = new Database(dbPath);
  db.pragma("journal_mode = WAL");

  db.exec(`
    CREATE TABLE IF NOT EXISTS users (
      unique_id TEXT PRIMARY KEY,
      tt_seller TEXT,
      verified INTEGER,
      location_created TEXT,
      created_at TEXT,
      updated_at TEXT
    )
  `);

  createJobTable("jobs");
  addMissingColumns("jobs", [
    ["status_code", "INTEGER"],
    ["latest_video_time", "INTEGER"],
    ["confirmed_location", "TEXT"],
    ["modified_at", "INTEGER"],
    ["bio_link", "TEXT"],
    ["top_video_play_count", "INTEGER"],
    ["top_video_href", "TEXT"],
    ["user_create_time", "INTEGER"],
  ]);

  createJobTable("jobs_base", ["latest_video_time INTEGER", "bio_link TEXT"]);
  addMissingColumns("jobs_base", [
    ["status_code", "INTEGER"],
    ["latest_video_time", "INTEGER"],
    ["confirmed_location", "TEXT"],
    ["modified_at", "INTEGER"],
    ["bio_link", "TEXT"],
    ["user_create_time", "INTEGER"],
  ]);

  createJobTable("raw_jobs", ["latest_video_time INTEGER"]);
  addMissingColumns("raw_jobs", [
    ["status_code", "INTEGER"],
    ["latest_video_time", "INTEGER"],
    ["confirmed_location", "TEXT"],
    ["modified_at", "INTEGER"],
    ["bio_link", "TEXT"],
    ["user_create_time", "INTEGER"],
  ]);

  db.exec(`
    CREATE TABLE IF NOT EXISTS videos (
      id TEXT PRIMARY KEY,
      href TEXT,
      author_unique_id TEXT,
      location_created TEXT,
      tt_seller INTEGER DEFAULT 0,
      registered_at INTEGER,
      user_update_count INTEGER DEFAULT 0,
      play_count INTEGER,
      digg_count INTEGER,
      comment_count INTEGER,
      share_count INTEGER,
      collect_count INTEGER,
      stats_updated_at INTEGER,
      create_time INTEGER
    )
  `);

  // Expression shared by the three "claim pending" partial indexes.
  const claimOrderColumns =
    "UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC";

  const indexStatements = [
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_status_video
      ON jobs(status, video_count DESC)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
      ON jobs(claimed_by, status, claimed_at)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
      ON jobs(status, claimed_at)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
      ON jobs(tt_seller, verified, location_created, refresh_time)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
      ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
      ON jobs(created_at ASC, unique_id ASC)
      WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
      ON jobs(${claimOrderColumns})
      WHERE status = 'pending'
        AND COALESCE(pinned, 0) = 0
        AND tt_seller = 1
        AND verified = 0
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
      ON jobs(${claimOrderColumns})
      WHERE status = 'pending'
        AND COALESCE(pinned, 0) = 0
        AND (
          instr(COALESCE(sources, ''), '"following"') > 0
          OR instr(COALESCE(sources, ''), '"follower"') > 0
        )
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
      ON jobs(${claimOrderColumns})
      WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
      ON jobs(created_at ASC, unique_id ASC)
      WHERE (tt_seller IS NULL OR tt_seller = '')
        AND (user_update_count IS NULL OR user_update_count <= 0)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
      ON jobs(created_at ASC, unique_id ASC)
      WHERE COALESCE(tt_seller, '') = ''
        AND COALESCE(user_update_count, 0) <= 0
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
      ON videos(user_update_count, tt_seller DESC, registered_at ASC)
    `,
    `
      CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
      ON videos(tt_seller DESC, registered_at ASC, id)
      WHERE user_update_count IS NULL OR user_update_count <= 0
    `,
  ];
  for (const statement of indexStatements) {
    db.exec(statement);
  }

  // Migration: statistics columns and create_time for pre-existing videos tables.
  addMissingColumns("videos", [
    ["play_count", "INTEGER"],
    ["digg_count", "INTEGER"],
    ["comment_count", "INTEGER"],
    ["share_count", "INTEGER"],
    ["collect_count", "INTEGER"],
    ["stats_updated_at", "INTEGER"],
    ["create_time", "INTEGER"],
  ]);

  // tags table: tag discovery and scoring system.
  db.exec(`
    CREATE TABLE IF NOT EXISTS tags (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      tag TEXT NOT NULL UNIQUE,
      status TEXT NOT NULL DEFAULT 'new',
      score REAL NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      scored_at TEXT,
      score_count INTEGER NOT NULL DEFAULT 0,
      countries TEXT NOT NULL DEFAULT '[]',
      matched_countries TEXT DEFAULT '[]',
      total_posts INTEGER DEFAULT 0,
      author_count INTEGER DEFAULT 0,
      matched_authors INTEGER DEFAULT 0,
      pushed_users INTEGER DEFAULT 0,
      source TEXT NOT NULL DEFAULT 'llm',
      user_prompt TEXT,
      last_error TEXT
    )
  `);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);

  const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
  console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
}
423
-
424
/**
 * Import the legacy JSON result files into a SQLite database.
 *
 * Any open connection is reset, the database at `dbFilePath` is initialised,
 * and the legacy user and video records are inserted with `INSERT OR IGNORE`
 * (existing rows are left untouched). Each user is also registered as a job
 * through `addJobToDb`.
 *
 * @param {object} options
 * @param {string} options.dbFilePath - Target `.db` file.
 * @param {string} [options.usersFilePath] - Legacy users JSON file.
 * @param {string} [options.doneFilePath] - Legacy processed-users JSON file.
 * @param {string} [options.videosFilePath] - Legacy videos JSON file.
 * @returns {{dbPath: string, usersImported: number, jobsImported: number,
 *   videosImported: number, totalUsers: number, totalJobs: number,
 *   totalVideos: number}} Row-count deltas and totals after the import.
 */
export function importLegacyJsonToDb({
  dbFilePath,
  usersFilePath,
  doneFilePath,
  videosFilePath,
}) {
  resetDbConnection();
  initUserDb(dbFilePath);

  const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
  const legacyVideos = loadLegacyVideosFromFile(videosFilePath);

  const countRows = (table) =>
    db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c;

  const beforeUsers = countRows("users");
  const beforeJobs = countRows("jobs");
  const beforeVideos = countRows("videos");

  const insertUserStmt = db.prepare(`
    INSERT OR IGNORE INTO users (unique_id) VALUES (?)
  `);
  const insertVideoStmt = db.prepare(`
    INSERT OR IGNORE INTO videos (
      id,
      href,
      author_unique_id,
      location_created,
      tt_seller,
      registered_at,
      user_update_count,
      create_time
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const importUsersTxn = db.transaction((items) => {
    for (const item of items) {
      const uniqueId = item.uniqueId || item.unique_id;
      if (!uniqueId) continue;
      insertUserStmt.run(uniqueId);
      addJobToDb({ ...item, uniqueId });
    }
  });

  const importVideosTxn = db.transaction((items) => {
    for (const item of items) {
      if (!item?.id) continue;
      insertVideoStmt.run(
        item.id,
        item.href || null,
        item.authorUniqueId || item.author_unique_id || null,
        item.locationCreated || item.location_created || null,
        // Fall back to the snake_case key like every other field here, so a
        // record that only carries `tt_seller` keeps its seller flag.
        (item.ttSeller ?? item.tt_seller) ? 1 : 0,
        item.registeredAt || item.registered_at || Date.now(),
        item.userUpdateCount || item.user_update_count || 0,
        item.createTime || item.create_time || null,
      );
    }
  });

  importUsersTxn(legacyUsers);
  importVideosTxn(legacyVideos);

  const afterUsers = countRows("users");
  const afterJobs = countRows("jobs");
  const afterVideos = countRows("videos");

  return {
    dbPath,
    usersImported: afterUsers - beforeUsers,
    jobsImported: afterJobs - beforeJobs,
    videosImported: afterVideos - beforeVideos,
    totalUsers: afterUsers,
    totalJobs: afterJobs,
    totalVideos: afterVideos,
  };
}
499
-
500
/**
 * Public entry point for shutting the store down: delegates to
 * `resetDbConnection`.
 */
export function closeStoreDb() {
  resetDbConnection();
}
503
-
504
/**
 * Check whether a user id is already present in the `users` table.
 *
 * @param {string} uid - Value compared against `users.unique_id`.
 * @returns {boolean} `true` when a row exists; `false` when none exists or no
 *   database is open.
 */
function hasUserInDb(uid) {
  if (!db) {
    return false;
  }
  const lookup = db.prepare("SELECT 1 FROM users WHERE unique_id = ?");
  return Boolean(lookup.get(uid));
}
509
-
510
/**
 * Insert a user into the `users` table unless the id already exists
 * (`INSERT OR IGNORE`). Does nothing when no database is open.
 *
 * `ttSeller` and `verified` are stored as tri-state flags: `null` when the
 * value is `undefined`, `null` or `""`, otherwise 1 for truthy and 0 for
 * falsy. Both timestamps are ISO-8601 strings taken at insert time.
 *
 * @param {{uniqueId: string, ttSeller?: *, verified?: *, locationCreated?: string}} user
 */
function addUserToDb(user) {
  if (!db) {
    return;
  }

  const toNullableFlag = (value) => {
    if (value === undefined || value === null || value === "") {
      return null;
    }
    return value ? 1 : 0;
  };

  const insertUser = db.prepare(
    `
      INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `,
  );

  insertUser.run(
    user.uniqueId,
    toNullableFlag(user.ttSeller),
    toNullableFlag(user.verified),
    user.locationCreated || null,
    new Date().toISOString(),
    new Date().toISOString(),
  );
}
538
-
539
- function addJobToDb(user) {
540
- if (!db) return;
541
- const now = Date.now();
542
- db.prepare(
543
- `
544
- INSERT OR IGNORE INTO jobs (
545
- unique_id,
546
- nickname,
547
- status,
548
- sources,
549
- claimed_by,
550
- claimed_at,
551
- error,
552
- pinned,
553
- no_video,
554
- restricted,
555
- user_update_count,
556
- tt_seller,
557
- verified,
558
- video_count,
559
- comment_count,
560
- guessed_location,
561
- location_created,
562
- follower_count,
563
- following_count,
564
- heart_count,
565
- refresh_time,
566
- processed,
567
- processed_at,
568
- created_at,
569
- updated_at,
570
- region,
571
- signature,
572
- bio_link,
573
- sec_uid
574
- )
575
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
576
- `,
577
- ).run(
578
- user.uniqueId,
579
- user.nickname || null,
580
- user.status || inferStatus(user),
581
- JSON.stringify(
582
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
583
- ),
584
- user.claimedBy || null,
585
- user.claimedAt || null,
586
- user.error || null,
587
- user.pinned ? 1 : 0,
588
- user.noVideo ? 1 : 0,
589
- user.restricted ? 1 : 0,
590
- user.userUpdateCount || 0,
591
- user.ttSeller === undefined ||
592
- user.ttSeller === null ||
593
- user.ttSeller === ""
594
- ? null
595
- : user.ttSeller
596
- ? 1
597
- : 0,
598
- user.verified === undefined ||
599
- user.verified === null ||
600
- user.verified === ""
601
- ? null
602
- : user.verified
603
- ? 1
604
- : 0,
605
- user.videoCount || 0,
606
- user.commentCount || 0,
607
- user.guessedLocation || null,
608
- user.locationCreated || null,
609
- user.followerCount || 0,
610
- user.followingCount || 0,
611
- user.heartCount || 0,
612
- user.refreshTime || null,
613
- user.processed ? 1 : 0,
614
- user.processedAt || null,
615
- user.createdAt || now,
616
- user.updatedAt || now,
617
- user.region || null,
618
- user.signature || null,
619
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
620
- user.secUid || null,
621
- );
622
- }
623
-
624
/**
 * Count the rows of the `users` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getUserDbCount() {
  if (db) {
    const { c: total } = db.prepare("SELECT COUNT(*) as c FROM users").get();
    return total;
  }
  return 0;
}
628
-
629
/**
 * Count the rows of the `jobs` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getJobsCount() {
  if (db) {
    const { c: total } = db.prepare("SELECT COUNT(*) as c FROM jobs").get();
    return total;
  }
  return 0;
}
633
-
634
/**
 * Count the `jobs` rows whose status is `'pending'`.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getPendingJobsCount() {
  if (db) {
    const pendingQuery = db.prepare(
      "SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'",
    );
    return pendingQuery.get().c;
  }
  return 0;
}
640
-
641
/**
 * Count the `jobs` rows that have no `tt_seller` value (NULL or empty) and a
 * `user_update_count` that is NULL or not positive.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getPendingJobsUserUpdateCount() {
  if (!db) {
    return 0;
  }

  const queueQuery = db.prepare(
    `
      SELECT COUNT(*) as c
      FROM jobs
      WHERE COALESCE(tt_seller, '') = ''
        AND COALESCE(user_update_count, 0) <= 0
    `,
  );
  const { c: total } = queueQuery.get();
  return total;
}
654
-
655
/**
 * Count the rows of the `raw_jobs` table.
 *
 * @returns {number} Row count, or 0 when no database is open.
 */
function getRawJobsCount() {
  if (db) {
    const { c: total } = db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get();
    return total;
  }
  return 0;
}
659
-
660
/**
 * Collect the dashboard statistics from the database.
 *
 * A "target user" is a `jobs` row with `tt_seller = 1`, `verified = 0` and a
 * `location_created` contained in `targetLocations`; with an empty list no
 * row qualifies.
 *
 * `SUM()` yields NULL when the `jobs` table is empty, so every counter is
 * normalised to 0 before being returned.
 *
 * @param {string[]} [targetLocations=[]] - `location_created` values that
 *   count as targets.
 * @returns {object|null} The statistics object, or `null` when no database
 *   is open.
 */
function getDashboardStatsFromDb(targetLocations = []) {
  if (!db) return null;

  const targetPlaceholders = targetLocations.map(() => "?").join(", ");
  // Shared by the aggregate and per-country queries; "1 = 0" disables the
  // target condition when no locations were supplied.
  const targetFilterSql = targetLocations.length
    ? `AND location_created IN (${targetPlaceholders})`
    : "AND 1 = 0";
  const targetParams = targetLocations.length ? targetLocations : [];

  // All jobs-table aggregates are gathered in a single scan.
  const aggregateRow = db
    .prepare(
      `
        SELECT
          COUNT(*) as total,
          SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
          SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
          SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
          SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
          SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
          SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${targetFilterSql} THEN 1 ELSE 0 END) as targetUsers,
          SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
          SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
          SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
          SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
          SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
          SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
          SUM(CASE
            WHEN status != 'done'
              AND instr(COALESCE(sources, ''), '"video"') = 0
              AND instr(COALESCE(sources, ''), '"comment"') = 0
              AND instr(COALESCE(sources, ''), '"guess"') = 0
              AND instr(COALESCE(sources, ''), '"following"') = 0
              AND instr(COALESCE(sources, ''), '"follower"') = 0
            THEN 1 ELSE 0 END) as seed
        FROM jobs
      `,
    )
    .get(...targetParams);

  // userUpdateTasks is counted separately, from jobs_base.
  const userUpdateTasksRow = db
    .prepare(
      `
        SELECT COUNT(*) as userUpdateTasks
        FROM jobs_base
        WHERE COALESCE(tt_seller, '') = ''
          AND COALESCE(user_update_count, 0) <= 0
      `,
    )
    .get();

  // The per-country breakdowns need GROUP BY, so they stay separate queries.
  const countryStats = db
    .prepare(
      `
        SELECT
          COALESCE(location_created, '未知') as country,
          COUNT(*) as count,
          SUM(CASE
            WHEN tt_seller = 1 AND verified = 0 ${targetFilterSql}
            THEN 1 ELSE 0 END) as targetCount
        FROM jobs
        WHERE status = 'done'
        GROUP BY COALESCE(location_created, '未知')
        ORDER BY count DESC
      `,
    )
    .all(...targetParams);

  const targetCountryStats = targetLocations.length
    ? db
        .prepare(
          `
            SELECT location_created as country, COUNT(*) as count
            FROM jobs
            WHERE tt_seller = 1
              AND verified = 0
              AND location_created IN (${targetPlaceholders})
            GROUP BY location_created
            ORDER BY count DESC
          `,
        )
        .all(...targetLocations)
    : [];

  const jobsBaseCount = db
    .prepare("SELECT COUNT(*) as total FROM jobs_base")
    .get().total;

  // NULL aggregates (empty table) become 0.
  const stat = (name) => aggregateRow[name] ?? 0;

  return {
    totalUsers: stat("total"),
    rawJobs: getRawJobsCount(),
    dbTotalUsers: getUserDbCount(),
    jobsTotal: stat("total"),
    jobsBaseTotal: jobsBaseCount,
    jobsPending: stat("pending"),
    processedUsers: stat("done"),
    pendingUsers: stat("pending"),
    processingUsers: stat("processing"),
    restrictedUsers: stat("restricted"),
    errorUsers: stat("error"),
    targetUsers: stat("targetUsers"),
    userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
    targetCountryStats,
    countryStats,
    sourceStats: {
      seed: stat("seed"),
      video: stat("video"),
      comment: stat("comment"),
      guess: stat("guess"),
      following: stat("following"),
      follower: stat("follower"),
      processed: stat("done"),
      restricted: stat("restricted"),
      error: stat("error"),
      noVideo: stat("noVideo"),
    },
  };
}
785
-
786
/**
 * Count the pending `jobs` rows per `guessed_location` (NULL is reported as
 * '未知'), largest group first.
 *
 * @returns {Array<{country: string, count: number}>} One entry per location;
 *   an empty array when no database is open.
 */
function getPendingByCountryFromDb() {
  if (!db) {
    return [];
  }

  const pendingByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs
    WHERE status = 'pending'
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  return db.prepare(pendingByCountrySql).all();
}
806
-
807
/**
 * Count, per `guessed_location` (NULL is reported as '未知'), the
 * `jobs_base` rows that still await a profile update: `tt_seller` is NULL and
 * `user_update_count` is NULL or not positive. Largest group first.
 *
 * @returns {Array<{country: string, count: number}>} One entry per location;
 *   an empty array when no database is open.
 */
function getUserUpdateByCountryFromDb() {
  if (!db) {
    return [];
  }

  const userUpdateByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE tt_seller IS NULL
      AND COALESCE(user_update_count, 0) <= 0
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  return db.prepare(userUpdateByCountrySql).all();
}
828
-
829
/**
 * Count, per `guessed_location` (NULL is reported as '未知'), the
 * `jobs_base` rows whose `tt_seller` is NULL while `user_update_count`
 * equals 1. Largest group first.
 *
 * @returns {Array<{country: string, count: number}>} One entry per location;
 *   an empty array when no database is open.
 */
function getAttachStuckByCountryFromDb() {
  if (!db) {
    return [];
  }

  const stuckByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE tt_seller IS NULL
      AND COALESCE(user_update_count, 0) = 1
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  const stuckQuery = db.prepare(stuckByCountrySql);
  return stuckQuery.all();
}
847
-
848
/**
 * Put the "stuck" `jobs_base` rows of one country back into the profile
 * update queue: rows with no `tt_seller` value and `user_update_count = 1`
 * get `user_update_count = 0`, a fresh `updated_at`, and their claim cleared.
 *
 * The reported number is the row count of the UPDATE itself, so it always
 * matches what was actually changed and no separate counting query is needed.
 *
 * @param {string|null|undefined} country - `guessed_location` to match;
 *   `null`/`undefined` selects the '未知' (unknown) bucket.
 * @returns {{restored: number, country: string, error?: string}} Number of
 *   rows reset; `error` is set when no database is open or the country is
 *   blank.
 */
function restoreAttachStuckByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const { changes } = db
    .prepare(
      `
        UPDATE jobs_base
        SET user_update_count = 0,
            updated_at = ?,
            claimed_by = NULL,
            claimed_at = NULL
        WHERE COALESCE(tt_seller, '') = ''
          AND COALESCE(user_update_count, 0) = 1
          AND COALESCE(guessed_location, '未知') = ?
      `,
    )
    .run(Date.now(), normalizedCountry);

  return { restored: changes, country: normalizedCountry };
}
895
-
896
/**
 * Reset the pending `jobs` rows of one country: `user_update_count` becomes
 * 0, `updated_at` is refreshed and the claim (`claimed_by` / `claimed_at`)
 * is cleared.
 *
 * The reported number is the row count of the UPDATE itself, so it always
 * matches what was actually changed and no separate counting query is needed.
 *
 * @param {string|null|undefined} country - `guessed_location` to match;
 *   `null`/`undefined` selects the '未知' (unknown) bucket.
 * @returns {{reset: number, country: string, error?: string}} Number of rows
 *   reset; `error` is set when no database is open or the country is blank.
 */
function resetPendingByCountry(country) {
  if (!db) {
    return { reset: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      reset: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const { changes } = db
    .prepare(
      `
        UPDATE jobs
        SET user_update_count = 0,
            updated_at = ?,
            claimed_by = NULL,
            claimed_at = NULL
        WHERE status = 'pending'
          AND COALESCE(guessed_location, '未知') = ?
      `,
    )
    .run(Date.now(), normalizedCountry);

  return { reset: changes, country: normalizedCountry };
}
942
-
943
/**
 * Count the `raw_jobs` rows per `guessed_location` (NULL is reported as
 * '未知'), largest group first.
 *
 * @returns {Array<{country: string, count: number}>} One entry per location;
 *   an empty array when no database is open.
 */
function getRawByCountryFromDb() {
  if (!db) {
    return [];
  }

  const rawByCountrySql = `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM raw_jobs
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `;
  const rawQuery = db.prepare(rawByCountrySql);
  return rawQuery.all();
}
959
-
960
/**
 * Move the rows of one country out of a work queue and into `raw_jobs`.
 *
 * Supported scopes:
 * - `"pending"`: rows of `jobs` with `status = 'pending'` (same source as
 *   `getPendingByCountryFromDb`).
 * - `"userUpdate"`: rows of `jobs_base` with `tt_seller IS NULL` and a
 *   non-positive `user_update_count` (same source as
 *   `getUserUpdateByCountryFromDb`).
 *
 * Copy and delete run in one transaction. Besides the queue columns, the copy
 * carries `status_code`, `confirmed_location`, `modified_at` and `bio_link`,
 * so those values survive the move instead of being dropped with the source
 * row. The reported number is the count of rows actually deleted.
 *
 * @param {string} scope - `"pending"` or `"userUpdate"`.
 * @param {string|null|undefined} country - `guessed_location` to match;
 *   `null`/`undefined` selects the '未知' (unknown) bucket.
 * @returns {{moved: number, scope: string, country: string, error?: string}}
 *   Number of rows moved; `error` is set when no database is open, the
 *   country is blank, or the scope is unsupported.
 */
function moveJobsToRawByCountry(scope, country) {
  if (!db) {
    return { moved: 0, scope, country, error: "db not ready" };
  }

  const normalizedScope = String(scope || "").trim();
  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      moved: 0,
      scope: normalizedScope,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // A Map (not an object literal) so inherited keys such as "constructor"
  // are never mistaken for a scope.
  const scopeSources = new Map([
    ["pending", { table: "jobs", where: `status = 'pending'` }],
    [
      "userUpdate",
      {
        table: "jobs_base",
        where: `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`,
      },
    ],
  ]);
  const source = scopeSources.get(normalizedScope);
  if (!source) {
    return {
      moved: 0,
      scope: normalizedScope,
      country: normalizedCountry,
      error: "unsupported scope",
    };
  }

  // Columns present in jobs, jobs_base and raw_jobs alike.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, confirmed_location, modified_at,
    follower_count, following_count, heart_count, refresh_time, processed,
    processed_at, created_at, updated_at, region, signature,
    sec_uid, status_code, latest_video_time, bio_link, user_create_time
  `;
  const whereSql = `
    ${source.where}
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const moveTxn = db.transaction((targetCountry) => {
    db.prepare(
      `
        INSERT OR REPLACE INTO raw_jobs (
          ${columns}
        )
        SELECT
          ${columns}
        FROM ${source.table}
        WHERE ${whereSql}
      `,
    ).run(targetCountry);

    return db
      .prepare(
        `
          DELETE FROM ${source.table}
          WHERE ${whereSql}
        `,
      )
      .run(targetCountry).changes;
  });

  const moved = moveTxn(normalizedCountry);
  return { moved, scope: normalizedScope, country: normalizedCountry };
}
1058
-
1059
- function restoreRawJobsByCountry(country) {
1060
- if (!db) {
1061
- return { restored: 0, country, error: "db not ready" };
1062
- }
1063
-
1064
- const normalizedCountry = String(country == null ? "未知" : country).trim();
1065
- if (!normalizedCountry) {
1066
- return {
1067
- restored: 0,
1068
- country: normalizedCountry,
1069
- error: "country is required",
1070
- };
1071
- }
1072
-
1073
- const whereSql = `COALESCE(guessed_location, '未知') = ?`;
1074
- const count =
1075
- db
1076
- .prepare(
1077
- `
1078
- SELECT COUNT(*) as c
1079
- FROM raw_jobs
1080
- WHERE ${whereSql}
1081
- `,
1082
- )
1083
- .get(normalizedCountry)?.c || 0;
1084
-
1085
- if (!count) {
1086
- return { restored: 0, country: normalizedCountry };
1087
- }
1088
-
1089
- const restoreTxn = db.transaction((targetCountry) => {
1090
- db.prepare(
1091
- `
1092
- INSERT OR REPLACE INTO jobs (
1093
- unique_id,
1094
- nickname,
1095
- status,
1096
- sources,
1097
- claimed_by,
1098
- claimed_at,
1099
- error,
1100
- pinned,
1101
- no_video,
1102
- restricted,
1103
- user_update_count,
1104
- tt_seller,
1105
- verified,
1106
- video_count,
1107
- comment_count,
1108
- guessed_location,
1109
- location_created,
1110
- follower_count,
1111
- following_count,
1112
- heart_count,
1113
- refresh_time,
1114
- processed,
1115
- processed_at,
1116
- created_at,
1117
- updated_at,
1118
- region,
1119
- signature,
1120
- sec_uid
1121
- )
1122
- SELECT
1123
- unique_id,
1124
- nickname,
1125
- status,
1126
- sources,
1127
- claimed_by,
1128
- claimed_at,
1129
- error,
1130
- pinned,
1131
- no_video,
1132
- restricted,
1133
- user_update_count,
1134
- tt_seller,
1135
- verified,
1136
- video_count,
1137
- comment_count,
1138
- guessed_location,
1139
- location_created,
1140
- follower_count,
1141
- following_count,
1142
- heart_count,
1143
- refresh_time,
1144
- processed,
1145
- processed_at,
1146
- created_at,
1147
- updated_at,
1148
- region,
1149
- signature,
1150
- sec_uid
1151
- FROM raw_jobs
1152
- WHERE ${whereSql}
1153
- `,
1154
- ).run(targetCountry);
1155
-
1156
- db.prepare(
1157
- `
1158
- DELETE FROM raw_jobs
1159
- WHERE ${whereSql}
1160
- `,
1161
- ).run(targetCountry);
1162
- });
1163
-
1164
- restoreTxn(normalizedCountry);
1165
- return { restored: count, country: normalizedCountry };
1166
- }
1167
-
1168
- function restoreRawJobById(uniqueId) {
1169
- if (!db) {
1170
- return { restored: 0, uniqueId, error: "db not ready" };
1171
- }
1172
-
1173
- const safeId = String(uniqueId).trim();
1174
- if (!safeId) {
1175
- return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
1176
- }
1177
-
1178
- const exists =
1179
- db
1180
- .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
1181
- .get(safeId)?.c || 0;
1182
-
1183
- if (!exists) {
1184
- return { restored: 0, uniqueId: safeId };
1185
- }
1186
-
1187
- const restoreTxn = db.transaction(() => {
1188
- db.prepare(
1189
- `
1190
- INSERT OR REPLACE INTO jobs (
1191
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1192
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1193
- video_count, comment_count, guessed_location, location_created,
1194
- follower_count, following_count, heart_count, refresh_time,
1195
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1196
- )
1197
- SELECT
1198
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1199
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1200
- video_count, comment_count, guessed_location, location_created,
1201
- follower_count, following_count, heart_count, refresh_time,
1202
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1203
- FROM raw_jobs WHERE unique_id = ?
1204
- `,
1205
- ).run(safeId);
1206
-
1207
- db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
1208
- });
1209
-
1210
- restoreTxn();
1211
- return { restored: 1, uniqueId: safeId };
1212
- }
1213
-
1214
- function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
1215
- if (!db) {
1216
- return { restored: 0, error: "db not ready" };
1217
- }
1218
-
1219
- const where = [];
1220
- const args = [];
1221
-
1222
- if (search) {
1223
- where.push(
1224
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1225
- );
1226
- const likeVal = `%${search.toLowerCase()}%`;
1227
- args.push(likeVal, likeVal);
1228
- }
1229
-
1230
- if (location) {
1231
- where.push("COALESCE(guessed_location, '未知') = ?");
1232
- args.push(location);
1233
- }
1234
-
1235
- if (hasVideo) {
1236
- where.push("COALESCE(video_count, 0) > 0");
1237
- }
1238
-
1239
- if (hasFollower) {
1240
- where.push("COALESCE(follower_count, 0) > 0");
1241
- }
1
+ /**
2
+ * 数据存储主模块 — createStore() 编排器
3
+ *
4
+ * 本文件是数据存储的入口点,负责编排各子模块:
5
+ * - db-schema.js: 建表、迁移、全局连接管理
6
+ * - db-columns.js: 共享列名常量和 SQL 生成
7
+ * - db-crud.js: 基础 CRUD(增删改查、行映射)
8
+ * - db-stats.js: 仪表盘统计、按国家分组
9
+ * - db-raw-jobs.js: raw_jobs 移入/恢复
10
+ * - db-tags.js: Tag 发现与打分
11
+ * - llm-scoring.js: LLM 国家匹配度打分
12
+ *
13
+ * createStore() 保留为运行时编排器,管理:
14
+ * - 任务认领/提交(claimNextJob/commitJob)
15
+ * - 客户端追踪、视频管理、备份
16
+ * - 内存索引、stats 缓存
17
+ */
1242
18
 
1243
- if (where.length === 0) {
1244
- return { restored: 0, error: "at least one filter is required" };
1245
- }
1246
-
1247
- const whereSql = where.join(" AND ");
1248
-
1249
- const count =
1250
- db
1251
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
1252
- .get(...args)?.c || 0;
1253
-
1254
- if (!count) {
1255
- return { restored: 0 };
1256
- }
1257
-
1258
- const restoreTxn = db.transaction(() => {
1259
- db.prepare(
1260
- `
1261
- INSERT OR REPLACE INTO jobs (
1262
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1263
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1264
- video_count, comment_count, guessed_location, location_created,
1265
- follower_count, following_count, heart_count, refresh_time,
1266
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1267
- )
1268
- SELECT
1269
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1270
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1271
- video_count, comment_count, guessed_location, location_created,
1272
- follower_count, following_count, heart_count, refresh_time,
1273
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1274
- FROM raw_jobs WHERE ${whereSql}
1275
- `,
1276
- ).run(...args);
19
+ import fs from "fs";
20
+ import path from "path";
21
+ import Database from "better-sqlite3";
22
+ import {
23
+ isLocationInList,
24
+ DEFAULT_TARGET_LOCATIONS,
25
+ } from "../lib/target-locations.js";
1277
26
 
1278
- db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
1279
- });
27
+ // Schema 与连接管理
28
+ import {
29
+ getDb,
30
+ getDbPath,
31
+ initDb,
32
+ resetDbConnection,
33
+ loadLegacyUsersFromFiles,
34
+ loadLegacyVideosFromFile,
35
+ } from "./db-schema.js";
36
+
37
+ // CRUD 操作
38
+ import {
39
+ snakeToCamel,
40
+ camelToSnake,
41
+ normalizeJobValue,
42
+ mapJobRow,
43
+ mapVideoRow,
44
+ inferStatus,
45
+ hasUserInDb,
46
+ addUserToDb,
47
+ addJobToDb,
48
+ addJobBaseToDb,
49
+ addJob,
50
+ getJobRow,
51
+ getJobBaseRow,
52
+ getJob,
53
+ getAllJobs,
54
+ getVideoRow,
55
+ getAllVideoRows,
56
+ updateJobInfo,
57
+ updateJobBaseInfo,
58
+ getUserDbCount,
59
+ getJobsCount,
60
+ getPendingJobsCount,
61
+ getPendingJobsUserUpdateCount,
62
+ getRawJobsCount,
63
+ } from "./db-crud.js";
64
+
65
+ // 统计查询
66
+ import {
67
+ getDashboardStatsFromDb,
68
+ getPendingByCountryFromDb,
69
+ getUserUpdateByCountryFromDb,
70
+ getAttachStuckByCountryFromDb,
71
+ getRawByCountryFromDb,
72
+ restoreAttachStuckByCountry,
73
+ resetPendingByCountry,
74
+ } from "./db-stats.js";
75
+
76
+ // Raw Jobs 管理
77
+ import {
78
+ moveJobsToRawByCountry,
79
+ restoreRawJobsByCountry,
80
+ restoreRawJobById,
81
+ restoreRawJobsByFilter,
82
+ getRawJobsPageFromDb,
83
+ } from "./db-raw-jobs.js";
84
+
85
+ // Tag CRUD
86
+ import {
87
+ insertTag,
88
+ getTagsByStatus,
89
+ getTagsByCountry,
90
+ getDeadTags,
91
+ claimTag,
92
+ reportTagScore,
93
+ getAllTags,
94
+ rawQuery,
95
+ normalizeTags,
96
+ clearTags,
97
+ } from "./db-tags.js";
98
+
99
+ // LLM 打分
100
+ import {
101
+ scoreJobLocation,
102
+ scoreJobsBatch,
103
+ createLlmOffsetStore,
104
+ } from "./llm-scoring.js";
1280
105
 
1281
- restoreTxn();
1282
- return { restored: count };
1283
- }
106
+ // ===== 薄包装函数(保持外部 API 不变)=====
1284
107
 
1285
- function getRawJobsPageFromDb({
1286
- search,
1287
- location,
1288
- limit,
1289
- offset,
1290
- hasVideo,
1291
- hasFollower,
108
+ /**
109
+ * 导入历史 JSON 数据到 SQLite
110
+ */
111
+ export function importLegacyJsonToDb({
112
+ dbFilePath,
113
+ usersFilePath,
114
+ doneFilePath,
115
+ videosFilePath,
1292
116
  }) {
1293
- if (!db) return null;
1294
-
1295
- const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1296
- const safeOffset = Math.max(0, parseInt(offset) || 0);
1297
- const where = [];
1298
- const args = [];
1299
-
1300
- if (search) {
1301
- where.push(
1302
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1303
- );
1304
- const pattern = `%${String(search).toLowerCase()}%`;
1305
- args.push(pattern, pattern);
1306
- }
1307
- if (location) {
1308
- where.push("COALESCE(guessed_location, '未知') = ?");
1309
- args.push(location);
1310
- }
1311
- if (hasVideo) {
1312
- where.push("COALESCE(video_count, 0) > 0");
1313
- }
1314
- if (hasFollower) {
1315
- where.push("COALESCE(follower_count, 0) > 0");
1316
- }
1317
-
1318
- const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
1319
- const total = db
1320
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
1321
- .get(...args).c;
1322
-
1323
- const rows = db
1324
- .prepare(
1325
- `
1326
- SELECT *
1327
- FROM raw_jobs
1328
- ${whereSql}
1329
- ORDER BY created_at DESC, unique_id ASC
1330
- LIMIT ? OFFSET ?
1331
- `,
1332
- )
1333
- .all(...args, safeLimit, safeOffset);
1334
-
1335
- return {
1336
- total,
1337
- limit: safeLimit,
1338
- offset: safeOffset,
1339
- users: rows.map(mapJobRow),
1340
- };
1341
- }
1342
-
1343
- // ====== Tag 发现与打分 CRUD ======
1344
-
1345
- function insertTag(tag, countries, source = "llm") {
1346
- if (!db) return { inserted: false, error: "db not ready" };
1347
- // 防止存入带 # 前缀的 tag
1348
- const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
1349
- if (!normalized || normalized.length < 2) {
1350
- return { inserted: false, error: "invalid tag" };
1351
- }
1352
- try {
1353
- const result = db
1354
- .prepare(
1355
- `
1356
- INSERT OR IGNORE INTO tags (tag, countries, source)
1357
- VALUES (?, ?, ?)
1358
- `,
1359
- )
1360
- .run(normalized, JSON.stringify(countries), source);
1361
- return { inserted: result.changes > 0, tag: normalized };
1362
- } catch (e) {
1363
- return { inserted: false, error: e.message };
1364
- }
1365
- }
1366
-
1367
- function getTagsByStatus(status, limit = 100) {
1368
- if (!db) return [];
1369
- const rows = db
1370
- .prepare(
1371
- `
1372
- SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
1373
- `,
1374
- )
1375
- .all(status, limit);
1376
- return rows.map((r) => ({
1377
- ...r,
1378
- countries: JSON.parse(r.countries || "[]"),
1379
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1380
- }));
1381
- }
1382
-
1383
- function getTagsByCountry(country, minScore = 0) {
1384
- if (!db) return [];
1385
- const rows = db
1386
- .prepare(
1387
- `
1388
- SELECT * FROM tags WHERE status != 'dead'
1389
- ORDER BY score DESC
1390
- `,
1391
- )
1392
- .all();
1393
- // Filter in JS since countries is JSON
1394
- return rows
1395
- .map((r) => ({
1396
- ...r,
1397
- countries: JSON.parse(r.countries || "[]"),
1398
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1399
- }))
1400
- .filter((r) => r.countries.includes(country) && r.score >= minScore);
1401
- }
1402
-
1403
- function getDeadTags(country) {
1404
- if (!db) return [];
1405
- const rows = db
1406
- .prepare(
1407
- `
1408
- SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
1409
- `,
1410
- )
1411
- .all();
1412
- return rows
1413
- .map((r) => ({
1414
- ...r,
1415
- countries: JSON.parse(r.countries || "[]"),
1416
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1417
- }))
1418
- .filter((r) => r.countries.includes(country));
1419
- }
117
+ resetDbConnection();
118
+ initDb(dbFilePath);
1420
119
 
1421
- function claimTag(tag) {
1422
- if (!db) return { ok: false, error: "db not ready" };
1423
- // 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
1424
- const result = db
1425
- .prepare(
1426
- "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
1427
- )
1428
- .run(tag);
1429
- if (result.changes === 0) {
1430
- // 检查是否不存在 vs 已被别人锁定
1431
- const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1432
- if (!row) return { ok: false, error: "tag not found" };
1433
- return { ok: false, error: `tag status is ${row.status}, already claimed` };
1434
- }
1435
- return { ok: true, tag };
1436
- }
120
+ const db = getDb();
121
+ const dbPath = getDbPath();
1437
122
 
1438
- function reportTagScore(tag, fields) {
1439
- if (!db) return { ok: false, error: "db not ready" };
1440
- const {
1441
- score,
1442
- status,
1443
- totalPosts,
1444
- authorCount,
1445
- matchedAuthors,
1446
- matchedCountries,
1447
- pushedUsers,
1448
- error,
1449
- } = fields;
1450
- const matchedCountriesJson = matchedCountries
1451
- ? JSON.stringify(matchedCountries)
1452
- : null;
1453
- const now = new Date().toISOString();
1454
-
1455
- try {
1456
- const result = db
1457
- .prepare(
1458
- `
1459
- UPDATE tags SET
1460
- score = COALESCE(?, score),
1461
- status = COALESCE(?, status),
1462
- total_posts = COALESCE(?, total_posts),
1463
- author_count = COALESCE(?, author_count),
1464
- matched_authors = COALESCE(?, matched_authors),
1465
- matched_countries = COALESCE(?, matched_countries),
1466
- pushed_users = COALESCE(?, pushed_users),
1467
- last_error = COALESCE(?, last_error),
1468
- scored_at = ?,
1469
- score_count = score_count + 1
1470
- WHERE tag = ?
1471
- `,
1472
- )
1473
- .run(
1474
- score ?? null,
1475
- status ?? null,
1476
- totalPosts ?? null,
1477
- authorCount ?? null,
1478
- matchedAuthors ?? null,
1479
- matchedCountriesJson,
1480
- pushedUsers ?? null,
1481
- error ?? null,
1482
- now,
1483
- tag,
1484
- );
1485
- return { ok: result.changes > 0, tag };
1486
- } catch (e) {
1487
- return { ok: false, error: e.message };
1488
- }
1489
- }
123
+ const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
124
+ const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
1490
125
 
1491
- function getAllTags(limit = 200) {
1492
- if (!db) return [];
1493
- const rows = db
1494
- .prepare(
1495
- `
1496
- SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
1497
- `,
1498
- )
1499
- .all(limit);
1500
- return rows.map((r) => ({
1501
- ...r,
1502
- countries: JSON.parse(r.countries || "[]"),
1503
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1504
- }));
1505
- }
126
+ const beforeUsers = getDb()
127
+ .prepare("SELECT COUNT(*) as c FROM users")
128
+ .get().c;
129
+ const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
130
+ const beforeVideos = getDb()
131
+ .prepare("SELECT COUNT(*) as c FROM videos")
132
+ .get().c;
1506
133
 
1507
- // 调试接口:直接执行 SQL 查询,返回原始数据
1508
- function rawQuery(sql, params = []) {
1509
- if (!db) return { error: "db not ready" };
1510
- try {
1511
- const rows = db.prepare(sql).all(...params);
1512
- return { rows };
1513
- } catch (e) {
1514
- return { error: e.message };
1515
- }
1516
- }
134
+ const insertUserStmt = getDb().prepare(
135
+ `INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
136
+ );
137
+ const insertVideoStmt = getDb().prepare(
138
+ `INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
139
+ );
1517
140
 
1518
- // 清理 tags 表中以 # 开头的脏数据
1519
- function normalizeTags() {
1520
- if (!db) return { ok: false, error: "db not ready" };
1521
- const dirtyRows = db
1522
- .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
1523
- .all();
1524
- const fixed = [];
1525
- const merged = [];
1526
- const skipped = [];
1527
-
1528
- for (const row of dirtyRows) {
1529
- const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
1530
- if (!cleanTag || cleanTag.length < 2) {
1531
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1532
- skipped.push({
1533
- dirty: row.tag,
1534
- reason: "empty after normalize, deleted",
1535
- });
1536
- continue;
141
+ const importUsersTxn = getDb().transaction((items) => {
142
+ for (const item of items) {
143
+ const uniqueId = item.uniqueId || item.unique_id;
144
+ if (!uniqueId) continue;
145
+ insertUserStmt.run(uniqueId);
146
+ addJobToDb({ ...item, uniqueId });
1537
147
  }
148
+ });
1538
149
 
1539
- // 检查 cleanTag 是否已存在
1540
- const existing = db
1541
- .prepare("SELECT * FROM tags WHERE tag = ?")
1542
- .get(cleanTag);
1543
- if (existing) {
1544
- // 合并:保留已有 clean 版本,合并 countries
1545
- const oldCountries = JSON.parse(row.countries || "[]");
1546
- const existCountries = JSON.parse(existing.countries || "[]");
1547
- const mergedCountries = [
1548
- ...new Set([...existCountries, ...oldCountries]),
1549
- ];
1550
- db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
1551
- JSON.stringify(mergedCountries),
1552
- cleanTag,
150
+ const importVideosTxn = getDb().transaction((items) => {
151
+ for (const item of items) {
152
+ if (!item?.id) continue;
153
+ insertVideoStmt.run(
154
+ item.id,
155
+ item.href || null,
156
+ item.authorUniqueId || item.author_unique_id || null,
157
+ item.locationCreated || item.location_created || null,
158
+ item.ttSeller ? 1 : 0,
159
+ item.registeredAt || item.registered_at || Date.now(),
160
+ item.userUpdateCount || item.user_update_count || 0,
161
+ item.createTime || item.create_time || null,
1553
162
  );
1554
- // 删除脏数据
1555
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1556
- merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1557
- } else {
1558
- // 直接重命名
1559
- db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
1560
- fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1561
163
  }
1562
- }
164
+ });
165
+
166
+ importUsersTxn(legacyUsers);
167
+ importVideosTxn(legacyVideos);
168
+
169
+ const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
170
+ const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
171
+ const afterVideos = getDb()
172
+ .prepare("SELECT COUNT(*) as c FROM videos")
173
+ .get().c;
1563
174
 
1564
175
  return {
1565
- ok: true,
1566
- fixed: fixed.length,
1567
- merged: merged.length,
1568
- skipped: skipped.length,
1569
- details: { fixed, merged, skipped },
176
+ dbPath: getDbPath(),
177
+ usersImported: afterUsers - beforeUsers,
178
+ jobsImported: afterJobs - beforeJobs,
179
+ videosImported: afterVideos - beforeVideos,
180
+ totalUsers: afterUsers,
181
+ totalJobs: afterJobs,
182
+ totalVideos: afterVideos,
1570
183
  };
1571
184
  }
1572
185
 
1573
- function clearTags() {
1574
- if (!db) return { ok: false, error: "db not ready" };
1575
- const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
1576
- db.exec("DELETE FROM tags");
1577
- return { ok: true, deleted: count };
186
+ export function closeStoreDb() {
187
+ resetDbConnection();
1578
188
  }
1579
189
 
1580
190
  function getUsersPageFromDb({
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
1587
197
  offset,
1588
198
  targetLocations = [],
1589
199
  }) {
1590
- if (!db) return null;
200
+ if (!getDb()) return null;
1591
201
 
1592
202
  const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1593
203
  const safeOffset = Math.max(0, parseInt(offset) || 0);
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
1636
246
  if (cachedCount && Date.now() - cachedCount.time < 5000) {
1637
247
  total = cachedCount.c;
1638
248
  } else {
1639
- total = db
249
+ total = getDb()
1640
250
  .prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
1641
251
  .get(...args).c;
1642
252
  getUsersPageFromDb._countCache.set(cacheKey, {
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
1646
256
  }
1647
257
 
1648
258
  // 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
1649
- const rows = db
259
+ const rows = getDb()
1650
260
  .prepare(
1651
261
  `
1652
262
  SELECT
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
1688
298
  }
1689
299
 
1690
300
  function getTargetUsersFromDb(targetLocations = []) {
1691
- if (!db) return null;
301
+ if (!getDb()) return null;
1692
302
  if (!targetLocations.length) {
1693
303
  return { total: 0, users: [] };
1694
304
  }
1695
305
 
1696
306
  const placeholders = targetLocations.map(() => "?").join(", ");
1697
- const rows = db
307
+ const rows = getDb()
1698
308
  .prepare(
1699
309
  `
1700
310
  SELECT
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
1703
313
  location_created, latest_video_time, refresh_time,
1704
314
  guessed_location, pinned, processed_at, video_count,
1705
315
  no_video, claimed_by, claimed_at, created_at, updated_at
316
+ FROM jobs
317
+ WHERE tt_seller = 1
1706
318
  AND verified = 0
1707
319
  AND location_created IN (${placeholders})
1708
320
  ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
1718
330
  }
1719
331
 
1720
332
  function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1721
- if (!db) return null;
333
+ if (!getDb()) return null;
1722
334
  if (!targetLocations.length) {
1723
335
  return { countries: [] };
1724
336
  }
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1735
347
 
1736
348
  // 摘要模式:只返回各国统计数,不返回用户数据
1737
349
  if (summaryOnly) {
1738
- const statsRows = db
350
+ const statsRows = getDb()
1739
351
  .prepare(
1740
352
  `
1741
353
  SELECT location_created as country, COUNT(*) as count
@@ -1803,397 +415,79 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1803
415
  /SELECT[^FROM]*FROM/,
1804
416
  "SELECT COUNT(*) as cnt FROM",
1805
417
  );
1806
- const total = db.prepare(countSql).get(...params)?.cnt || 0;
418
+ const total =
419
+ getDb()
420
+ .prepare(countSql)
421
+ .get(...params)?.cnt || 0;
1807
422
 
1808
423
  sql += ` LIMIT ? OFFSET ?`;
1809
424
  const safeLimit = Math.min(Math.floor(limit), 10000);
1810
425
  const safeOffset = Math.max(Math.floor(offset), 0);
1811
426
 
1812
- const rows = db
1813
- .prepare(sql)
1814
- .all(...params, safeLimit, safeOffset)
1815
- .map(mapJobRow);
1816
-
1817
- return {
1818
- total,
1819
- limit: safeLimit,
1820
- offset: safeOffset,
1821
- users: rows,
1822
- };
1823
- }
1824
-
1825
- const rows = db
1826
- .prepare(
1827
- `
1828
- SELECT
1829
- unique_id,
1830
- nickname,
1831
- follower_count,
1832
- video_count,
1833
- tt_seller,
1834
- verified,
1835
- location_created,
1836
- confirmed_location,
1837
- modified_at,
1838
- latest_video_time,
1839
- refresh_time,
1840
- status,
1841
- sources
1842
- FROM jobs
1843
- WHERE tt_seller = 1
1844
- AND verified = 0
1845
- AND location_created IN (${placeholders})
1846
- ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
1847
- `,
1848
- )
1849
- .all(...targetLocations)
1850
- .map(mapJobRow);
1851
-
1852
- const countryMap = new Map();
1853
- for (const row of rows) {
1854
- const country = row.locationCreated || "未知";
1855
- if (!countryMap.has(country)) {
1856
- countryMap.set(country, []);
1857
- }
1858
- countryMap.get(country).push(row);
1859
- }
1860
-
1861
- const countries = [];
1862
- for (const [country, users] of countryMap) {
1863
- countries.push({
1864
- country,
1865
- count: users.length,
1866
- users,
1867
- });
1868
- }
1869
-
1870
- return {
1871
- total: rows.length,
1872
- countries,
1873
- };
1874
- }
1875
-
1876
- function snakeToCamel(key) {
1877
- return key.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
1878
- }
1879
-
1880
- function camelToSnake(key) {
1881
- return key.replace(/[A-Z]/g, (ch) => `_${ch.toLowerCase()}`);
1882
- }
1883
-
1884
- const jobBooleanColumns = new Set([
1885
- "pinned",
1886
- "no_video",
1887
- "restricted",
1888
- "processed",
1889
- "tt_seller",
1890
- "verified",
1891
- "error",
1892
- ]);
1893
-
1894
- const videoBooleanColumns = new Set(["tt_seller"]);
1895
-
1896
- const writableJobColumns = new Set([
1897
- "nickname",
1898
- "status",
1899
- "sources",
1900
- "claimed_by",
1901
- "claimed_at",
1902
- "error",
1903
- "pinned",
1904
- "no_video",
1905
- "restricted",
1906
- "user_update_count",
1907
- "tt_seller",
1908
- "verified",
1909
- "video_count",
1910
- "comment_count",
1911
- "guessed_location",
1912
- "location_created",
1913
- "confirmed_location",
1914
- "modified_at",
1915
- "follower_count",
1916
- "following_count",
1917
- "heart_count",
1918
- "refresh_time",
1919
- "processed",
1920
- "processed_at",
1921
- "updated_at",
1922
- "region",
1923
- "signature",
1924
- "bio_link",
1925
- "sec_uid",
1926
- "status_code",
1927
- "latest_video_time",
1928
- "top_video_play_count",
1929
- "top_video_href",
1930
- "user_create_time",
1931
- ]);
1932
-
1933
- function normalizeJobValue(column, value) {
1934
- if (value === undefined || value === null) return null;
1935
- if (column === "sources") {
1936
- if (!Array.isArray(value)) return JSON.stringify([]);
1937
- return JSON.stringify([...new Set(value)]);
1938
- }
1939
- if (jobBooleanColumns.has(column)) {
1940
- return value ? 1 : 0;
1941
- }
1942
- // 防御:如果值是对象或数组,转为 JSON 字符串
1943
- if (typeof value === "object") return JSON.stringify(value);
1944
- return value;
1945
- }
1946
-
1947
- function mapJobRow(row) {
1948
- if (!row) return undefined;
1949
- const mapped = {};
1950
- for (const [key, value] of Object.entries(row)) {
1951
- const camelKey = snakeToCamel(key);
1952
- if (key === "sources") {
1953
- try {
1954
- mapped[camelKey] = value ? JSON.parse(value) : [];
1955
- } catch {
1956
- mapped[camelKey] = [];
1957
- }
1958
- continue;
1959
- }
1960
- if (jobBooleanColumns.has(key)) {
1961
- mapped[camelKey] = value === null || value === undefined ? null : !!value;
1962
- continue;
1963
- }
1964
- mapped[camelKey] = value;
1965
- }
1966
- return mapped;
1967
- }
1968
-
1969
- function getJobRow(uniqueId) {
1970
- if (!db) return null;
1971
- return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
1972
- }
1973
-
1974
- function getJobBaseRow(uniqueId) {
1975
- if (!db) return null;
1976
- return db
1977
- .prepare("SELECT * FROM jobs_base WHERE unique_id = ?")
1978
- .get(uniqueId);
1979
- }
1980
-
1981
- function getJob(uniqueId) {
1982
- return mapJobRow(getJobRow(uniqueId));
1983
- }
1984
-
1985
- function getAllJobs() {
1986
- if (!db) return [];
1987
- return db.prepare("SELECT * FROM jobs").all().map(mapJobRow);
1988
- }
1989
-
1990
- function mapVideoRow(row) {
1991
- if (!row) return undefined;
1992
- const mapped = {};
1993
- for (const [key, value] of Object.entries(row)) {
1994
- const camelKey = snakeToCamel(key);
1995
- if (videoBooleanColumns.has(key)) {
1996
- mapped[camelKey] = value === null || value === undefined ? null : !!value;
1997
- continue;
1998
- }
1999
- mapped[camelKey] = value;
2000
- }
2001
- return mapped;
2002
- }
2003
-
2004
- function getVideoRow(videoId) {
2005
- if (!db) return null;
2006
- return db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId);
2007
- }
2008
-
2009
- function getAllVideoRows() {
2010
- if (!db) return [];
2011
- return db.prepare("SELECT * FROM videos").all();
2012
- }
2013
-
2014
- function updateJobInfo(uniqueId, info, incrementCount = true) {
2015
- if (!db) return { error: "db not initialized" };
2016
- const existing = getJobRow(uniqueId);
2017
- if (!existing) return { error: "user not found" };
2018
-
2019
- const nextValues = {};
2020
- for (const [key, value] of Object.entries(info || {})) {
2021
- if (key === "uniqueId" || key === "unique_id") continue;
2022
- if (value === undefined || value === "") continue;
2023
- let column = camelToSnake(key);
2024
- // 字段别名:bio → signature, createTime → user_create_time
2025
- if (column === "bio") column = "signature";
2026
- if (column === "create_time") column = "user_create_time";
2027
- if (!writableJobColumns.has(column)) continue;
2028
- nextValues[column] = normalizeJobValue(column, value);
2029
- }
2030
-
2031
- nextValues.updated_at = Date.now();
2032
- if (incrementCount) {
2033
- nextValues.user_update_count = (existing.user_update_count || 0) + 1;
2034
- }
2035
-
2036
- const columns = Object.keys(nextValues);
2037
- if (columns.length > 0) {
2038
- const sql = `UPDATE jobs SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
2039
- db.prepare(sql).run(
2040
- ...columns.map((column) => nextValues[column]),
2041
- uniqueId,
2042
- );
2043
- }
2044
-
2045
- return {
2046
- ok: true,
2047
- userUpdateCount:
2048
- nextValues.user_update_count ?? existing.user_update_count ?? 0,
2049
- };
2050
- }
2051
-
2052
- function inferStatus(u) {
2053
- if (u.restricted) return "restricted";
2054
- if (u.error) return "error";
2055
- if (u.processed) return "done";
2056
- return "pending";
2057
- }
427
+ const rows = getDb()
428
+ .prepare(sql)
429
+ .all(...params, safeLimit, safeOffset)
430
+ .map(mapJobRow);
2058
431
 
2059
- function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
2060
- if (!db) return { error: "db not initialized" };
2061
- const existing = getJobBaseRow(uniqueId);
2062
- if (!existing) return { error: "user not found" };
2063
-
2064
- const nextValues = {};
2065
- for (const [key, value] of Object.entries(info || {})) {
2066
- if (key === "uniqueId" || key === "unique_id") continue;
2067
- if (value === undefined || value === "") continue;
2068
- let column = camelToSnake(key);
2069
- // 字段别名:bio → signature, createTime → user_create_time
2070
- if (column === "bio") column = "signature";
2071
- if (column === "create_time") column = "user_create_time";
2072
- if (!writableJobColumns.has(column)) continue;
2073
- nextValues[column] = normalizeJobValue(column, value);
432
+ return {
433
+ total,
434
+ limit: safeLimit,
435
+ offset: safeOffset,
436
+ users: rows,
437
+ };
2074
438
  }
2075
439
 
2076
- nextValues.updated_at = Date.now();
2077
- if (incrementCount) {
2078
- nextValues.user_update_count = (existing.user_update_count || 0) + 1;
440
+ const rows = getDb()
441
+ .prepare(
442
+ `
443
+ SELECT
444
+ unique_id,
445
+ nickname,
446
+ follower_count,
447
+ video_count,
448
+ tt_seller,
449
+ verified,
450
+ location_created,
451
+ confirmed_location,
452
+ modified_at,
453
+ latest_video_time,
454
+ refresh_time,
455
+ status,
456
+ sources
457
+ FROM jobs
458
+ WHERE tt_seller = 1
459
+ AND verified = 0
460
+ AND location_created IN (${placeholders})
461
+ ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
462
+ `,
463
+ )
464
+ .all(...targetLocations)
465
+ .map(mapJobRow);
466
+
467
+ const countryMap = new Map();
468
+ for (const row of rows) {
469
+ const country = row.locationCreated || "未知";
470
+ if (!countryMap.has(country)) {
471
+ countryMap.set(country, []);
472
+ }
473
+ countryMap.get(country).push(row);
2079
474
  }
2080
475
 
2081
- const columns = Object.keys(nextValues);
2082
- if (columns.length > 0) {
2083
- const sql = `UPDATE jobs_base SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
2084
- db.prepare(sql).run(
2085
- ...columns.map((column) => nextValues[column]),
2086
- uniqueId,
2087
- );
476
+ const countries = [];
477
+ for (const [country, users] of countryMap) {
478
+ countries.push({
479
+ country,
480
+ count: users.length,
481
+ users,
482
+ });
2088
483
  }
2089
484
 
2090
485
  return {
2091
- ok: true,
2092
- userUpdateCount:
2093
- nextValues.user_update_count ?? existing.user_update_count ?? 0,
486
+ total: rows.length,
487
+ countries,
2094
488
  };
2095
489
  }
2096
490
 
2097
- function addJobBaseToDb(user) {
2098
- if (!db) return;
2099
- const now = Date.now();
2100
- db.prepare(
2101
- `
2102
- INSERT OR IGNORE INTO jobs_base (
2103
- unique_id,
2104
- nickname,
2105
- status,
2106
- sources,
2107
- claimed_by,
2108
- claimed_at,
2109
- error,
2110
- pinned,
2111
- no_video,
2112
- restricted,
2113
- user_update_count,
2114
- tt_seller,
2115
- verified,
2116
- video_count,
2117
- comment_count,
2118
- guessed_location,
2119
- location_created,
2120
- follower_count,
2121
- following_count,
2122
- heart_count,
2123
- refresh_time,
2124
- processed,
2125
- processed_at,
2126
- created_at,
2127
- updated_at,
2128
- region,
2129
- signature,
2130
- bio_link,
2131
- sec_uid
2132
- )
2133
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2134
- `,
2135
- ).run(
2136
- user.uniqueId,
2137
- user.nickname || null,
2138
- user.status || inferStatus(user),
2139
- JSON.stringify(
2140
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
2141
- ),
2142
- user.claimedBy || null,
2143
- user.claimedAt || null,
2144
- user.error || null,
2145
- user.pinned ? 1 : 0,
2146
- user.noVideo ? 1 : 0,
2147
- user.restricted ? 1 : 0,
2148
- user.userUpdateCount || 0,
2149
- user.ttSeller === undefined ||
2150
- user.ttSeller === null ||
2151
- user.ttSeller === ""
2152
- ? null
2153
- : user.ttSeller
2154
- ? 1
2155
- : 0,
2156
- user.verified === undefined ||
2157
- user.verified === null ||
2158
- user.verified === ""
2159
- ? null
2160
- : user.verified
2161
- ? 1
2162
- : 0,
2163
- user.videoCount || 0,
2164
- user.commentCount || 0,
2165
- user.guessedLocation || null,
2166
- user.locationCreated || null,
2167
- user.followerCount || 0,
2168
- user.followingCount || 0,
2169
- user.heartCount || 0,
2170
- user.refreshTime || null,
2171
- user.processed ? 1 : 0,
2172
- user.processedAt || null,
2173
- user.createdAt || now,
2174
- user.updatedAt || now,
2175
- user.region || null,
2176
- user.signature || null,
2177
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
2178
- user.secUid || null,
2179
- );
2180
- }
2181
-
2182
- function addJob(user) {
2183
- if (!db) {
2184
- addUserToDb(user);
2185
- return;
2186
- }
2187
- if (!user.status) user.status = inferStatus(user);
2188
- if (!user.createdAt) user.createdAt = Date.now();
2189
- if (!user.updatedAt) user.updatedAt = user.createdAt;
2190
- const writeTxn = db.transaction((job) => {
2191
- addUserToDb(job);
2192
- addJobToDb(job);
2193
- });
2194
- writeTxn(user);
2195
- }
2196
-
2197
491
  export function createStore(filePath, options = {}) {
2198
492
  if (!filePath) {
2199
493
  throw new Error("createStore requires an explicit .db path");
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
2219
513
  let refillLock = null; // Promise | null
2220
514
  // LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
2221
515
  // 格式: { "ES": 300, "PL": 500, "NL": 400 }
2222
- let llmSampleOffsets = new Map();
516
+ const offsetStore = createLlmOffsetStore();
2223
517
  if (filePath) {
2224
518
  // 初始化 SQLite 用户表(用于判重)
2225
- initUserDb(filePath);
519
+ initDb(filePath);
2226
520
  // 从数据库恢复偏移量
2227
- loadLlmSampleOffsets();
2228
- }
2229
-
2230
- /**
2231
- * 从数据库加载 LLM 采样偏移量
2232
- */
2233
- function loadLlmSampleOffsets() {
2234
- try {
2235
- const row = db
2236
- .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
2237
- .get();
2238
- if (row && row.offsets) {
2239
- const parsed = JSON.parse(row.offsets);
2240
- if (parsed && typeof parsed === "object") {
2241
- Object.entries(parsed).forEach(([k, v]) => {
2242
- llmSampleOffsets.set(k, v);
2243
- });
2244
- console.error(
2245
- `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
2246
- llmSampleOffsets.entries(),
2247
- )
2248
- .map(([k, v]) => `${k}:${v}`)
2249
- .join(", ")}`,
2250
- );
2251
- }
2252
- }
2253
- } catch (e) {
2254
- // 表不存在或解析失败,使用空偏移量
2255
- console.error(
2256
- `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
2257
- );
2258
- }
2259
- }
2260
-
2261
- /**
2262
- * 将 LLM 采样偏移量持久化到数据库
2263
- */
2264
- function saveLlmSampleOffsets() {
2265
- try {
2266
- const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
2267
- // 表不存在则创建
2268
- db.prepare(
2269
- `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
2270
- ).run();
2271
- // 插入或更新
2272
- db.prepare(
2273
- `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
2274
- ).run(offsetsJson);
2275
- } catch (e) {
2276
- console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
2277
- }
521
+ offsetStore.load();
2278
522
  }
2279
523
 
2280
524
  // stats 缓存
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
2287
531
  }
2288
532
 
2289
533
  function computeStatsInternal() {
2290
- if (db) {
534
+ if (getDb()) {
2291
535
  const total = getJobsCount();
2292
536
  const statusCounts = {
2293
537
  pending: 0,
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
2296
540
  error: 0,
2297
541
  restricted: 0,
2298
542
  };
2299
- const rows = db
543
+ const rows = getDb()
2300
544
  .prepare(
2301
545
  `
2302
546
  SELECT status, COUNT(*) as count
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
2372
616
  }
2373
617
 
2374
618
  function rebuildStatusGroups() {
2375
- if (db) {
619
+ if (getDb()) {
2376
620
  statusGroups = {
2377
621
  pending: [],
2378
622
  processing: [],
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
2436
680
 
2437
681
  function flushSave() {
2438
682
  // 数据库模式:先保存 LLM 偏移量,再备份数据库
2439
- if (db && dbPath) {
683
+ if (getDb() && getDbPath()) {
2440
684
  try {
2441
- saveLlmSampleOffsets();
685
+ offsetStore.save();
2442
686
  } catch (e) {
2443
687
  console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
2444
688
  }
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
2452
696
  * @returns {string|null} 备份文件路径,失败返回 null
2453
697
  */
2454
698
  function backupDatabase(maxBackups = 3) {
2455
- if (!db || !dbPath) {
699
+ if (!getDb() || !getDbPath()) {
2456
700
  console.error("[data-store] 数据库未初始化,跳过备份");
2457
701
  return null;
2458
702
  }
@@ -2464,16 +708,16 @@ export function createStore(filePath, options = {}) {
2464
708
  .toISOString()
2465
709
  .replace(/[-:T.]/g, "")
2466
710
  .slice(0, 15); // YYYYMMDDHHmmss
2467
- const baseName = path.basename(dbPath, ".db");
711
+ const baseName = path.basename(getDbPath(), ".db");
2468
712
  const backupName = `${baseName}-${timestamp}.db`;
2469
- const backupDir = path.dirname(dbPath);
713
+ const backupDir = path.dirname(getDbPath());
2470
714
  const backupPath = path.join(backupDir, backupName);
2471
715
 
2472
716
  console.error(`[data-store] 正在备份数据库: ${backupName}`);
2473
717
 
2474
718
  // 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
2475
719
  const backupDb = new Database(backupPath);
2476
- db.backup("main", backupDb, "main");
720
+ getDb().backup("main", backupDb, "main");
2477
721
  backupDb.close();
2478
722
 
2479
723
  // 验证备份文件大小
@@ -2523,7 +767,7 @@ export function createStore(filePath, options = {}) {
2523
767
 
2524
768
  function stopBackup() {
2525
769
  // 退出时执行备份
2526
- if (db && dbPath) {
770
+ if (getDb() && getDbPath()) {
2527
771
  backupDatabase();
2528
772
  }
2529
773
  }
@@ -2531,7 +775,7 @@ export function createStore(filePath, options = {}) {
2531
775
  function getUser(uid) {
2532
776
  const idx = uidIndex.get(uid);
2533
777
  if (idx !== undefined) return data[idx];
2534
- if (db) return getJob(uid);
778
+ if (getDb()) return getJob(uid);
2535
779
  return undefined;
2536
780
  }
2537
781
 
@@ -2549,12 +793,25 @@ export function createStore(filePath, options = {}) {
2549
793
 
2550
794
  function addUser(user, append) {
2551
795
  const memoryIdx = uidIndex.get(user.uniqueId);
2552
- if (db && memoryIdx === undefined) {
796
+ if (getDb() && memoryIdx === undefined) {
2553
797
  // 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
2554
798
  if (hasUserInDb(user.uniqueId)) {
2555
799
  return;
2556
800
  }
2557
- addJob(user);
801
+ const now = Date.now();
802
+ const writeTxn = getDb().transaction((job) => {
803
+ addUserToDb({
804
+ ...job,
805
+ createdAt: job.createdAt || now,
806
+ updatedAt: job.updatedAt || now,
807
+ });
808
+ addJobBaseToDb({
809
+ ...job,
810
+ createdAt: job.createdAt || now,
811
+ updatedAt: job.updatedAt || now,
812
+ });
813
+ });
814
+ writeTxn(user);
2558
815
  return;
2559
816
  }
2560
817
 
@@ -2616,7 +873,7 @@ export function createStore(filePath, options = {}) {
2616
873
  createdAt: now,
2617
874
  updatedAt: now,
2618
875
  };
2619
- const writeTxn = db.transaction((job) => {
876
+ const writeTxn = getDb().transaction((job) => {
2620
877
  addUserToDb(job);
2621
878
  addJobBaseToDb(job);
2622
879
  });
@@ -2628,195 +885,26 @@ export function createStore(filePath, options = {}) {
2628
885
  }
2629
886
 
2630
887
  function getPendingUsers() {
2631
- if (db) {
888
+ if (getDb()) {
2632
889
  return getAllJobs().filter((u) => u.status === "pending");
2633
890
  }
2634
891
  return data.filter((u) => u.status === "pending");
2635
892
  }
2636
893
 
2637
894
  function getProcessedUsers() {
2638
- if (db) {
895
+ if (getDb()) {
2639
896
  return getAllJobs().filter((u) => u.status === "done");
2640
897
  }
2641
898
  return data.filter((u) => u.status === "done");
2642
899
  }
2643
900
 
2644
901
  function getAllUsers() {
2645
- if (db) {
902
+ if (getDb()) {
2646
903
  return getAllJobs();
2647
904
  }
2648
905
  return data;
2649
906
  }
2650
907
 
2651
- /**
2652
- * 使用 LLM 对单个 job 的国家匹配度打分(0-100)
2653
- * @param {Object} job - raw_jobs 中的一条记录
2654
- * @param {string[]} targetLocations - 目标国家列表
2655
- * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
2656
- */
2657
- async function scoreJobLocation(job, targetLocations) {
2658
- const { fetch: undiciFetch } = await import("undici");
2659
-
2660
- const prompt = `
2661
- 你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
2662
-
2663
- 目标国家列表: ${targetLocations.join(", ")}
2664
-
2665
- 重要:
2666
- - 用户只要来自上述**任意一个**国家就算匹配。
2667
- - guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
2668
- - 请综合用户名、昵称、签名、位置等信息做判断。
2669
-
2670
- 用户信息:
2671
- - 用户名: ${job.unique_id || "未知"}
2672
- - 昵称: ${job.nickname || "未知"}
2673
- - 签名: ${job.signature || "未知"}
2674
- - 地区: ${job.region || "未知"}
2675
- - 猜测国家(参考): ${job.guessed_location || "未知"}
2676
- - 位置信息: ${job.location_created || "未知"}
2677
- - 主页链接: ${job.bio_link || "未知"}
2678
-
2679
- 返回 JSON(仅返回 JSON,无其他内容):
2680
- {"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
2681
-
2682
- Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
2683
- `;
2684
-
2685
- try {
2686
- const apiKey = process.env.APIKEY || "";
2687
- const response = await undiciFetch(
2688
- "http://82.156.52.214:18000/v1/chat/completions",
2689
- {
2690
- method: "POST",
2691
- headers: {
2692
- "Content-Type": "application/json",
2693
- Authorization: `Bearer ${apiKey}`,
2694
- },
2695
- body: JSON.stringify({
2696
- model: "zc-fast",
2697
- messages: [{ role: "user", content: prompt }],
2698
- max_tokens: 512,
2699
- temperature: 0.1,
2700
- }),
2701
- },
2702
- );
2703
-
2704
- const result = await response.json();
2705
- const content = result.choices?.[0]?.message?.content || "";
2706
-
2707
- // 解析 JSON 响应(多层容错)
2708
- let parsed = null;
2709
-
2710
- // 尝试 1: 直接解析
2711
- try {
2712
- parsed = JSON.parse(content);
2713
- } catch {
2714
- // 尝试 2: 提取 {} 包裹的内容
2715
- const match = content.match(/\{[\s\S]*\}/);
2716
- if (match) {
2717
- try {
2718
- parsed = JSON.parse(match[0]);
2719
- } catch {
2720
- // 尝试 3: 清理常见问题后解析
2721
- const cleaned = match[0]
2722
- .replace(/"/g, '"') // 弯引号 → 直引号
2723
- .replace(/\s+/g, " ") // 多余空白
2724
- .trim();
2725
- try {
2726
- parsed = JSON.parse(cleaned);
2727
- } catch {
2728
- // 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
2729
- const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
2730
- if (scoreMatch) {
2731
- let reason = "解析降级";
2732
- // 找 "reason": 的位置,取到最后一个 } 前的内容
2733
- const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
2734
- if (reasonKeyPos !== -1) {
2735
- const afterKey = content.substring(reasonKeyPos);
2736
- const colonPos = afterKey.indexOf(":");
2737
- const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
2738
- const rawValue = afterKey.substring(valueStart);
2739
- // 取到原始 content 最后一个 } 前
2740
- const lastBrace = content.lastIndexOf("}");
2741
- const reasonEnd = lastBrace - reasonKeyPos - valueStart;
2742
- if (reasonEnd > 0) {
2743
- reason = rawValue.substring(0, reasonEnd).trim();
2744
- // 去掉首尾的引号
2745
- if (reason.startsWith('"')) reason = reason.substring(1);
2746
- if (reason.endsWith('"'))
2747
- reason = reason.substring(0, reason.length - 1);
2748
- }
2749
- }
2750
- parsed = {
2751
- score: parseInt(scoreMatch[1]) || 50,
2752
- reason,
2753
- };
2754
- }
2755
- }
2756
- }
2757
- }
2758
-
2759
- // 尝试 5: 如果以上都失败,用更宽松的正则提取
2760
- if (!parsed) {
2761
- const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
2762
- const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
2763
- if (scoreMatch) {
2764
- parsed = {
2765
- score: parseInt(scoreMatch[1]) || 50,
2766
- reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
2767
- };
2768
- }
2769
- }
2770
- }
2771
-
2772
- if (parsed && typeof parsed.score === "number") {
2773
- return {
2774
- uniqueId: job.unique_id,
2775
- score: Math.max(0, Math.min(100, parsed.score)),
2776
- reason: parsed.reason || "",
2777
- };
2778
- }
2779
-
2780
- // 所有解析都失败,返回默认分
2781
- console.error(
2782
- `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
2783
- );
2784
- return {
2785
- uniqueId: job.unique_id,
2786
- score: 50,
2787
- reason: "LLM 响应解析失败,使用默认分",
2788
- };
2789
- } catch (e) {
2790
- console.error(
2791
- `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
2792
- );
2793
- return {
2794
- uniqueId: job.unique_id,
2795
- score: 50,
2796
- reason: `LLM 调用异常: ${e.message}`,
2797
- };
2798
- }
2799
- }
2800
-
2801
- /**
2802
- * 批量对 jobs 进行 LLM 国家匹配度打分
2803
- * @param {Object[]} jobs - raw_jobs 记录数组
2804
- * @param {string[]} targetLocations - 目标国家列表
2805
- * @param {number} batchSize - 每批处理数量(并发),默认 10
2806
- * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
2807
- */
2808
- async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
2809
- const results = [];
2810
- for (let i = 0; i < jobs.length; i += batchSize) {
2811
- const batch = jobs.slice(i, i + batchSize);
2812
- const batchResults = await Promise.all(
2813
- batch.map((job) => scoreJobLocation(job, targetLocations)),
2814
- );
2815
- results.push(...batchResults);
2816
- }
2817
- return results;
2818
- }
2819
-
2820
908
  /**
2821
909
  * 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
2822
910
  * @param {string[]} locations - 目标国家列表(null 表示不限制)
@@ -2828,8 +916,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2828
916
  * @returns {{ moved: number }} 实际移动的数量
2829
917
  */
2830
918
  function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
2831
- if (!db) {
2832
- return { moved: 0, error: "db not ready" };
919
+ if (!getDb()) {
920
+ return { moved: 0, error: "getDb() not ready" };
2833
921
  }
2834
922
 
2835
923
  const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
@@ -2860,7 +948,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2860
948
 
2861
949
  // 统计符合条件的数量
2862
950
  const count =
2863
- db
951
+ getDb()
2864
952
  .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
2865
953
  .get(...args)?.c || 0;
2866
954
 
@@ -2868,156 +956,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2868
956
  return { moved: 0 };
2869
957
  }
2870
958
 
2871
- // 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
959
+ // 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
2872
960
  if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
2873
- const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
2874
- const llmMinTagReturn = options.llmMinTagReturn ?? 30; // tag 最少合格数
2875
- const llmMinNonTagReturn = options.llmMinNonTagReturn ?? 30; // tag 最少合格数
2876
- const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
2877
-
2878
- // 打印当前偏移量状态
2879
- const offsetSummary = Array.from(llmSampleOffsets.entries())
2880
- .map(([k, v]) => `${k}:${v}`)
2881
- .join(", ");
961
+ const llmTotal = options.llmTotal ?? 200; // 总条数
962
+ const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
963
+ const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
964
+ const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
965
+ const llmMinScore = options.llmMinScore ?? 60;
966
+ const llmSampleSize = options.llmSampleSize ?? 100;
967
+ const maxBatches = options.llmMaxBatches ?? 10;
968
+
2882
969
  console.error(
2883
- `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},tag 最少 ${llmMinTagReturn},非 tag 最少 ${llmMinNonTagReturn}`,
970
+ `[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
2884
971
  );
2885
- if (offsetSummary) {
2886
- console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
2887
- }
2888
972
 
2889
973
  // 返回 Promise,调用方需要 await
2890
974
  return (async () => {
2891
- const allTagQualified = []; // tag 合格列表(直接合格)
2892
- const allNonTagQualified = []; // 非 tag 合格列表(LLM 打分合格)
975
+ const allTagQualified = [];
976
+ const allNonTagQualified = [];
2893
977
  const allScores = [];
2894
978
 
2895
- // 按猜测国家分组处理,每个国家使用独立的偏移量
2896
- const locationGroups = normalizedLocations;
2897
- let totalBatches = 0;
979
+ // ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
980
+ let tagOffset = offsetStore.get("_tag") || 0;
981
+ const tagGlobalCount =
982
+ getDb()
983
+ .prepare(
984
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
985
+ )
986
+ .get(...args)?.c || 0;
2898
987
 
2899
- for (const location of locationGroups) {
2900
- // 获取该国家上次的偏移量
2901
- let offset = llmSampleOffsets.get(location) || 0;
988
+ if (tagOffset >= tagGlobalCount) {
989
+ tagOffset = 0;
990
+ offsetStore.set("_tag", 0);
991
+ }
2902
992
 
2903
- // 查询该国家的总数量
2904
- const locationCountSql = `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?`;
2905
- const locationArgs = [...args, location];
2906
- const locationCount =
2907
- db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
993
+ console.error(
994
+ `[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
995
+ );
2908
996
 
2909
- if (locationCount === 0) {
2910
- console.error(
2911
- `[data-store] 国家 ${location}: raw_jobs 中无数据,跳过`,
997
+ while (
998
+ allTagQualified.length < llmTagLimit &&
999
+ tagOffset < tagGlobalCount
1000
+ ) {
1001
+ const batch = getDb()
1002
+ .prepare(
1003
+ `
1004
+ SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
1005
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
1006
+ LIMIT ? OFFSET ?
1007
+ `,
1008
+ )
1009
+ .all(
1010
+ ...args,
1011
+ Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
1012
+ tagOffset,
2912
1013
  );
1014
+
1015
+ if (!batch.length) break;
1016
+
1017
+ allTagQualified.push(...batch.map((s) => s.unique_id));
1018
+ tagOffset += batch.length;
1019
+
1020
+ console.error(
1021
+ `[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
1022
+ );
1023
+ }
1024
+
1025
+ offsetStore.set("_tag", tagOffset);
1026
+
1027
+ // ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
1028
+ for (const location of normalizedLocations) {
1029
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
1030
+
1031
+ const nonTagOffsetKey = `${location}:nonTag`;
1032
+ let offset = offsetStore.get(nonTagOffsetKey) || 0;
1033
+
1034
+ const locationArgs = [...args, location];
1035
+ const nonTagCount =
1036
+ getDb()
1037
+ .prepare(
1038
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
1039
+ )
1040
+ .get(...locationArgs)?.c || 0;
1041
+
1042
+ if (nonTagCount === 0) {
1043
+ console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
2913
1044
  continue;
2914
1045
  }
2915
1046
 
2916
- // 如果偏移量超过总数,重置为 0(一轮结束,重新开始)
2917
- if (offset >= locationCount) {
1047
+ if (offset >= nonTagCount) {
2918
1048
  offset = 0;
2919
- llmSampleOffsets.set(location, 0);
1049
+ offsetStore.set(nonTagOffsetKey, 0);
2920
1050
  }
2921
1051
 
2922
1052
  console.error(
2923
- `[data-store] 国家 ${location}: 共 ${locationCount} 条,从偏移量 ${offset} 开始`,
1053
+ `[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
2924
1054
  );
2925
1055
 
2926
1056
  for (let batch = 0; batch < maxBatches; batch++) {
2927
- const remaining = locationCount - offset;
2928
- if (remaining <= 0) break;
1057
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
2929
1058
 
2930
- const sampleLimit = Math.min(llmSampleSize, remaining);
2931
- const samples = db
1059
+ const samples = getDb()
2932
1060
  .prepare(
2933
1061
  `
2934
1062
  SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
2935
- ORDER BY
2936
- CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
2937
- COALESCE(video_count, 0) DESC, created_at DESC
1063
+ AND (sources NOT LIKE '%tag%' OR sources IS NULL)
1064
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
2938
1065
  LIMIT ? OFFSET ?
2939
1066
  `,
2940
1067
  )
2941
- .all(...locationArgs, sampleLimit, offset);
1068
+ .all(...locationArgs, llmSampleSize, offset);
2942
1069
 
2943
- if (samples.length === 0) break;
1070
+ if (!samples.length) break;
2944
1071
 
2945
- // 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
2946
- const tagSamples = samples.filter((s) =>
2947
- (s.sources || "").includes("tag"),
1072
+ const scores = await scoreJobsBatch(
1073
+ samples,
1074
+ DEFAULT_TARGET_LOCATIONS,
2948
1075
  );
2949
- const nonTagSamples = samples.filter(
2950
- (s) => !(s.sources || "").includes("tag"),
2951
- );
2952
-
2953
- // tag 来源直接加入合格列表
2954
- if (tagSamples.length > 0) {
2955
- allTagQualified.push(...tagSamples.map((s) => s.unique_id));
2956
- console.error(
2957
- `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2958
- );
2959
- }
2960
-
2961
- // 非 tag 来源走 LLM 打分
2962
- let batchQualified = [];
2963
- let scores = [];
2964
- if (nonTagSamples.length > 0) {
2965
- scores = await scoreJobsBatch(
2966
- nonTagSamples,
2967
- DEFAULT_TARGET_LOCATIONS,
2968
- );
2969
- batchQualified = scores.filter((s) => s.score >= llmMinScore);
2970
- allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
2971
- }
2972
-
1076
+ const qualified = scores.filter((s) => s.score >= llmMinScore);
1077
+ allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
2973
1078
  allScores.push(...scores);
2974
1079
 
2975
- totalBatches++;
2976
- const totalQualified = allTagQualified.length + allNonTagQualified.length;
1080
+ offset += samples.length;
1081
+ offsetStore.set(nonTagOffsetKey, offset);
1082
+
2977
1083
  console.error(
2978
- `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,tag 合格 ${allTagQualified.length},非 tag 合格 ${allNonTagQualified.length},累计 ${totalQualified} 条`,
1084
+ `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
2979
1085
  );
2980
-
2981
- // 更新偏移量记忆
2982
- offset += samples.length;
2983
- llmSampleOffsets.set(location, offset);
2984
-
2985
- // 检查是否两个类型都达到阈值,都达到才停止
2986
- const tagReached = allTagQualified.length >= llmMinTagReturn;
2987
- const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
2988
- if (tagReached && nonTagReached) {
2989
- console.error(
2990
- `[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
2991
- );
2992
- break;
2993
- }
2994
1086
  }
2995
-
2996
- // 检查是否两个类型都达到阈值,都达到才停止所有国家采样
2997
- const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
2998
- const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
2999
- if (tagReachedGlobal && nonTagReachedGlobal) break;
3000
1087
  }
3001
1088
 
3002
- // 最终合格列表:tag 优先 + 非 tag 按分数排序
3003
- // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3004
- const tagMaxCount = Math.floor(safeLimit * 0.7);
3005
- const tagCount = Math.min(allTagQualified.length, tagMaxCount);
3006
- const nonTagMaxCount = safeLimit - tagCount;
3007
-
3008
- const nonTagQualifiedScores = allScores
3009
- .filter((s) => s.score >= llmMinScore)
3010
- .sort((a, b) => b.score - a.score);
3011
- const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
3012
-
3013
- const qualified = [
3014
- ...allTagQualified.slice(0, tagCount),
3015
- ...finalNonTagQualified,
3016
- ];
1089
+ // ===== 最终结果 =====
1090
+ const qualified = [...allTagQualified, ...allNonTagQualified];
3017
1091
 
3018
1092
  if (!qualified.length) {
3019
1093
  console.error(
3020
- `[data-store] LLM 打分后无符合条件的任务(阈值: ${llmMinScore},共采样 ${allScores.length} 条)`,
1094
+ `[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
3021
1095
  );
3022
1096
  return {
3023
1097
  moved: 0,
@@ -3027,11 +1101,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3027
1101
  };
3028
1102
  }
3029
1103
 
1104
+ console.error(
1105
+ `[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
1106
+ );
1107
+
3030
1108
  // 移动符合条件的记录
3031
1109
  const placeholders = qualified.map(() => "?").join(", ");
3032
- const moveTxn = db.transaction(() => {
3033
- db.prepare(
3034
- `
1110
+ const moveTxn = getDb().transaction(() => {
1111
+ getDb()
1112
+ .prepare(
1113
+ `
3035
1114
  INSERT OR IGNORE INTO jobs (
3036
1115
  unique_id, nickname, status, sources, pinned,
3037
1116
  tt_seller, verified, video_count, comment_count,
@@ -3050,41 +1129,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3050
1129
  FROM raw_jobs
3051
1130
  WHERE unique_id IN (${placeholders})
3052
1131
  `,
3053
- ).run(...qualified);
1132
+ )
1133
+ .run(...qualified);
3054
1134
 
3055
- db.prepare(
3056
- `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
3057
- ).run(...qualified);
1135
+ getDb()
1136
+ .prepare(
1137
+ `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
1138
+ )
1139
+ .run(...qualified);
3058
1140
  });
3059
-
3060
1141
  moveTxn();
3061
1142
  markStatsDirty();
3062
1143
 
3063
1144
  // 持久化偏移量到数据库
3064
- saveLlmSampleOffsets();
1145
+ offsetStore.save();
3065
1146
 
3066
1147
  // 打印最终偏移量状态
3067
- const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
1148
+ const finalOffsetSummary = Array.from(offsetStore.entries())
3068
1149
  .map(([k, v]) => `${k}:${v}`)
3069
1150
  .join(", ");
3070
- console.error(
3071
- `[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
3072
- );
3073
1151
  console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
3074
- const scoresDetail = allScores.map((s) => s);
1152
+
3075
1153
  return {
3076
1154
  moved: qualified.length,
3077
1155
  scored: allScores.length,
3078
1156
  qualified: qualified.length,
3079
- scores: scoresDetail,
1157
+ scores: allScores,
3080
1158
  };
3081
1159
  })();
3082
1160
  }
3083
1161
 
3084
1162
  // 常规移动:INSERT + DELETE 事务
3085
- const moveTxn = db.transaction(() => {
3086
- db.prepare(
3087
- `
1163
+ const moveTxn = getDb().transaction(() => {
1164
+ getDb()
1165
+ .prepare(
1166
+ `
3088
1167
  INSERT OR IGNORE INTO jobs (
3089
1168
  unique_id, nickname, status, sources, pinned,
3090
1169
  tt_seller, verified, video_count, comment_count,
@@ -3107,11 +1186,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3107
1186
  COALESCE(video_count, 0) DESC, created_at DESC
3108
1187
  LIMIT ?
3109
1188
  `,
3110
- ).run(...args, safeLimit);
1189
+ )
1190
+ .run(...args, safeLimit);
3111
1191
 
3112
1192
  // 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
3113
- db.prepare(
3114
- `
1193
+ getDb()
1194
+ .prepare(
1195
+ `
3115
1196
  DELETE FROM raw_jobs
3116
1197
  WHERE unique_id IN (
3117
1198
  SELECT unique_id FROM raw_jobs
@@ -3122,7 +1203,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3122
1203
  LIMIT ?
3123
1204
  )
3124
1205
  `,
3125
- ).run(...args, safeLimit);
1206
+ )
1207
+ .run(...args, safeLimit);
3126
1208
  });
3127
1209
 
3128
1210
  moveTxn();
@@ -3140,9 +1222,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3140
1222
  ) {
3141
1223
  // 记录客户端登录状态
3142
1224
  clientLoginStatus.set(userId, !!loggedIn);
3143
- if (db) {
1225
+ if (getDb()) {
3144
1226
  const now = Date.now();
3145
- const ongoingRow = db
1227
+ const ongoingRow = getDb()
3146
1228
  .prepare(
3147
1229
  `
3148
1230
  SELECT *
@@ -3157,10 +1239,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3157
1239
  )
3158
1240
  .get(userId, now, expireMs);
3159
1241
  if (ongoingRow) {
3160
- db.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?").run(
3161
- now,
3162
- ongoingRow.unique_id,
3163
- );
1242
+ getDb()
1243
+ .prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
1244
+ .run(now, ongoingRow.unique_id);
3164
1245
  return {
3165
1246
  uniqueId: ongoingRow.unique_id,
3166
1247
  nickname: ongoingRow.nickname,
@@ -3242,7 +1323,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3242
1323
  for (const filter of filters) {
3243
1324
  where.push(filter);
3244
1325
  }
3245
- return db
1326
+ return getDb()
3246
1327
  .prepare(
3247
1328
  `
3248
1329
  SELECT *
@@ -3285,7 +1366,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3285
1366
  );
3286
1367
  args.push(...normalizedLocations);
3287
1368
  }
3288
- return db
1369
+ return getDb()
3289
1370
  .prepare(
3290
1371
  `
3291
1372
  SELECT *
@@ -3355,9 +1436,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3355
1436
 
3356
1437
  function claimRow(row) {
3357
1438
  if (!row) return null;
3358
- db.prepare(
3359
- "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
3360
- ).run(now, userId, row.unique_id);
1439
+ getDb()
1440
+ .prepare(
1441
+ "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
1442
+ )
1443
+ .run(now, userId, row.unique_id);
3361
1444
  markStatsDirty();
3362
1445
  return {
3363
1446
  uniqueId: row.unique_id,
@@ -3367,7 +1450,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3367
1450
  };
3368
1451
  }
3369
1452
 
3370
- const expiredRow = db
1453
+ const expiredRow = getDb()
3371
1454
  .prepare(
3372
1455
  `
3373
1456
  SELECT *
@@ -3382,9 +1465,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3382
1465
  .get(now, expireMs);
3383
1466
  let expiredCandidate = null;
3384
1467
  if (expiredRow) {
3385
- db.prepare(
3386
- "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
3387
- ).run(expiredRow.unique_id);
1468
+ getDb()
1469
+ .prepare(
1470
+ "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
1471
+ )
1472
+ .run(expiredRow.unique_id);
3388
1473
  expiredCandidate = mapJobRow({
3389
1474
  ...expiredRow,
3390
1475
  status: "pending",
@@ -3476,7 +1561,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3476
1561
  return null;
3477
1562
  }
3478
1563
 
3479
- if (!db) {
1564
+ if (!getDb()) {
3480
1565
  const now = Date.now();
3481
1566
 
3482
1567
  // 0. 该客户端有未过期的任务,续期返回
@@ -3614,16 +1699,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3614
1699
  locations = null,
3615
1700
  loggedIn = true,
3616
1701
  ) {
3617
- if (db) {
1702
+ if (getDb()) {
3618
1703
  const now = Date.now();
3619
1704
  const info = {
3620
- path: "db",
1705
+ path: "getDb()",
3621
1706
  userId,
3622
1707
  expireMs,
3623
1708
  loggedIn,
3624
1709
  };
3625
1710
 
3626
- const ongoingRow = db
1711
+ const ongoingRow = getDb()
3627
1712
  .prepare(
3628
1713
  `
3629
1714
  SELECT *
@@ -3723,7 +1808,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3723
1808
  ORDER BY follower_count DESC, created_at ASC, unique_id ASC
3724
1809
  LIMIT 1
3725
1810
  `;
3726
- const row = db.prepare(sql).get(...args);
1811
+ const row = getDb()
1812
+ .prepare(sql)
1813
+ .get(...args);
3727
1814
  return { row, sql, args };
3728
1815
  }
3729
1816
 
@@ -3764,7 +1851,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3764
1851
  ORDER BY created_at ASC, unique_id ASC
3765
1852
  LIMIT 1
3766
1853
  `;
3767
- const row = db.prepare(sql).get(...args);
1854
+ const row = getDb()
1855
+ .prepare(sql)
1856
+ .get(...args);
3768
1857
  return { row, sql, args };
3769
1858
  }
3770
1859
 
@@ -3777,7 +1866,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3777
1866
  ORDER BY claimed_at ASC
3778
1867
  LIMIT 1
3779
1868
  `;
3780
- const expiredRow = db.prepare(expiredSql).get(now, expireMs);
1869
+ const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
3781
1870
  info.expired = expiredRow
3782
1871
  ? {
3783
1872
  uniqueId: expiredRow.unique_id,
@@ -4103,7 +2192,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4103
2192
  }
4104
2193
 
4105
2194
  function commitJob(uniqueId, result) {
4106
- if (db) {
2195
+ if (getDb()) {
4107
2196
  const user = getJob(uniqueId);
4108
2197
  if (!user) return { saved: false, error: "user not found" };
4109
2198
 
@@ -4129,7 +2218,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4129
2218
  }
4130
2219
 
4131
2220
  function commitNewExplore(uniqueId, result) {
4132
- if (db) {
2221
+ if (getDb()) {
4133
2222
  const existing = getJob(uniqueId);
4134
2223
  if (existing) {
4135
2224
  updateUserFromResult(existing, result);
@@ -4179,7 +2268,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4179
2268
  }
4180
2269
 
4181
2270
  function resetJob(uniqueId) {
4182
- if (db) {
2271
+ if (getDb()) {
4183
2272
  const user = getJob(uniqueId);
4184
2273
  if (!user) return { saved: false, error: "user not found" };
4185
2274
  user.status = "pending";
@@ -4210,7 +2299,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4210
2299
  }
4211
2300
 
4212
2301
  function togglePin(uniqueId) {
4213
- if (db) {
2302
+ if (getDb()) {
4214
2303
  const user = getJob(uniqueId);
4215
2304
  if (!user) return { saved: false, error: "user not found" };
4216
2305
  const nextPinned = !user.pinned;
@@ -4227,13 +2316,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4227
2316
  }
4228
2317
 
4229
2318
  function getNextRedoJob(userId, maxAgeSeconds = 43200) {
4230
- if (db) {
2319
+ if (getDb()) {
4231
2320
  const now = Date.now();
4232
2321
  const threshold = now - maxAgeSeconds * 1000;
4233
2322
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
4234
2323
  const targetLocations = DEFAULT_TARGET_LOCATIONS;
4235
2324
  const placeholders = targetLocations.map(() => "?").join(",");
4236
- const row = db
2325
+ const row = getDb()
4237
2326
  .prepare(
4238
2327
  `
4239
2328
  SELECT *
@@ -4248,9 +2337,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4248
2337
  )
4249
2338
  .get(...targetLocations, defaultTime, threshold, defaultTime);
4250
2339
  if (!row) return null;
4251
- db.prepare(
4252
- "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
4253
- ).run(now, now, row.unique_id);
2340
+ getDb()
2341
+ .prepare(
2342
+ "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
2343
+ )
2344
+ .run(now, now, row.unique_id);
4254
2345
  return {
4255
2346
  uniqueId: row.unique_id,
4256
2347
  nickname: row.nickname,
@@ -4299,7 +2390,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4299
2390
  }
4300
2391
 
4301
2392
  function commitRedoJob(uniqueId, result) {
4302
- if (db) {
2393
+ if (getDb()) {
4303
2394
  const user = getJob(uniqueId);
4304
2395
  if (!user) return { saved: false, error: "user not found" };
4305
2396
  user.refreshTime = Date.now();
@@ -4443,13 +2534,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4443
2534
  : [];
4444
2535
  const hasCountryFilter = targetCountries.length > 0;
4445
2536
 
4446
- if (db) {
2537
+ if (getDb()) {
4447
2538
  const l = Math.max(1, parseInt(limit) || 5);
4448
2539
 
4449
2540
  let sql = `
4450
2541
  SELECT *
4451
2542
  FROM jobs_base
4452
- WHERE COALESCE(tt_seller, '') = ''
2543
+ WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
4453
2544
  AND COALESCE(user_update_count, 0) <= 0
4454
2545
  `;
4455
2546
  const sqlParams = [];
@@ -4460,18 +2551,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4460
2551
  sqlParams.push(...targetCountries);
4461
2552
  }
4462
2553
 
4463
- // 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
2554
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
4464
2555
  sql += ` ORDER BY
2556
+ CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
4465
2557
  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
4466
2558
  created_at ASC,
4467
2559
  unique_id ASC
4468
2560
  LIMIT ?`;
4469
2561
  sqlParams.push(l);
4470
2562
 
4471
- const rows = db.prepare(sql).all(...sqlParams);
2563
+ const rows = getDb()
2564
+ .prepare(sql)
2565
+ .all(...sqlParams);
4472
2566
  if (rows.length === 0) return [];
4473
2567
  const now = Date.now();
4474
- const bumpStmt = db.prepare(
2568
+ const bumpStmt = getDb().prepare(
4475
2569
  `
4476
2570
  UPDATE jobs_base
4477
2571
  SET user_update_count = COALESCE(user_update_count, 0) + 1,
@@ -4479,7 +2573,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4479
2573
  WHERE unique_id = ?
4480
2574
  `,
4481
2575
  );
4482
- const bumpTxn = db.transaction((items) => {
2576
+ const bumpTxn = getDb().transaction((items) => {
4483
2577
  for (const item of items) {
4484
2578
  bumpStmt.run(now, item.unique_id);
4485
2579
  }
@@ -4497,9 +2591,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4497
2591
  const pending = data
4498
2592
  .filter((u) => {
4499
2593
  const updateCount = u.userUpdateCount;
4500
- const ttSellerEmpty =
4501
- u.ttSeller === null || u.ttSeller === undefined || u.ttSeller === "";
4502
- if (!ttSellerEmpty) return false;
2594
+ // ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
2595
+ const ttSellerEligible =
2596
+ u.ttSeller === null ||
2597
+ u.ttSeller === undefined ||
2598
+ u.ttSeller === "" ||
2599
+ u.ttSeller === 1;
2600
+ if (!ttSellerEligible) return false;
4503
2601
  if (
4504
2602
  updateCount === null ||
4505
2603
  updateCount === undefined ||
@@ -4514,7 +2612,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4514
2612
  return false;
4515
2613
  })
4516
2614
  .sort((a, b) => {
4517
- // 优先级:sources 包含 "tag" 的任务优先
2615
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
2616
+ const aIsSeller = a.ttSeller === 1 ? 0 : 1;
2617
+ const bIsSeller = b.ttSeller === 1 ? 0 : 1;
2618
+ if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
4518
2619
  const aIsTag = (a.sources || "").includes("tag");
4519
2620
  const bIsTag = (b.sources || "").includes("tag");
4520
2621
  if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
@@ -4531,7 +2632,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4531
2632
  }
4532
2633
 
4533
2634
  function updateUserInfo(uniqueId, info) {
4534
- if (db) {
2635
+ if (getDb()) {
4535
2636
  return updateJobInfo(uniqueId, info, true);
4536
2637
  }
4537
2638
 
@@ -4550,15 +2651,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4550
2651
  }
4551
2652
 
4552
2653
  function updateUserLocation(uniqueId, location) {
4553
- if (db) {
4554
- const existing = db
2654
+ if (getDb()) {
2655
+ const existing = getDb()
4555
2656
  .prepare("SELECT * FROM jobs WHERE unique_id = ?")
4556
2657
  .get(uniqueId);
4557
2658
  if (!existing) return { error: "user not found" };
4558
2659
  const now = Date.now();
4559
- db.prepare(
4560
- "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
4561
- ).run(location, now, now, uniqueId);
2660
+ getDb()
2661
+ .prepare(
2662
+ "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
2663
+ )
2664
+ .run(location, now, now, uniqueId);
4562
2665
  return { ok: true, location, modifiedAt: now };
4563
2666
  }
4564
2667
 
@@ -4574,13 +2677,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4574
2677
 
4575
2678
  // 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
4576
2679
  function moveJobToRaw(uniqueId) {
4577
- if (!db) return false;
2680
+ if (!getDb()) return false;
4578
2681
  const safeId = String(uniqueId).trim();
4579
2682
  if (!safeId) return false;
4580
2683
 
4581
- const moveSingleTxn = db.transaction(() => {
4582
- db.prepare(
4583
- `
2684
+ const moveSingleTxn = getDb().transaction(() => {
2685
+ getDb()
2686
+ .prepare(
2687
+ `
4584
2688
  INSERT OR REPLACE INTO raw_jobs (
4585
2689
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4586
2690
  error, pinned, no_video, restricted, user_update_count,
@@ -4602,21 +2706,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4602
2706
  user_create_time
4603
2707
  FROM jobs WHERE unique_id = ?
4604
2708
  `,
4605
- ).run(safeId);
2709
+ )
2710
+ .run(safeId);
4606
2711
 
4607
- db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
2712
+ getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
4608
2713
  });
4609
2714
  moveSingleTxn();
4610
2715
  return true;
4611
2716
  }
4612
2717
 
4613
2718
  function batchUpdateUserInfo(updates) {
4614
- if (db) {
2719
+ if (getDb()) {
4615
2720
  const results = [];
4616
2721
  const rawMoveList = [];
4617
2722
  const sellerMoveList = [];
4618
2723
 
4619
- const txn = db.transaction((items) => {
2724
+ const txn = getDb().transaction((items) => {
4620
2725
  items.forEach((item) => {
4621
2726
  const uniqueId = item.uniqueId;
4622
2727
  // 处理 { error: true, statusCode: xxx } 的情况
@@ -4668,8 +2773,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4668
2773
  // 批量移动商家用户到 jobs
4669
2774
  if (sellerMoveList.length > 0) {
4670
2775
  const placeholders = sellerMoveList.map(() => "?").join(",");
4671
- db.prepare(
4672
- `
2776
+ getDb()
2777
+ .prepare(
2778
+ `
4673
2779
  INSERT OR REPLACE INTO jobs (
4674
2780
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4675
2781
  error, pinned, no_video, restricted, user_update_count,
@@ -4691,18 +2797,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4691
2797
  user_create_time
4692
2798
  FROM jobs_base WHERE unique_id IN (${placeholders})
4693
2799
  `,
4694
- ).run(...sellerMoveList);
2800
+ )
2801
+ .run(...sellerMoveList);
4695
2802
 
4696
- db.prepare(
4697
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4698
- ).run(...sellerMoveList);
2803
+ getDb()
2804
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2805
+ .run(...sellerMoveList);
4699
2806
  }
4700
2807
 
4701
2808
  // 批量移动非商家用户到 raw_jobs
4702
2809
  if (rawMoveList.length > 0) {
4703
2810
  const placeholders = rawMoveList.map(() => "?").join(",");
4704
- db.prepare(
4705
- `
2811
+ getDb()
2812
+ .prepare(
2813
+ `
4706
2814
  INSERT OR REPLACE INTO raw_jobs (
4707
2815
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4708
2816
  error, pinned, no_video, restricted, user_update_count,
@@ -4724,11 +2832,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4724
2832
  user_create_time
4725
2833
  FROM jobs_base WHERE unique_id IN (${placeholders})
4726
2834
  `,
4727
- ).run(...rawMoveList);
2835
+ )
2836
+ .run(...rawMoveList);
4728
2837
 
4729
- db.prepare(
4730
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4731
- ).run(...rawMoveList);
2838
+ getDb()
2839
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2840
+ .run(...rawMoveList);
4732
2841
  }
4733
2842
 
4734
2843
  // 清理内部标记
@@ -4780,8 +2889,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4780
2889
  return { registered: 0, skipped: 0 };
4781
2890
  }
4782
2891
 
4783
- if (db) {
4784
- const insertStmt = db.prepare(`
2892
+ if (getDb()) {
2893
+ const insertStmt = getDb().prepare(`
4785
2894
  INSERT OR IGNORE INTO videos (
4786
2895
  id,
4787
2896
  href,
@@ -4797,7 +2906,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4797
2906
  let registered = 0;
4798
2907
  let skipped = 0;
4799
2908
  const now = Date.now();
4800
- const txn = db.transaction((items) => {
2909
+ const txn = getDb().transaction((items) => {
4801
2910
  for (const item of items) {
4802
2911
  const result = insertStmt.run(
4803
2912
  item.id,
@@ -4844,7 +2953,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4844
2953
  }
4845
2954
 
4846
2955
  function getVideos() {
4847
- if (db) {
2956
+ if (getDb()) {
4848
2957
  return getAllVideoRows().map(mapVideoRow);
4849
2958
  }
4850
2959
  return videos;
@@ -4852,7 +2961,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4852
2961
 
4853
2962
  function getVideo(videoId) {
4854
2963
  if (!videoId) return null;
4855
- if (db) {
2964
+ if (getDb()) {
4856
2965
  return mapVideoRow(getVideoRow(videoId));
4857
2966
  }
4858
2967
  return videos.find((video) => video.id === videoId) || null;
@@ -4862,8 +2971,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4862
2971
  const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
4863
2972
  const safeOffset = Math.max(0, parseInt(offset) || 0);
4864
2973
 
4865
- if (db) {
4866
- const rows = db
2974
+ if (getDb()) {
2975
+ const rows = getDb()
4867
2976
  .prepare(
4868
2977
  `
4869
2978
  SELECT *
@@ -4873,7 +2982,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4873
2982
  `,
4874
2983
  )
4875
2984
  .all(safeLimit, safeOffset);
4876
- const total = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
2985
+ const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4877
2986
  return {
4878
2987
  total,
4879
2988
  limit: safeLimit,
@@ -4891,16 +3000,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4891
3000
  }
4892
3001
 
4893
3002
  function getVideoCount() {
4894
- if (db) {
4895
- return db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
3003
+ if (getDb()) {
3004
+ return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4896
3005
  }
4897
3006
  return videos.length;
4898
3007
  }
4899
3008
 
4900
3009
  function getPendingCommentTasks(limit) {
4901
- if (db) {
3010
+ if (getDb()) {
4902
3011
  const l = Math.max(1, parseInt(limit) || 1);
4903
- const rows = db
3012
+ const rows = getDb()
4904
3013
  .prepare(
4905
3014
  `
4906
3015
  SELECT *
@@ -4912,14 +3021,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4912
3021
  )
4913
3022
  .all(l);
4914
3023
  if (rows.length === 0) return [];
4915
- const bumpStmt = db.prepare(
3024
+ const bumpStmt = getDb().prepare(
4916
3025
  `
4917
3026
  UPDATE videos
4918
3027
  SET user_update_count = COALESCE(user_update_count, 0) + 1
4919
3028
  WHERE id = ?
4920
3029
  `,
4921
3030
  );
4922
- const bumpTxn = db.transaction((items) => {
3031
+ const bumpTxn = getDb().transaction((items) => {
4923
3032
  for (const item of items) bumpStmt.run(item.id);
4924
3033
  });
4925
3034
  bumpTxn(rows);
@@ -4949,17 +3058,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4949
3058
  }
4950
3059
 
4951
3060
  function commitCommentTask(videoId) {
4952
- if (db) {
3061
+ if (getDb()) {
4953
3062
  const video = getVideoRow(videoId);
4954
3063
  if (!video) return { ok: false, error: "video not found" };
4955
3064
  const nextCount = (video.user_update_count || 0) + 1;
4956
- db.prepare(
4957
- `
3065
+ getDb()
3066
+ .prepare(
3067
+ `
4958
3068
  UPDATE videos
4959
3069
  SET user_update_count = ?
4960
3070
  WHERE id = ?
4961
3071
  `,
4962
- ).run(nextCount, videoId);
3072
+ )
3073
+ .run(nextCount, videoId);
4963
3074
  return { ok: true, userUpdateCount: nextCount };
4964
3075
  }
4965
3076
 
@@ -5024,6 +3135,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5024
3135
  getClientLoginStatus,
5025
3136
  trackClient,
5026
3137
  getActiveClients,
3138
+ moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
5027
3139
  registerVideos,
5028
3140
  getVideo,
5029
3141
  getVideos,
@@ -5051,6 +3163,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5051
3163
 
5052
3164
  // 辅助函数:获取 LLM 采样偏移量
5053
3165
  function getLlmSampleOffsets() {
5054
- return Object.fromEntries(llmSampleOffsets);
3166
+ return Object.fromEntries(offsetStore.entries());
3167
+ }
3168
+
3169
+ // ===== 将 jobs/raw_jobs 中商家用户移动到 jobs_base =====
3170
+
3171
+ /**
3172
+ * 将 jobs 和 raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base,
3173
+ * 并重置 user_update_count=0,保留 tt_seller=1 以便后续优先级判断。
3174
+ */
3175
+ function moveSellerJobsToBase() {
3176
+ const db = getDb();
3177
+ if (!db) return { ok: false, error: "db not ready" };
3178
+
3179
+ const COLUMNS = [
3180
+ "unique_id",
3181
+ "nickname",
3182
+ "status",
3183
+ "sources",
3184
+ "claimed_by",
3185
+ "claimed_at",
3186
+ "error",
3187
+ "pinned",
3188
+ "no_video",
3189
+ "restricted",
3190
+ "user_update_count",
3191
+ "tt_seller",
3192
+ "verified",
3193
+ "video_count",
3194
+ "comment_count",
3195
+ "guessed_location",
3196
+ "location_created",
3197
+ "confirmed_location",
3198
+ "modified_at",
3199
+ "follower_count",
3200
+ "following_count",
3201
+ "heart_count",
3202
+ "refresh_time",
3203
+ "processed",
3204
+ "processed_at",
3205
+ "created_at",
3206
+ "updated_at",
3207
+ "region",
3208
+ "signature",
3209
+ "sec_uid",
3210
+ "status_code",
3211
+ "latest_video_time",
3212
+ "bio_link",
3213
+ ];
3214
+ const cols = COLUMNS.join(",");
3215
+ const insertSql = `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM `;
3216
+ const condition = "WHERE tt_seller = 1 AND video_count = 0";
3217
+
3218
+ let fromJobs = 0;
3219
+ let fromRawJobs = 0;
3220
+
3221
+ try {
3222
+ // 1. jobs → jobs_base
3223
+ const result1 = db.prepare(insertSql + "jobs " + condition).run();
3224
+ fromJobs = result1.changes || 0;
3225
+
3226
+ // 2. raw_jobs → jobs_base
3227
+ const result2 = db.prepare(insertSql + "raw_jobs " + condition).run();
3228
+ fromRawJobs = result2.changes || 0;
3229
+ } catch (e) {
3230
+ return { ok: false, error: e.message };
3231
+ }
3232
+
3233
+ // 3. 重置 user_update_count=0,保留 tt_seller=1 以便后续优先级判断
3234
+ // 包括:新移动过来的记录 + jobs_base 中已有的滞留记录
3235
+ let resetCount = 0;
3236
+ try {
3237
+ const resetStmt = db.prepare(
3238
+ `UPDATE jobs_base
3239
+ SET user_update_count = 0
3240
+ WHERE video_count = 0
3241
+ AND tt_seller = 1`,
3242
+ );
3243
+ resetStmt.run();
3244
+ resetCount = resetStmt.changes || 0;
3245
+ } catch (e) {
3246
+ return {
3247
+ ok: false,
3248
+ error: `reset failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
3249
+ };
3250
+ }
3251
+
3252
+ // 4. 删除 jobs 和 raw_jobs 中已移动的记录
3253
+ try {
3254
+ db.prepare("DELETE FROM jobs " + condition).run();
3255
+ db.prepare("DELETE FROM raw_jobs " + condition).run();
3256
+ } catch (e) {
3257
+ return {
3258
+ ok: false,
3259
+ error: `delete failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
3260
+ };
3261
+ }
3262
+
3263
+ // 5. 验证:统计 jobs_base 中可被 attach 领取的记录数(tt_seller=1 且 user_update_count<=0)
3264
+ let available = 0;
3265
+ try {
3266
+ const row = db
3267
+ .prepare(
3268
+ `SELECT COUNT(*) as total FROM jobs_base
3269
+ WHERE tt_seller = 1
3270
+ AND COALESCE(user_update_count, 0) <= 0
3271
+ AND video_count = 0`,
3272
+ )
3273
+ .get();
3274
+ available = row.total;
3275
+ } catch (_) {
3276
+ // ignore
3277
+ }
3278
+
3279
+ return {
3280
+ ok: true,
3281
+ fromJobs,
3282
+ fromRawJobs,
3283
+ totalInserted: fromJobs + fromRawJobs,
3284
+ resetCount,
3285
+ availableInBase: available,
3286
+ };
5055
3287
  }
5056
3288
  }