tt-help-cli-ycl 1.3.92 → 1.3.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,21 @@
1
+ /**
2
+ * 数据存储主模块 — createStore() 编排器
3
+ *
4
+ * 本文件是数据存储的入口点,负责编排各子模块:
5
+ * - db-schema.js: 建表、迁移、全局连接管理
6
+ * - db-columns.js: 共享列名常量和 SQL 生成
7
+ * - db-crud.js: 基础 CRUD(增删改查、行映射)
8
+ * - db-stats.js: 仪表盘统计、按国家分组
9
+ * - db-raw-jobs.js: raw_jobs 移入/恢复
10
+ * - db-tags.js: Tag 发现与打分
11
+ * - llm-scoring.js: LLM 国家匹配度打分
12
+ *
13
+ * createStore() 保留为运行时编排器,管理:
14
+ * - 任务认领/提交(claimNextJob/commitJob)
15
+ * - 客户端追踪、视频管理、备份
16
+ * - 内存索引、stats 缓存
17
+ */
18
+
1
19
  import fs from "fs";
2
20
  import path from "path";
3
21
  import Database from "better-sqlite3";
@@ -6,421 +24,90 @@ import {
6
24
  DEFAULT_TARGET_LOCATIONS,
7
25
  } from "../lib/target-locations.js";
8
26
 
9
- // SQLite 用户表(用于判重)
10
- let db = null;
11
- let dbPath = null;
12
-
13
- function normalizeDbFilePath(filePath) {
14
- if (!filePath) {
15
- throw new Error("db path is required");
16
- }
17
- const resolved = path.resolve(filePath);
18
- if (path.extname(resolved).toLowerCase() !== ".db") {
19
- throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
20
- }
21
- return resolved;
22
- }
23
-
24
- function resetDbConnection() {
25
- if (db) {
26
- db.close();
27
- db = null;
28
- }
29
- dbPath = null;
30
- }
31
-
32
- function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
33
- const merged = new Map();
34
-
35
- const tryLoad = (targetPath, label) => {
36
- if (!targetPath) return;
37
- if (!fs.existsSync(targetPath)) return;
38
- try {
39
- const parsed = JSON.parse(fs.readFileSync(targetPath, "utf-8"));
40
- if (!Array.isArray(parsed)) return;
41
- for (const item of parsed) {
42
- const uniqueId = item?.uniqueId || item?.unique_id;
43
- if (!uniqueId) continue;
44
- merged.set(uniqueId, {
45
- ...merged.get(uniqueId),
46
- ...item,
47
- uniqueId,
48
- });
49
- }
50
- } catch (e) {
51
- console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
52
- }
53
- };
54
-
55
- tryLoad(userFilePath, "result.json");
56
- tryLoad(doneFilePath, "result-done.json");
57
-
58
- return [...merged.values()];
59
- }
60
-
61
- function loadLegacyVideosFromFile(videoPath) {
62
- if (!videoPath) return [];
63
- if (!fs.existsSync(videoPath)) return [];
64
-
65
- try {
66
- const parsed = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
67
- return Array.isArray(parsed) ? parsed : [];
68
- } catch (e) {
69
- console.error(
70
- `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
71
- );
72
- return [];
73
- }
74
- }
75
-
76
- function initUserDb(filePath) {
77
- dbPath = normalizeDbFilePath(filePath);
78
- fs.mkdirSync(path.dirname(dbPath), { recursive: true });
79
- db = new Database(dbPath);
80
- db.pragma("journal_mode = WAL");
81
- db.exec(`
82
- CREATE TABLE IF NOT EXISTS users (
83
- unique_id TEXT PRIMARY KEY,
84
- tt_seller TEXT,
85
- verified INTEGER,
86
- location_created TEXT,
87
- created_at TEXT,
88
- updated_at TEXT
89
- )
90
- `);
91
- db.exec(`
92
- CREATE TABLE IF NOT EXISTS jobs (
93
- unique_id TEXT PRIMARY KEY,
94
- nickname TEXT,
95
- status TEXT DEFAULT 'pending',
96
- sources TEXT,
97
- claimed_by TEXT,
98
- claimed_at INTEGER,
99
- error TEXT,
100
- pinned INTEGER DEFAULT 0,
101
- no_video INTEGER DEFAULT 0,
102
- restricted INTEGER DEFAULT 0,
103
- user_update_count INTEGER DEFAULT 0,
104
- tt_seller INTEGER,
105
- verified INTEGER,
106
- video_count INTEGER DEFAULT 0,
107
- comment_count INTEGER DEFAULT 0,
108
- guessed_location TEXT,
109
- location_created TEXT,
110
- confirmed_location TEXT,
111
- modified_at INTEGER,
112
- follower_count INTEGER DEFAULT 0,
113
- following_count INTEGER DEFAULT 0,
114
- heart_count INTEGER DEFAULT 0,
115
- refresh_time INTEGER,
116
- processed INTEGER DEFAULT 0,
117
- processed_at INTEGER,
118
- created_at INTEGER,
119
- updated_at INTEGER,
120
- region TEXT,
121
- signature TEXT,
122
- sec_uid TEXT,
123
- status_code INTEGER
124
- )
125
- `);
126
-
127
- // 迁移:为已存在的 jobs 表添加 status_code 列
128
- const existingJobColumns = new Set(
129
- db
130
- .prepare("PRAGMA table_info(jobs)")
131
- .all()
132
- .map((c) => c.name),
133
- );
134
- if (!existingJobColumns.has("status_code")) {
135
- db.exec(`ALTER TABLE jobs ADD COLUMN status_code INTEGER`);
136
- }
137
- if (!existingJobColumns.has("latest_video_time")) {
138
- db.exec(`ALTER TABLE jobs ADD COLUMN latest_video_time INTEGER`);
139
- }
140
- if (!existingJobColumns.has("confirmed_location")) {
141
- db.exec(`ALTER TABLE jobs ADD COLUMN confirmed_location TEXT`);
142
- }
143
- if (!existingJobColumns.has("modified_at")) {
144
- db.exec(`ALTER TABLE jobs ADD COLUMN modified_at INTEGER`);
145
- }
146
- if (!existingJobColumns.has("bio_link")) {
147
- db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
148
- }
149
- if (!existingJobColumns.has("top_video_play_count")) {
150
- db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
151
- }
152
- if (!existingJobColumns.has("top_video_href")) {
153
- db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
154
- }
155
- if (!existingJobColumns.has("user_create_time")) {
156
- db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
157
- }
158
- db.exec(`
159
- CREATE TABLE IF NOT EXISTS jobs_base (
160
- unique_id TEXT PRIMARY KEY,
161
- nickname TEXT,
162
- status TEXT DEFAULT 'pending',
163
- sources TEXT,
164
- claimed_by TEXT,
165
- claimed_at INTEGER,
166
- error TEXT,
167
- pinned INTEGER DEFAULT 0,
168
- no_video INTEGER DEFAULT 0,
169
- restricted INTEGER DEFAULT 0,
170
- user_update_count INTEGER DEFAULT 0,
171
- tt_seller INTEGER,
172
- verified INTEGER,
173
- video_count INTEGER DEFAULT 0,
174
- comment_count INTEGER DEFAULT 0,
175
- guessed_location TEXT,
176
- location_created TEXT,
177
- confirmed_location TEXT,
178
- modified_at INTEGER,
179
- follower_count INTEGER DEFAULT 0,
180
- following_count INTEGER DEFAULT 0,
181
- heart_count INTEGER DEFAULT 0,
182
- refresh_time INTEGER,
183
- processed INTEGER DEFAULT 0,
184
- processed_at INTEGER,
185
- created_at INTEGER,
186
- updated_at INTEGER,
187
- region TEXT,
188
- signature TEXT,
189
- sec_uid TEXT,
190
- status_code INTEGER,
191
- latest_video_time INTEGER,
192
- bio_link TEXT
193
- )
194
- `);
195
-
196
- // 迁移:为已存在的 jobs_base 表补全列
197
- const existingJobBaseColumns = new Set(
198
- db
199
- .prepare("PRAGMA table_info(jobs_base)")
200
- .all()
201
- .map((c) => c.name),
202
- );
203
- if (!existingJobBaseColumns.has("status_code")) {
204
- db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
205
- }
206
- if (!existingJobBaseColumns.has("latest_video_time")) {
207
- db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
208
- }
209
- if (!existingJobBaseColumns.has("confirmed_location")) {
210
- db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
211
- }
212
- if (!existingJobBaseColumns.has("modified_at")) {
213
- db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
214
- }
215
- if (!existingJobBaseColumns.has("bio_link")) {
216
- db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
217
- }
218
- if (!existingJobBaseColumns.has("user_create_time")) {
219
- db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
220
- }
221
- db.exec(`
222
- CREATE TABLE IF NOT EXISTS raw_jobs (
223
- unique_id TEXT PRIMARY KEY,
224
- nickname TEXT,
225
- status TEXT DEFAULT 'pending',
226
- sources TEXT,
227
- claimed_by TEXT,
228
- claimed_at INTEGER,
229
- error TEXT,
230
- pinned INTEGER DEFAULT 0,
231
- no_video INTEGER DEFAULT 0,
232
- restricted INTEGER DEFAULT 0,
233
- user_update_count INTEGER DEFAULT 0,
234
- tt_seller INTEGER,
235
- verified INTEGER,
236
- video_count INTEGER DEFAULT 0,
237
- comment_count INTEGER DEFAULT 0,
238
- guessed_location TEXT,
239
- location_created TEXT,
240
- confirmed_location TEXT,
241
- modified_at INTEGER,
242
- follower_count INTEGER DEFAULT 0,
243
- following_count INTEGER DEFAULT 0,
244
- heart_count INTEGER DEFAULT 0,
245
- refresh_time INTEGER,
246
- processed INTEGER DEFAULT 0,
247
- processed_at INTEGER,
248
- created_at INTEGER,
249
- updated_at INTEGER,
250
- region TEXT,
251
- signature TEXT,
252
- sec_uid TEXT,
253
- status_code INTEGER,
254
- latest_video_time INTEGER
255
- )
256
- `);
257
-
258
- // 迁移:为已存在的 raw_jobs 表添加 status_code 列
259
- const existingRawJobColumns = new Set(
260
- db
261
- .prepare("PRAGMA table_info(raw_jobs)")
262
- .all()
263
- .map((c) => c.name),
264
- );
265
- if (!existingRawJobColumns.has("status_code")) {
266
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN status_code INTEGER`);
267
- }
268
- if (!existingRawJobColumns.has("latest_video_time")) {
269
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN latest_video_time INTEGER`);
270
- }
271
- if (!existingRawJobColumns.has("confirmed_location")) {
272
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN confirmed_location TEXT`);
273
- }
274
- if (!existingRawJobColumns.has("modified_at")) {
275
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN modified_at INTEGER`);
276
- }
277
- if (!existingRawJobColumns.has("bio_link")) {
278
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
279
- }
280
- if (!existingRawJobColumns.has("user_create_time")) {
281
- db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
282
- }
283
- db.exec(`
284
- CREATE TABLE IF NOT EXISTS videos (
285
- id TEXT PRIMARY KEY,
286
- href TEXT,
287
- author_unique_id TEXT,
288
- location_created TEXT,
289
- tt_seller INTEGER DEFAULT 0,
290
- registered_at INTEGER,
291
- user_update_count INTEGER DEFAULT 0,
292
- play_count INTEGER,
293
- digg_count INTEGER,
294
- comment_count INTEGER,
295
- share_count INTEGER,
296
- collect_count INTEGER,
297
- stats_updated_at INTEGER,
298
- create_time INTEGER
299
- )
300
- `);
301
- db.exec(`
302
- CREATE INDEX IF NOT EXISTS idx_jobs_status_video
303
- ON jobs(status, video_count DESC)
304
- `);
305
- db.exec(`
306
- CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
307
- ON jobs(claimed_by, status, claimed_at)
308
- `);
309
- db.exec(`
310
- CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
311
- ON jobs(status, claimed_at)
312
- `);
313
- db.exec(`
314
- CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
315
- ON jobs(tt_seller, verified, location_created, refresh_time)
316
- `);
317
- db.exec(`
318
- CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
319
- ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
320
- `);
321
- db.exec(`
322
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
323
- ON jobs(created_at ASC, unique_id ASC)
324
- WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
325
- `);
326
- db.exec(`
327
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
328
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
329
- WHERE status = 'pending'
330
- AND COALESCE(pinned, 0) = 0
331
- AND tt_seller = 1
332
- AND verified = 0
333
- `);
334
- db.exec(`
335
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
336
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
337
- WHERE status = 'pending'
338
- AND COALESCE(pinned, 0) = 0
339
- AND (
340
- instr(COALESCE(sources, ''), '"following"') > 0
341
- OR instr(COALESCE(sources, ''), '"follower"') > 0
342
- )
343
- `);
344
- db.exec(`
345
- CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
346
- ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
347
- WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
348
- `);
349
- db.exec(`
350
- CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
351
- ON jobs(created_at ASC, unique_id ASC)
352
- WHERE (tt_seller IS NULL OR tt_seller = '')
353
- AND (user_update_count IS NULL OR user_update_count <= 0)
354
- `);
355
- db.exec(`
356
- CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
357
- ON jobs(created_at ASC, unique_id ASC)
358
- WHERE COALESCE(tt_seller, '') = ''
359
- AND COALESCE(user_update_count, 0) <= 0
360
- `);
361
- db.exec(`
362
- CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
363
- ON videos(user_update_count, tt_seller DESC, registered_at ASC)
364
- `);
365
- db.exec(`
366
- CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
367
- ON videos(tt_seller DESC, registered_at ASC, id)
368
- WHERE user_update_count IS NULL OR user_update_count <= 0
369
- `);
370
-
371
- const existingVideoColumns = new Set(
372
- db
373
- .prepare("PRAGMA table_info(videos)")
374
- .all()
375
- .map((column) => column.name),
376
- );
377
- const requiredVideoColumns = {
378
- play_count: "INTEGER",
379
- digg_count: "INTEGER",
380
- comment_count: "INTEGER",
381
- share_count: "INTEGER",
382
- collect_count: "INTEGER",
383
- stats_updated_at: "INTEGER",
384
- };
385
- for (const [column, type] of Object.entries(requiredVideoColumns)) {
386
- if (!existingVideoColumns.has(column)) {
387
- db.exec(`ALTER TABLE videos ADD COLUMN ${column} ${type}`);
388
- }
389
- }
390
-
391
- // 迁移:videos 表添加 create_time 列
392
- if (!existingVideoColumns.has("create_time")) {
393
- db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
394
- }
27
+ // Schema 与连接管理
28
+ import {
29
+ getDb,
30
+ getDbPath,
31
+ initDb,
32
+ resetDbConnection,
33
+ loadLegacyUsersFromFiles,
34
+ loadLegacyVideosFromFile,
35
+ } from "./db-schema.js";
36
+
37
+ // CRUD 操作
38
+ import {
39
+ snakeToCamel,
40
+ camelToSnake,
41
+ normalizeJobValue,
42
+ mapJobRow,
43
+ mapVideoRow,
44
+ inferStatus,
45
+ hasUserInDb,
46
+ addUserToDb,
47
+ addJobToDb,
48
+ addJobBaseToDb,
49
+ addJob,
50
+ getJobRow,
51
+ getJobBaseRow,
52
+ getJob,
53
+ getAllJobs,
54
+ getVideoRow,
55
+ getAllVideoRows,
56
+ updateJobInfo,
57
+ updateJobBaseInfo,
58
+ getUserDbCount,
59
+ getJobsCount,
60
+ getPendingJobsCount,
61
+ getPendingJobsUserUpdateCount,
62
+ getRawJobsCount,
63
+ } from "./db-crud.js";
64
+
65
+ // 统计查询
66
+ import {
67
+ getDashboardStatsFromDb,
68
+ getPendingByCountryFromDb,
69
+ getUserUpdateByCountryFromDb,
70
+ getAttachStuckByCountryFromDb,
71
+ getRawByCountryFromDb,
72
+ restoreAttachStuckByCountry,
73
+ resetPendingByCountry,
74
+ } from "./db-stats.js";
75
+
76
+ // Raw Jobs 管理
77
+ import {
78
+ moveJobsToRawByCountry,
79
+ restoreRawJobsByCountry,
80
+ restoreRawJobById,
81
+ restoreRawJobsByFilter,
82
+ getRawJobsPageFromDb,
83
+ } from "./db-raw-jobs.js";
84
+
85
+ // Tag CRUD
86
+ import {
87
+ insertTag,
88
+ getTagsByStatus,
89
+ getTagsByCountry,
90
+ getDeadTags,
91
+ claimTag,
92
+ reportTagScore,
93
+ getAllTags,
94
+ rawQuery,
95
+ normalizeTags,
96
+ clearTags,
97
+ } from "./db-tags.js";
98
+
99
+ // LLM 打分
100
+ import {
101
+ scoreJobLocation,
102
+ scoreJobsBatch,
103
+ createLlmOffsetStore,
104
+ } from "./llm-scoring.js";
395
105
 
396
- // tags 表:标签发现与打分系统
397
- db.exec(`
398
- CREATE TABLE IF NOT EXISTS tags (
399
- id INTEGER PRIMARY KEY AUTOINCREMENT,
400
- tag TEXT NOT NULL UNIQUE,
401
- status TEXT NOT NULL DEFAULT 'new',
402
- score REAL NOT NULL DEFAULT 0,
403
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
404
- scored_at TEXT,
405
- score_count INTEGER NOT NULL DEFAULT 0,
406
- countries TEXT NOT NULL DEFAULT '[]',
407
- matched_countries TEXT DEFAULT '[]',
408
- total_posts INTEGER DEFAULT 0,
409
- author_count INTEGER DEFAULT 0,
410
- matched_authors INTEGER DEFAULT 0,
411
- pushed_users INTEGER DEFAULT 0,
412
- source TEXT NOT NULL DEFAULT 'llm',
413
- user_prompt TEXT,
414
- last_error TEXT
415
- )
416
- `);
417
- db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
418
- db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
419
-
420
- const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
421
- console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
422
- }
106
+ // ===== 薄包装函数(保持外部 API 不变)=====
423
107
 
108
+ /**
109
+ * 导入历史 JSON 数据到 SQLite
110
+ */
424
111
  export function importLegacyJsonToDb({
425
112
  dbFilePath,
426
113
  usersFilePath,
@@ -428,33 +115,30 @@ export function importLegacyJsonToDb({
428
115
  videosFilePath,
429
116
  }) {
430
117
  resetDbConnection();
431
- initUserDb(dbFilePath);
118
+ initDb(dbFilePath);
119
+
120
+ const db = getDb();
121
+ const dbPath = getDbPath();
432
122
 
433
123
  const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
434
124
  const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
435
125
 
436
- const beforeUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
437
- const beforeJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
438
- const beforeVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
439
-
440
- const insertUserStmt = db.prepare(`
441
- INSERT OR IGNORE INTO users (unique_id) VALUES (?)
442
- `);
443
- const insertVideoStmt = db.prepare(`
444
- INSERT OR IGNORE INTO videos (
445
- id,
446
- href,
447
- author_unique_id,
448
- location_created,
449
- tt_seller,
450
- registered_at,
451
- user_update_count,
452
- create_time
453
- )
454
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
455
- `);
126
+ const beforeUsers = getDb()
127
+ .prepare("SELECT COUNT(*) as c FROM users")
128
+ .get().c;
129
+ const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
130
+ const beforeVideos = getDb()
131
+ .prepare("SELECT COUNT(*) as c FROM videos")
132
+ .get().c;
456
133
 
457
- const importUsersTxn = db.transaction((items) => {
134
+ const insertUserStmt = getDb().prepare(
135
+ `INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
136
+ );
137
+ const insertVideoStmt = getDb().prepare(
138
+ `INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
139
+ );
140
+
141
+ const importUsersTxn = getDb().transaction((items) => {
458
142
  for (const item of items) {
459
143
  const uniqueId = item.uniqueId || item.unique_id;
460
144
  if (!uniqueId) continue;
@@ -463,7 +147,7 @@ export function importLegacyJsonToDb({
463
147
  }
464
148
  });
465
149
 
466
- const importVideosTxn = db.transaction((items) => {
150
+ const importVideosTxn = getDb().transaction((items) => {
467
151
  for (const item of items) {
468
152
  if (!item?.id) continue;
469
153
  insertVideoStmt.run(
@@ -482,12 +166,14 @@ export function importLegacyJsonToDb({
482
166
  importUsersTxn(legacyUsers);
483
167
  importVideosTxn(legacyVideos);
484
168
 
485
- const afterUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
486
- const afterJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
487
- const afterVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
169
+ const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
170
+ const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
171
+ const afterVideos = getDb()
172
+ .prepare("SELECT COUNT(*) as c FROM videos")
173
+ .get().c;
488
174
 
489
175
  return {
490
- dbPath,
176
+ dbPath: getDbPath(),
491
177
  usersImported: afterUsers - beforeUsers,
492
178
  jobsImported: afterJobs - beforeJobs,
493
179
  videosImported: afterVideos - beforeVideos,
@@ -501,1082 +187,6 @@ export function closeStoreDb() {
501
187
  resetDbConnection();
502
188
  }
503
189
 
504
- function hasUserInDb(uid) {
505
- if (!db) return false;
506
- const row = db.prepare("SELECT 1 FROM users WHERE unique_id = ?").get(uid);
507
- return !!row;
508
- }
509
-
510
- function addUserToDb(user) {
511
- if (!db) return;
512
- db.prepare(
513
- `
514
- INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
515
- VALUES (?, ?, ?, ?, ?, ?)
516
- `,
517
- ).run(
518
- user.uniqueId,
519
- user.ttSeller === undefined ||
520
- user.ttSeller === null ||
521
- user.ttSeller === ""
522
- ? null
523
- : user.ttSeller
524
- ? 1
525
- : 0,
526
- user.verified === undefined ||
527
- user.verified === null ||
528
- user.verified === ""
529
- ? null
530
- : user.verified
531
- ? 1
532
- : 0,
533
- user.locationCreated || null,
534
- new Date().toISOString(),
535
- new Date().toISOString(),
536
- );
537
- }
538
-
539
- function addJobToDb(user) {
540
- if (!db) return;
541
- const now = Date.now();
542
- db.prepare(
543
- `
544
- INSERT OR IGNORE INTO jobs (
545
- unique_id,
546
- nickname,
547
- status,
548
- sources,
549
- claimed_by,
550
- claimed_at,
551
- error,
552
- pinned,
553
- no_video,
554
- restricted,
555
- user_update_count,
556
- tt_seller,
557
- verified,
558
- video_count,
559
- comment_count,
560
- guessed_location,
561
- location_created,
562
- follower_count,
563
- following_count,
564
- heart_count,
565
- refresh_time,
566
- processed,
567
- processed_at,
568
- created_at,
569
- updated_at,
570
- region,
571
- signature,
572
- bio_link,
573
- sec_uid
574
- )
575
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
576
- `,
577
- ).run(
578
- user.uniqueId,
579
- user.nickname || null,
580
- user.status || inferStatus(user),
581
- JSON.stringify(
582
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
583
- ),
584
- user.claimedBy || null,
585
- user.claimedAt || null,
586
- user.error || null,
587
- user.pinned ? 1 : 0,
588
- user.noVideo ? 1 : 0,
589
- user.restricted ? 1 : 0,
590
- user.userUpdateCount || 0,
591
- user.ttSeller === undefined ||
592
- user.ttSeller === null ||
593
- user.ttSeller === ""
594
- ? null
595
- : user.ttSeller
596
- ? 1
597
- : 0,
598
- user.verified === undefined ||
599
- user.verified === null ||
600
- user.verified === ""
601
- ? null
602
- : user.verified
603
- ? 1
604
- : 0,
605
- user.videoCount || 0,
606
- user.commentCount || 0,
607
- user.guessedLocation || null,
608
- user.locationCreated || null,
609
- user.followerCount || 0,
610
- user.followingCount || 0,
611
- user.heartCount || 0,
612
- user.refreshTime || null,
613
- user.processed ? 1 : 0,
614
- user.processedAt || null,
615
- user.createdAt || now,
616
- user.updatedAt || now,
617
- user.region || null,
618
- user.signature || null,
619
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
620
- user.secUid || null,
621
- );
622
- }
623
-
624
- function getUserDbCount() {
625
- if (!db) return 0;
626
- return db.prepare("SELECT COUNT(*) as c FROM users").get().c;
627
- }
628
-
629
- function getJobsCount() {
630
- if (!db) return 0;
631
- return db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
632
- }
633
-
634
- function getPendingJobsCount() {
635
- if (!db) return 0;
636
- return db
637
- .prepare("SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'")
638
- .get().c;
639
- }
640
-
641
- function getPendingJobsUserUpdateCount() {
642
- if (!db) return 0;
643
- return db
644
- .prepare(
645
- `
646
- SELECT COUNT(*) as c
647
- FROM jobs
648
- WHERE COALESCE(tt_seller, '') = ''
649
- AND COALESCE(user_update_count, 0) <= 0
650
- `,
651
- )
652
- .get().c;
653
- }
654
-
655
- function getRawJobsCount() {
656
- if (!db) return 0;
657
- return db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get().c;
658
- }
659
-
660
- function getDashboardStatsFromDb(targetLocations = []) {
661
- if (!db) return null;
662
-
663
- const targetPlaceholders = targetLocations.map(() => "?").join(", ");
664
- const targetParams = targetLocations.length ? targetLocations : [];
665
-
666
- // 合并所有 jobs 表的聚合统计为单次扫描
667
- const aggregateRow = db
668
- .prepare(
669
- `
670
- SELECT
671
- COUNT(*) as total,
672
- SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
673
- SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
674
- SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
675
- SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
676
- SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
677
- SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${
678
- targetLocations.length
679
- ? `AND location_created IN (${targetPlaceholders})`
680
- : "AND 1 = 0"
681
- } THEN 1 ELSE 0 END) as targetUsers,
682
- SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
683
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
684
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
685
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
686
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
687
- SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
688
- SUM(CASE
689
- WHEN status != 'done'
690
- AND instr(COALESCE(sources, ''), '"video"') = 0
691
- AND instr(COALESCE(sources, ''), '"comment"') = 0
692
- AND instr(COALESCE(sources, ''), '"guess"') = 0
693
- AND instr(COALESCE(sources, ''), '"following"') = 0
694
- AND instr(COALESCE(sources, ''), '"follower"') = 0
695
- THEN 1 ELSE 0 END) as seed
696
- FROM jobs
697
- `,
698
- )
699
- .get(...targetParams);
700
-
701
- // userUpdateTasks 单独从 jobs_base 统计
702
- const userUpdateTasksRow = db
703
- .prepare(
704
- `
705
- SELECT COUNT(*) as userUpdateTasks
706
- FROM jobs_base
707
- WHERE COALESCE(tt_seller, '') = ''
708
- AND COALESCE(user_update_count, 0) <= 0
709
- `,
710
- )
711
- .get();
712
-
713
- // countryStats 和 targetCountryStats 需要 GROUP BY,保留为独立查询
714
- const countryStats = db
715
- .prepare(
716
- `
717
- SELECT
718
- COALESCE(location_created, '未知') as country,
719
- COUNT(*) as count,
720
- SUM(CASE
721
- WHEN tt_seller = 1 AND verified = 0 ${
722
- targetLocations.length
723
- ? `AND location_created IN (${targetPlaceholders})`
724
- : "AND 1 = 0"
725
- }
726
- THEN 1 ELSE 0 END) as targetCount
727
- FROM jobs
728
- WHERE status = 'done'
729
- GROUP BY COALESCE(location_created, '未知')
730
- ORDER BY count DESC
731
- `,
732
- )
733
- .all(...targetParams);
734
-
735
- const targetCountryStats = targetLocations.length
736
- ? db
737
- .prepare(
738
- `
739
- SELECT location_created as country, COUNT(*) as count
740
- FROM jobs
741
- WHERE tt_seller = 1
742
- AND verified = 0
743
- AND location_created IN (${targetPlaceholders})
744
- GROUP BY location_created
745
- ORDER BY count DESC
746
- `,
747
- )
748
- .all(...targetLocations)
749
- : [];
750
-
751
- const jobsBaseCount = db
752
- .prepare("SELECT COUNT(*) as total FROM jobs_base")
753
- .get().total;
754
-
755
- return {
756
- totalUsers: aggregateRow.total,
757
- rawJobs: getRawJobsCount(),
758
- dbTotalUsers: getUserDbCount(),
759
- jobsTotal: aggregateRow.total,
760
- jobsBaseTotal: jobsBaseCount,
761
- jobsPending: aggregateRow.pending,
762
- processedUsers: aggregateRow.done,
763
- pendingUsers: aggregateRow.pending,
764
- processingUsers: aggregateRow.processing,
765
- restrictedUsers: aggregateRow.restricted,
766
- errorUsers: aggregateRow.error,
767
- targetUsers: aggregateRow.targetUsers,
768
- userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
769
- targetCountryStats,
770
- countryStats,
771
- sourceStats: {
772
- seed: aggregateRow.seed || 0,
773
- video: aggregateRow.video || 0,
774
- comment: aggregateRow.comment || 0,
775
- guess: aggregateRow.guess || 0,
776
- following: aggregateRow.following || 0,
777
- follower: aggregateRow.follower || 0,
778
- processed: aggregateRow.done,
779
- restricted: aggregateRow.restricted,
780
- error: aggregateRow.error,
781
- noVideo: aggregateRow.noVideo || 0,
782
- },
783
- };
784
- }
785
-
786
- function getPendingByCountryFromDb() {
787
- if (!db) return [];
788
-
789
- // 按 guessed_location 分组统计待处理任务
790
- const rows = db
791
- .prepare(
792
- `
793
- SELECT
794
- COALESCE(guessed_location, '未知') as country,
795
- COUNT(*) as count
796
- FROM jobs
797
- WHERE status = 'pending'
798
- GROUP BY COALESCE(guessed_location, '未知')
799
- ORDER BY count DESC
800
- `,
801
- )
802
- .all();
803
-
804
- return rows;
805
- }
806
-
807
- function getUserUpdateByCountryFromDb() {
808
- if (!db) return [];
809
-
810
- // 按 guessed_location 分组统计待补资料任务
811
- const rows = db
812
- .prepare(
813
- `
814
- SELECT
815
- COALESCE(guessed_location, '未知') as country,
816
- COUNT(*) as count
817
- FROM jobs_base
818
- WHERE tt_seller IS NULL
819
- AND COALESCE(user_update_count, 0) <= 0
820
- GROUP BY COALESCE(guessed_location, '未知')
821
- ORDER BY count DESC
822
- `,
823
- )
824
- .all();
825
-
826
- return rows;
827
- }
828
-
829
- function getAttachStuckByCountryFromDb() {
830
- if (!db) return [];
831
-
832
- return db
833
- .prepare(
834
- `
835
- SELECT
836
- COALESCE(guessed_location, '未知') as country,
837
- COUNT(*) as count
838
- FROM jobs_base
839
- WHERE tt_seller IS NULL
840
- AND COALESCE(user_update_count, 0) = 1
841
- GROUP BY COALESCE(guessed_location, '未知')
842
- ORDER BY count DESC
843
- `,
844
- )
845
- .all();
846
- }
847
-
848
- function restoreAttachStuckByCountry(country) {
849
- if (!db) {
850
- return { restored: 0, country, error: "db not ready" };
851
- }
852
-
853
- const normalizedCountry = String(country == null ? "未知" : country).trim();
854
- if (!normalizedCountry) {
855
- return {
856
- restored: 0,
857
- country: normalizedCountry,
858
- error: "country is required",
859
- };
860
- }
861
-
862
- const whereSql = `
863
- COALESCE(tt_seller, '') = ''
864
- AND COALESCE(user_update_count, 0) = 1
865
- AND COALESCE(guessed_location, '未知') = ?
866
- `;
867
- const count =
868
- db
869
- .prepare(
870
- `
871
- SELECT COUNT(*) as c
872
- FROM jobs_base
873
- WHERE ${whereSql}
874
- `,
875
- )
876
- .get(normalizedCountry)?.c || 0;
877
-
878
- if (!count) {
879
- return { restored: 0, country: normalizedCountry };
880
- }
881
-
882
- db.prepare(
883
- `
884
- UPDATE jobs_base
885
- SET user_update_count = 0,
886
- updated_at = ?,
887
- claimed_by = NULL,
888
- claimed_at = NULL
889
- WHERE ${whereSql}
890
- `,
891
- ).run(Date.now(), normalizedCountry);
892
-
893
- return { restored: count, country: normalizedCountry };
894
- }
895
-
896
- function resetPendingByCountry(country) {
897
- if (!db) {
898
- return { reset: 0, country, error: "db not ready" };
899
- }
900
-
901
- const normalizedCountry = String(country == null ? "未知" : country).trim();
902
- if (!normalizedCountry) {
903
- return {
904
- reset: 0,
905
- country: normalizedCountry,
906
- error: "country is required",
907
- };
908
- }
909
-
910
- const whereSql = `
911
- status = 'pending'
912
- AND COALESCE(guessed_location, '未知') = ?
913
- `;
914
- const count =
915
- db
916
- .prepare(
917
- `
918
- SELECT COUNT(*) as c
919
- FROM jobs
920
- WHERE ${whereSql}
921
- `,
922
- )
923
- .get(normalizedCountry)?.c || 0;
924
-
925
- if (!count) {
926
- return { reset: 0, country: normalizedCountry };
927
- }
928
-
929
- db.prepare(
930
- `
931
- UPDATE jobs
932
- SET user_update_count = 0,
933
- updated_at = ?,
934
- claimed_by = NULL,
935
- claimed_at = NULL
936
- WHERE ${whereSql}
937
- `,
938
- ).run(Date.now(), normalizedCountry);
939
-
940
- return { reset: count, country: normalizedCountry };
941
- }
942
-
943
- function getRawByCountryFromDb() {
944
- if (!db) return [];
945
-
946
- return db
947
- .prepare(
948
- `
949
- SELECT
950
- COALESCE(guessed_location, '未知') as country,
951
- COUNT(*) as count
952
- FROM raw_jobs
953
- GROUP BY COALESCE(guessed_location, '未知')
954
- ORDER BY count DESC
955
- `,
956
- )
957
- .all();
958
- }
959
-
960
- function moveJobsToRawByCountry(scope, country) {
961
- if (!db) {
962
- return { moved: 0, scope, country, error: "db not ready" };
963
- }
964
-
965
- const normalizedScope = String(scope || "").trim();
966
- const normalizedCountry = String(country == null ? "未知" : country).trim();
967
- if (!normalizedCountry) {
968
- return {
969
- moved: 0,
970
- scope: normalizedScope,
971
- country: normalizedCountry,
972
- error: "country is required",
973
- };
974
- }
975
-
976
- // pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
977
- // userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
978
- let sourceTable = "";
979
- let scopeWhere = "";
980
- let columns = "";
981
-
982
- if (normalizedScope === "pending") {
983
- sourceTable = "jobs";
984
- scopeWhere = `status = 'pending'`;
985
- columns = `
986
- unique_id, nickname, status, sources, claimed_by, claimed_at,
987
- error, pinned, no_video, restricted, user_update_count,
988
- tt_seller, verified, video_count, comment_count,
989
- guessed_location, location_created, follower_count,
990
- following_count, heart_count, refresh_time, processed,
991
- processed_at, created_at, updated_at, region, signature,
992
- sec_uid, latest_video_time, user_create_time
993
- `;
994
- } else if (normalizedScope === "userUpdate") {
995
- sourceTable = "jobs_base";
996
- scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
997
- columns = `
998
- unique_id, nickname, status, sources, claimed_by, claimed_at,
999
- error, pinned, no_video, restricted, user_update_count,
1000
- tt_seller, verified, video_count, comment_count,
1001
- guessed_location, location_created, follower_count,
1002
- following_count, heart_count, refresh_time, processed,
1003
- processed_at, created_at, updated_at, region, signature,
1004
- sec_uid, latest_video_time, user_create_time
1005
- `;
1006
- } else {
1007
- return {
1008
- moved: 0,
1009
- scope: normalizedScope,
1010
- country: normalizedCountry,
1011
- error: "unsupported scope",
1012
- };
1013
- }
1014
-
1015
- const whereSql = `
1016
- ${scopeWhere}
1017
- AND COALESCE(guessed_location, '未知') = ?
1018
- `;
1019
- const count =
1020
- db
1021
- .prepare(
1022
- `
1023
- SELECT COUNT(*) as c
1024
- FROM ${sourceTable}
1025
- WHERE ${whereSql}
1026
- `,
1027
- )
1028
- .get(normalizedCountry)?.c || 0;
1029
-
1030
- if (!count) {
1031
- return { moved: 0, scope: normalizedScope, country: normalizedCountry };
1032
- }
1033
-
1034
- const moveTxn = db.transaction((targetCountry) => {
1035
- db.prepare(
1036
- `
1037
- INSERT OR REPLACE INTO raw_jobs (
1038
- ${columns}
1039
- )
1040
- SELECT
1041
- ${columns}
1042
- FROM ${sourceTable}
1043
- WHERE ${whereSql}
1044
- `,
1045
- ).run(targetCountry);
1046
-
1047
- db.prepare(
1048
- `
1049
- DELETE FROM ${sourceTable}
1050
- WHERE ${whereSql}
1051
- `,
1052
- ).run(targetCountry);
1053
- });
1054
-
1055
- moveTxn(normalizedCountry);
1056
- return { moved: count, scope: normalizedScope, country: normalizedCountry };
1057
- }
1058
-
1059
- function restoreRawJobsByCountry(country) {
1060
- if (!db) {
1061
- return { restored: 0, country, error: "db not ready" };
1062
- }
1063
-
1064
- const normalizedCountry = String(country == null ? "未知" : country).trim();
1065
- if (!normalizedCountry) {
1066
- return {
1067
- restored: 0,
1068
- country: normalizedCountry,
1069
- error: "country is required",
1070
- };
1071
- }
1072
-
1073
- const whereSql = `COALESCE(guessed_location, '未知') = ?`;
1074
- const count =
1075
- db
1076
- .prepare(
1077
- `
1078
- SELECT COUNT(*) as c
1079
- FROM raw_jobs
1080
- WHERE ${whereSql}
1081
- `,
1082
- )
1083
- .get(normalizedCountry)?.c || 0;
1084
-
1085
- if (!count) {
1086
- return { restored: 0, country: normalizedCountry };
1087
- }
1088
-
1089
- const restoreTxn = db.transaction((targetCountry) => {
1090
- db.prepare(
1091
- `
1092
- INSERT OR REPLACE INTO jobs (
1093
- unique_id,
1094
- nickname,
1095
- status,
1096
- sources,
1097
- claimed_by,
1098
- claimed_at,
1099
- error,
1100
- pinned,
1101
- no_video,
1102
- restricted,
1103
- user_update_count,
1104
- tt_seller,
1105
- verified,
1106
- video_count,
1107
- comment_count,
1108
- guessed_location,
1109
- location_created,
1110
- follower_count,
1111
- following_count,
1112
- heart_count,
1113
- refresh_time,
1114
- processed,
1115
- processed_at,
1116
- created_at,
1117
- updated_at,
1118
- region,
1119
- signature,
1120
- sec_uid
1121
- )
1122
- SELECT
1123
- unique_id,
1124
- nickname,
1125
- status,
1126
- sources,
1127
- claimed_by,
1128
- claimed_at,
1129
- error,
1130
- pinned,
1131
- no_video,
1132
- restricted,
1133
- user_update_count,
1134
- tt_seller,
1135
- verified,
1136
- video_count,
1137
- comment_count,
1138
- guessed_location,
1139
- location_created,
1140
- follower_count,
1141
- following_count,
1142
- heart_count,
1143
- refresh_time,
1144
- processed,
1145
- processed_at,
1146
- created_at,
1147
- updated_at,
1148
- region,
1149
- signature,
1150
- sec_uid
1151
- FROM raw_jobs
1152
- WHERE ${whereSql}
1153
- `,
1154
- ).run(targetCountry);
1155
-
1156
- db.prepare(
1157
- `
1158
- DELETE FROM raw_jobs
1159
- WHERE ${whereSql}
1160
- `,
1161
- ).run(targetCountry);
1162
- });
1163
-
1164
- restoreTxn(normalizedCountry);
1165
- return { restored: count, country: normalizedCountry };
1166
- }
1167
-
1168
- function restoreRawJobById(uniqueId) {
1169
- if (!db) {
1170
- return { restored: 0, uniqueId, error: "db not ready" };
1171
- }
1172
-
1173
- const safeId = String(uniqueId).trim();
1174
- if (!safeId) {
1175
- return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
1176
- }
1177
-
1178
- const exists =
1179
- db
1180
- .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
1181
- .get(safeId)?.c || 0;
1182
-
1183
- if (!exists) {
1184
- return { restored: 0, uniqueId: safeId };
1185
- }
1186
-
1187
- const restoreTxn = db.transaction(() => {
1188
- db.prepare(
1189
- `
1190
- INSERT OR REPLACE INTO jobs (
1191
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1192
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1193
- video_count, comment_count, guessed_location, location_created,
1194
- follower_count, following_count, heart_count, refresh_time,
1195
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1196
- )
1197
- SELECT
1198
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1199
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1200
- video_count, comment_count, guessed_location, location_created,
1201
- follower_count, following_count, heart_count, refresh_time,
1202
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1203
- FROM raw_jobs WHERE unique_id = ?
1204
- `,
1205
- ).run(safeId);
1206
-
1207
- db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
1208
- });
1209
-
1210
- restoreTxn();
1211
- return { restored: 1, uniqueId: safeId };
1212
- }
1213
-
1214
- function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
1215
- if (!db) {
1216
- return { restored: 0, error: "db not ready" };
1217
- }
1218
-
1219
- const where = [];
1220
- const args = [];
1221
-
1222
- if (search) {
1223
- where.push(
1224
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1225
- );
1226
- const likeVal = `%${search.toLowerCase()}%`;
1227
- args.push(likeVal, likeVal);
1228
- }
1229
-
1230
- if (location) {
1231
- where.push("COALESCE(guessed_location, '未知') = ?");
1232
- args.push(location);
1233
- }
1234
-
1235
- if (hasVideo) {
1236
- where.push("COALESCE(video_count, 0) > 0");
1237
- }
1238
-
1239
- if (hasFollower) {
1240
- where.push("COALESCE(follower_count, 0) > 0");
1241
- }
1242
-
1243
- if (where.length === 0) {
1244
- return { restored: 0, error: "at least one filter is required" };
1245
- }
1246
-
1247
- const whereSql = where.join(" AND ");
1248
-
1249
- const count =
1250
- db
1251
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
1252
- .get(...args)?.c || 0;
1253
-
1254
- if (!count) {
1255
- return { restored: 0 };
1256
- }
1257
-
1258
- const restoreTxn = db.transaction(() => {
1259
- db.prepare(
1260
- `
1261
- INSERT OR REPLACE INTO jobs (
1262
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1263
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1264
- video_count, comment_count, guessed_location, location_created,
1265
- follower_count, following_count, heart_count, refresh_time,
1266
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1267
- )
1268
- SELECT
1269
- unique_id, nickname, status, sources, claimed_by, claimed_at, error,
1270
- pinned, no_video, restricted, user_update_count, tt_seller, verified,
1271
- video_count, comment_count, guessed_location, location_created,
1272
- follower_count, following_count, heart_count, refresh_time,
1273
- processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
1274
- FROM raw_jobs WHERE ${whereSql}
1275
- `,
1276
- ).run(...args);
1277
-
1278
- db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
1279
- });
1280
-
1281
- restoreTxn();
1282
- return { restored: count };
1283
- }
1284
-
1285
- function getRawJobsPageFromDb({
1286
- search,
1287
- location,
1288
- limit,
1289
- offset,
1290
- hasVideo,
1291
- hasFollower,
1292
- }) {
1293
- if (!db) return null;
1294
-
1295
- const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1296
- const safeOffset = Math.max(0, parseInt(offset) || 0);
1297
- const where = [];
1298
- const args = [];
1299
-
1300
- if (search) {
1301
- where.push(
1302
- "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
1303
- );
1304
- const pattern = `%${String(search).toLowerCase()}%`;
1305
- args.push(pattern, pattern);
1306
- }
1307
- if (location) {
1308
- where.push("COALESCE(guessed_location, '未知') = ?");
1309
- args.push(location);
1310
- }
1311
- if (hasVideo) {
1312
- where.push("COALESCE(video_count, 0) > 0");
1313
- }
1314
- if (hasFollower) {
1315
- where.push("COALESCE(follower_count, 0) > 0");
1316
- }
1317
-
1318
- const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
1319
- const total = db
1320
- .prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
1321
- .get(...args).c;
1322
-
1323
- const rows = db
1324
- .prepare(
1325
- `
1326
- SELECT *
1327
- FROM raw_jobs
1328
- ${whereSql}
1329
- ORDER BY created_at DESC, unique_id ASC
1330
- LIMIT ? OFFSET ?
1331
- `,
1332
- )
1333
- .all(...args, safeLimit, safeOffset);
1334
-
1335
- return {
1336
- total,
1337
- limit: safeLimit,
1338
- offset: safeOffset,
1339
- users: rows.map(mapJobRow),
1340
- };
1341
- }
1342
-
1343
- // ====== Tag 发现与打分 CRUD ======
1344
-
1345
- function insertTag(tag, countries, source = "llm") {
1346
- if (!db) return { inserted: false, error: "db not ready" };
1347
- // 防止存入带 # 前缀的 tag
1348
- const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
1349
- if (!normalized || normalized.length < 2) {
1350
- return { inserted: false, error: "invalid tag" };
1351
- }
1352
- try {
1353
- const result = db
1354
- .prepare(
1355
- `
1356
- INSERT OR IGNORE INTO tags (tag, countries, source)
1357
- VALUES (?, ?, ?)
1358
- `,
1359
- )
1360
- .run(normalized, JSON.stringify(countries), source);
1361
- return { inserted: result.changes > 0, tag: normalized };
1362
- } catch (e) {
1363
- return { inserted: false, error: e.message };
1364
- }
1365
- }
1366
-
1367
- function getTagsByStatus(status, limit = 100) {
1368
- if (!db) return [];
1369
- const rows = db
1370
- .prepare(
1371
- `
1372
- SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
1373
- `,
1374
- )
1375
- .all(status, limit);
1376
- return rows.map((r) => ({
1377
- ...r,
1378
- countries: JSON.parse(r.countries || "[]"),
1379
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1380
- }));
1381
- }
1382
-
1383
- function getTagsByCountry(country, minScore = 0) {
1384
- if (!db) return [];
1385
- const rows = db
1386
- .prepare(
1387
- `
1388
- SELECT * FROM tags WHERE status != 'dead'
1389
- ORDER BY score DESC
1390
- `,
1391
- )
1392
- .all();
1393
- // Filter in JS since countries is JSON
1394
- return rows
1395
- .map((r) => ({
1396
- ...r,
1397
- countries: JSON.parse(r.countries || "[]"),
1398
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1399
- }))
1400
- .filter((r) => r.countries.includes(country) && r.score >= minScore);
1401
- }
1402
-
1403
- function getDeadTags(country) {
1404
- if (!db) return [];
1405
- const rows = db
1406
- .prepare(
1407
- `
1408
- SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
1409
- `,
1410
- )
1411
- .all();
1412
- return rows
1413
- .map((r) => ({
1414
- ...r,
1415
- countries: JSON.parse(r.countries || "[]"),
1416
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1417
- }))
1418
- .filter((r) => r.countries.includes(country));
1419
- }
1420
-
1421
- function claimTag(tag) {
1422
- if (!db) return { ok: false, error: "db not ready" };
1423
- // 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
1424
- const result = db
1425
- .prepare(
1426
- "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
1427
- )
1428
- .run(tag);
1429
- if (result.changes === 0) {
1430
- // 检查是否不存在 vs 已被别人锁定
1431
- const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1432
- if (!row) return { ok: false, error: "tag not found" };
1433
- return { ok: false, error: `tag status is ${row.status}, already claimed` };
1434
- }
1435
- return { ok: true, tag };
1436
- }
1437
-
1438
- function reportTagScore(tag, fields) {
1439
- if (!db) return { ok: false, error: "db not ready" };
1440
- const {
1441
- score,
1442
- status,
1443
- totalPosts,
1444
- authorCount,
1445
- matchedAuthors,
1446
- matchedCountries,
1447
- pushedUsers,
1448
- error,
1449
- } = fields;
1450
- const matchedCountriesJson = matchedCountries
1451
- ? JSON.stringify(matchedCountries)
1452
- : null;
1453
- const now = new Date().toISOString();
1454
-
1455
- try {
1456
- const result = db
1457
- .prepare(
1458
- `
1459
- UPDATE tags SET
1460
- score = COALESCE(?, score),
1461
- status = COALESCE(?, status),
1462
- total_posts = COALESCE(?, total_posts),
1463
- author_count = COALESCE(?, author_count),
1464
- matched_authors = COALESCE(?, matched_authors),
1465
- matched_countries = COALESCE(?, matched_countries),
1466
- pushed_users = COALESCE(?, pushed_users),
1467
- last_error = COALESCE(?, last_error),
1468
- scored_at = ?,
1469
- score_count = score_count + 1
1470
- WHERE tag = ?
1471
- `,
1472
- )
1473
- .run(
1474
- score ?? null,
1475
- status ?? null,
1476
- totalPosts ?? null,
1477
- authorCount ?? null,
1478
- matchedAuthors ?? null,
1479
- matchedCountriesJson,
1480
- pushedUsers ?? null,
1481
- error ?? null,
1482
- now,
1483
- tag,
1484
- );
1485
- return { ok: result.changes > 0, tag };
1486
- } catch (e) {
1487
- return { ok: false, error: e.message };
1488
- }
1489
- }
1490
-
1491
- function getAllTags(limit = 200) {
1492
- if (!db) return [];
1493
- const rows = db
1494
- .prepare(
1495
- `
1496
- SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
1497
- `,
1498
- )
1499
- .all(limit);
1500
- return rows.map((r) => ({
1501
- ...r,
1502
- countries: JSON.parse(r.countries || "[]"),
1503
- matched_countries: JSON.parse(r.matched_countries || "[]"),
1504
- }));
1505
- }
1506
-
1507
- // 调试接口:直接执行 SQL 查询,返回原始数据
1508
- function rawQuery(sql, params = []) {
1509
- if (!db) return { error: "db not ready" };
1510
- try {
1511
- const rows = db.prepare(sql).all(...params);
1512
- return { rows };
1513
- } catch (e) {
1514
- return { error: e.message };
1515
- }
1516
- }
1517
-
1518
- // 清理 tags 表中以 # 开头的脏数据
1519
- function normalizeTags() {
1520
- if (!db) return { ok: false, error: "db not ready" };
1521
- const dirtyRows = db
1522
- .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
1523
- .all();
1524
- const fixed = [];
1525
- const merged = [];
1526
- const skipped = [];
1527
-
1528
- for (const row of dirtyRows) {
1529
- const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
1530
- if (!cleanTag || cleanTag.length < 2) {
1531
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1532
- skipped.push({
1533
- dirty: row.tag,
1534
- reason: "empty after normalize, deleted",
1535
- });
1536
- continue;
1537
- }
1538
-
1539
- // 检查 cleanTag 是否已存在
1540
- const existing = db
1541
- .prepare("SELECT * FROM tags WHERE tag = ?")
1542
- .get(cleanTag);
1543
- if (existing) {
1544
- // 合并:保留已有 clean 版本,合并 countries
1545
- const oldCountries = JSON.parse(row.countries || "[]");
1546
- const existCountries = JSON.parse(existing.countries || "[]");
1547
- const mergedCountries = [
1548
- ...new Set([...existCountries, ...oldCountries]),
1549
- ];
1550
- db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
1551
- JSON.stringify(mergedCountries),
1552
- cleanTag,
1553
- );
1554
- // 删除脏数据
1555
- db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1556
- merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1557
- } else {
1558
- // 直接重命名
1559
- db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
1560
- fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1561
- }
1562
- }
1563
-
1564
- return {
1565
- ok: true,
1566
- fixed: fixed.length,
1567
- merged: merged.length,
1568
- skipped: skipped.length,
1569
- details: { fixed, merged, skipped },
1570
- };
1571
- }
1572
-
1573
- function clearTags() {
1574
- if (!db) return { ok: false, error: "db not ready" };
1575
- const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
1576
- db.exec("DELETE FROM tags");
1577
- return { ok: true, deleted: count };
1578
- }
1579
-
1580
190
  function getUsersPageFromDb({
1581
191
  status,
1582
192
  search,
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
1587
197
  offset,
1588
198
  targetLocations = [],
1589
199
  }) {
1590
- if (!db) return null;
200
+ if (!getDb()) return null;
1591
201
 
1592
202
  const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
1593
203
  const safeOffset = Math.max(0, parseInt(offset) || 0);
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
1636
246
  if (cachedCount && Date.now() - cachedCount.time < 5000) {
1637
247
  total = cachedCount.c;
1638
248
  } else {
1639
- total = db
249
+ total = getDb()
1640
250
  .prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
1641
251
  .get(...args).c;
1642
252
  getUsersPageFromDb._countCache.set(cacheKey, {
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
1646
256
  }
1647
257
 
1648
258
  // 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
1649
- const rows = db
259
+ const rows = getDb()
1650
260
  .prepare(
1651
261
  `
1652
262
  SELECT
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
1688
298
  }
1689
299
 
1690
300
  function getTargetUsersFromDb(targetLocations = []) {
1691
- if (!db) return null;
301
+ if (!getDb()) return null;
1692
302
  if (!targetLocations.length) {
1693
303
  return { total: 0, users: [] };
1694
304
  }
1695
305
 
1696
306
  const placeholders = targetLocations.map(() => "?").join(", ");
1697
- const rows = db
307
+ const rows = getDb()
1698
308
  .prepare(
1699
309
  `
1700
310
  SELECT
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
1703
313
  location_created, latest_video_time, refresh_time,
1704
314
  guessed_location, pinned, processed_at, video_count,
1705
315
  no_video, claimed_by, claimed_at, created_at, updated_at
316
+ FROM jobs
317
+ WHERE tt_seller = 1
1706
318
  AND verified = 0
1707
319
  AND location_created IN (${placeholders})
1708
320
  ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
1718
330
  }
1719
331
 
1720
332
  function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1721
- if (!db) return null;
333
+ if (!getDb()) return null;
1722
334
  if (!targetLocations.length) {
1723
335
  return { countries: [] };
1724
336
  }
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1735
347
 
1736
348
  // 摘要模式:只返回各国统计数,不返回用户数据
1737
349
  if (summaryOnly) {
1738
- const statsRows = db
350
+ const statsRows = getDb()
1739
351
  .prepare(
1740
352
  `
1741
353
  SELECT location_created as country, COUNT(*) as count
@@ -1803,13 +415,16 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1803
415
  /SELECT[^FROM]*FROM/,
1804
416
  "SELECT COUNT(*) as cnt FROM",
1805
417
  );
1806
- const total = db.prepare(countSql).get(...params)?.cnt || 0;
418
+ const total =
419
+ getDb()
420
+ .prepare(countSql)
421
+ .get(...params)?.cnt || 0;
1807
422
 
1808
423
  sql += ` LIMIT ? OFFSET ?`;
1809
424
  const safeLimit = Math.min(Math.floor(limit), 10000);
1810
425
  const safeOffset = Math.max(Math.floor(offset), 0);
1811
426
 
1812
- const rows = db
427
+ const rows = getDb()
1813
428
  .prepare(sql)
1814
429
  .all(...params, safeLimit, safeOffset)
1815
430
  .map(mapJobRow);
@@ -1822,7 +437,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1822
437
  };
1823
438
  }
1824
439
 
1825
- const rows = db
440
+ const rows = getDb()
1826
441
  .prepare(
1827
442
  `
1828
443
  SELECT
@@ -1873,327 +488,6 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
1873
488
  };
1874
489
  }
1875
490
 
1876
- function snakeToCamel(key) {
1877
- return key.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
1878
- }
1879
-
1880
- function camelToSnake(key) {
1881
- return key.replace(/[A-Z]/g, (ch) => `_${ch.toLowerCase()}`);
1882
- }
1883
-
1884
- const jobBooleanColumns = new Set([
1885
- "pinned",
1886
- "no_video",
1887
- "restricted",
1888
- "processed",
1889
- "tt_seller",
1890
- "verified",
1891
- "error",
1892
- ]);
1893
-
1894
- const videoBooleanColumns = new Set(["tt_seller"]);
1895
-
1896
- const writableJobColumns = new Set([
1897
- "nickname",
1898
- "status",
1899
- "sources",
1900
- "claimed_by",
1901
- "claimed_at",
1902
- "error",
1903
- "pinned",
1904
- "no_video",
1905
- "restricted",
1906
- "user_update_count",
1907
- "tt_seller",
1908
- "verified",
1909
- "video_count",
1910
- "comment_count",
1911
- "guessed_location",
1912
- "location_created",
1913
- "confirmed_location",
1914
- "modified_at",
1915
- "follower_count",
1916
- "following_count",
1917
- "heart_count",
1918
- "refresh_time",
1919
- "processed",
1920
- "processed_at",
1921
- "updated_at",
1922
- "region",
1923
- "signature",
1924
- "bio_link",
1925
- "sec_uid",
1926
- "status_code",
1927
- "latest_video_time",
1928
- "top_video_play_count",
1929
- "top_video_href",
1930
- "user_create_time",
1931
- ]);
1932
-
1933
- function normalizeJobValue(column, value) {
1934
- if (value === undefined || value === null) return null;
1935
- if (column === "sources") {
1936
- if (!Array.isArray(value)) return JSON.stringify([]);
1937
- return JSON.stringify([...new Set(value)]);
1938
- }
1939
- if (jobBooleanColumns.has(column)) {
1940
- return value ? 1 : 0;
1941
- }
1942
- // 防御:如果值是对象或数组,转为 JSON 字符串
1943
- if (typeof value === "object") return JSON.stringify(value);
1944
- return value;
1945
- }
1946
-
1947
- function mapJobRow(row) {
1948
- if (!row) return undefined;
1949
- const mapped = {};
1950
- for (const [key, value] of Object.entries(row)) {
1951
- const camelKey = snakeToCamel(key);
1952
- if (key === "sources") {
1953
- try {
1954
- mapped[camelKey] = value ? JSON.parse(value) : [];
1955
- } catch {
1956
- mapped[camelKey] = [];
1957
- }
1958
- continue;
1959
- }
1960
- if (jobBooleanColumns.has(key)) {
1961
- mapped[camelKey] = value === null || value === undefined ? null : !!value;
1962
- continue;
1963
- }
1964
- mapped[camelKey] = value;
1965
- }
1966
- return mapped;
1967
- }
1968
-
1969
- function getJobRow(uniqueId) {
1970
- if (!db) return null;
1971
- return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
1972
- }
1973
-
1974
- function getJobBaseRow(uniqueId) {
1975
- if (!db) return null;
1976
- return db
1977
- .prepare("SELECT * FROM jobs_base WHERE unique_id = ?")
1978
- .get(uniqueId);
1979
- }
1980
-
1981
- function getJob(uniqueId) {
1982
- return mapJobRow(getJobRow(uniqueId));
1983
- }
1984
-
1985
- function getAllJobs() {
1986
- if (!db) return [];
1987
- return db.prepare("SELECT * FROM jobs").all().map(mapJobRow);
1988
- }
1989
-
1990
- function mapVideoRow(row) {
1991
- if (!row) return undefined;
1992
- const mapped = {};
1993
- for (const [key, value] of Object.entries(row)) {
1994
- const camelKey = snakeToCamel(key);
1995
- if (videoBooleanColumns.has(key)) {
1996
- mapped[camelKey] = value === null || value === undefined ? null : !!value;
1997
- continue;
1998
- }
1999
- mapped[camelKey] = value;
2000
- }
2001
- return mapped;
2002
- }
2003
-
2004
- function getVideoRow(videoId) {
2005
- if (!db) return null;
2006
- return db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId);
2007
- }
2008
-
2009
- function getAllVideoRows() {
2010
- if (!db) return [];
2011
- return db.prepare("SELECT * FROM videos").all();
2012
- }
2013
-
2014
- function updateJobInfo(uniqueId, info, incrementCount = true) {
2015
- if (!db) return { error: "db not initialized" };
2016
- const existing = getJobRow(uniqueId);
2017
- if (!existing) return { error: "user not found" };
2018
-
2019
- const nextValues = {};
2020
- for (const [key, value] of Object.entries(info || {})) {
2021
- if (key === "uniqueId" || key === "unique_id") continue;
2022
- if (value === undefined || value === "") continue;
2023
- let column = camelToSnake(key);
2024
- // 字段别名:bio → signature, createTime → user_create_time
2025
- if (column === "bio") column = "signature";
2026
- if (column === "create_time") column = "user_create_time";
2027
- if (!writableJobColumns.has(column)) continue;
2028
- nextValues[column] = normalizeJobValue(column, value);
2029
- }
2030
-
2031
- nextValues.updated_at = Date.now();
2032
- if (incrementCount) {
2033
- nextValues.user_update_count = (existing.user_update_count || 0) + 1;
2034
- }
2035
-
2036
- const columns = Object.keys(nextValues);
2037
- if (columns.length > 0) {
2038
- const sql = `UPDATE jobs SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
2039
- db.prepare(sql).run(
2040
- ...columns.map((column) => nextValues[column]),
2041
- uniqueId,
2042
- );
2043
- }
2044
-
2045
- return {
2046
- ok: true,
2047
- userUpdateCount:
2048
- nextValues.user_update_count ?? existing.user_update_count ?? 0,
2049
- };
2050
- }
2051
-
2052
- function inferStatus(u) {
2053
- if (u.restricted) return "restricted";
2054
- if (u.error) return "error";
2055
- if (u.processed) return "done";
2056
- return "pending";
2057
- }
2058
-
2059
- function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
2060
- if (!db) return { error: "db not initialized" };
2061
- const existing = getJobBaseRow(uniqueId);
2062
- if (!existing) return { error: "user not found" };
2063
-
2064
- const nextValues = {};
2065
- for (const [key, value] of Object.entries(info || {})) {
2066
- if (key === "uniqueId" || key === "unique_id") continue;
2067
- if (value === undefined || value === "") continue;
2068
- let column = camelToSnake(key);
2069
- // 字段别名:bio → signature, createTime → user_create_time
2070
- if (column === "bio") column = "signature";
2071
- if (column === "create_time") column = "user_create_time";
2072
- if (!writableJobColumns.has(column)) continue;
2073
- nextValues[column] = normalizeJobValue(column, value);
2074
- }
2075
-
2076
- nextValues.updated_at = Date.now();
2077
- if (incrementCount) {
2078
- nextValues.user_update_count = (existing.user_update_count || 0) + 1;
2079
- }
2080
-
2081
- const columns = Object.keys(nextValues);
2082
- if (columns.length > 0) {
2083
- const sql = `UPDATE jobs_base SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
2084
- db.prepare(sql).run(
2085
- ...columns.map((column) => nextValues[column]),
2086
- uniqueId,
2087
- );
2088
- }
2089
-
2090
- return {
2091
- ok: true,
2092
- userUpdateCount:
2093
- nextValues.user_update_count ?? existing.user_update_count ?? 0,
2094
- };
2095
- }
2096
-
2097
- function addJobBaseToDb(user) {
2098
- if (!db) return;
2099
- const now = Date.now();
2100
- db.prepare(
2101
- `
2102
- INSERT OR IGNORE INTO jobs_base (
2103
- unique_id,
2104
- nickname,
2105
- status,
2106
- sources,
2107
- claimed_by,
2108
- claimed_at,
2109
- error,
2110
- pinned,
2111
- no_video,
2112
- restricted,
2113
- user_update_count,
2114
- tt_seller,
2115
- verified,
2116
- video_count,
2117
- comment_count,
2118
- guessed_location,
2119
- location_created,
2120
- follower_count,
2121
- following_count,
2122
- heart_count,
2123
- refresh_time,
2124
- processed,
2125
- processed_at,
2126
- created_at,
2127
- updated_at,
2128
- region,
2129
- signature,
2130
- bio_link,
2131
- sec_uid
2132
- )
2133
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2134
- `,
2135
- ).run(
2136
- user.uniqueId,
2137
- user.nickname || null,
2138
- user.status || inferStatus(user),
2139
- JSON.stringify(
2140
- Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
2141
- ),
2142
- user.claimedBy || null,
2143
- user.claimedAt || null,
2144
- user.error || null,
2145
- user.pinned ? 1 : 0,
2146
- user.noVideo ? 1 : 0,
2147
- user.restricted ? 1 : 0,
2148
- user.userUpdateCount || 0,
2149
- user.ttSeller === undefined ||
2150
- user.ttSeller === null ||
2151
- user.ttSeller === ""
2152
- ? null
2153
- : user.ttSeller
2154
- ? 1
2155
- : 0,
2156
- user.verified === undefined ||
2157
- user.verified === null ||
2158
- user.verified === ""
2159
- ? null
2160
- : user.verified
2161
- ? 1
2162
- : 0,
2163
- user.videoCount || 0,
2164
- user.commentCount || 0,
2165
- user.guessedLocation || null,
2166
- user.locationCreated || null,
2167
- user.followerCount || 0,
2168
- user.followingCount || 0,
2169
- user.heartCount || 0,
2170
- user.refreshTime || null,
2171
- user.processed ? 1 : 0,
2172
- user.processedAt || null,
2173
- user.createdAt || now,
2174
- user.updatedAt || now,
2175
- user.region || null,
2176
- user.signature || null,
2177
- user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
2178
- user.secUid || null,
2179
- );
2180
- }
2181
-
2182
- function addJob(user) {
2183
- if (!db) {
2184
- addUserToDb(user);
2185
- return;
2186
- }
2187
- if (!user.status) user.status = inferStatus(user);
2188
- if (!user.createdAt) user.createdAt = Date.now();
2189
- if (!user.updatedAt) user.updatedAt = user.createdAt;
2190
- const writeTxn = db.transaction((job) => {
2191
- addUserToDb(job);
2192
- addJobToDb(job);
2193
- });
2194
- writeTxn(user);
2195
- }
2196
-
2197
491
  export function createStore(filePath, options = {}) {
2198
492
  if (!filePath) {
2199
493
  throw new Error("createStore requires an explicit .db path");
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
2219
513
  let refillLock = null; // Promise | null
2220
514
  // LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
2221
515
  // 格式: { "ES": 300, "PL": 500, "NL": 400 }
2222
- let llmSampleOffsets = new Map();
516
+ const offsetStore = createLlmOffsetStore();
2223
517
  if (filePath) {
2224
518
  // 初始化 SQLite 用户表(用于判重)
2225
- initUserDb(filePath);
519
+ initDb(filePath);
2226
520
  // 从数据库恢复偏移量
2227
- loadLlmSampleOffsets();
2228
- }
2229
-
2230
- /**
2231
- * 从数据库加载 LLM 采样偏移量
2232
- */
2233
- function loadLlmSampleOffsets() {
2234
- try {
2235
- const row = db
2236
- .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
2237
- .get();
2238
- if (row && row.offsets) {
2239
- const parsed = JSON.parse(row.offsets);
2240
- if (parsed && typeof parsed === "object") {
2241
- Object.entries(parsed).forEach(([k, v]) => {
2242
- llmSampleOffsets.set(k, v);
2243
- });
2244
- console.error(
2245
- `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
2246
- llmSampleOffsets.entries(),
2247
- )
2248
- .map(([k, v]) => `${k}:${v}`)
2249
- .join(", ")}`,
2250
- );
2251
- }
2252
- }
2253
- } catch (e) {
2254
- // 表不存在或解析失败,使用空偏移量
2255
- console.error(
2256
- `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
2257
- );
2258
- }
2259
- }
2260
-
2261
- /**
2262
- * 将 LLM 采样偏移量持久化到数据库
2263
- */
2264
- function saveLlmSampleOffsets() {
2265
- try {
2266
- const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
2267
- // 表不存在则创建
2268
- db.prepare(
2269
- `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
2270
- ).run();
2271
- // 插入或更新
2272
- db.prepare(
2273
- `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
2274
- ).run(offsetsJson);
2275
- } catch (e) {
2276
- console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
2277
- }
521
+ offsetStore.load();
2278
522
  }
2279
523
 
2280
524
  // stats 缓存
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
2287
531
  }
2288
532
 
2289
533
  function computeStatsInternal() {
2290
- if (db) {
534
+ if (getDb()) {
2291
535
  const total = getJobsCount();
2292
536
  const statusCounts = {
2293
537
  pending: 0,
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
2296
540
  error: 0,
2297
541
  restricted: 0,
2298
542
  };
2299
- const rows = db
543
+ const rows = getDb()
2300
544
  .prepare(
2301
545
  `
2302
546
  SELECT status, COUNT(*) as count
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
2372
616
  }
2373
617
 
2374
618
  function rebuildStatusGroups() {
2375
- if (db) {
619
+ if (getDb()) {
2376
620
  statusGroups = {
2377
621
  pending: [],
2378
622
  processing: [],
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
2436
680
 
2437
681
  function flushSave() {
2438
682
  // 数据库模式:先保存 LLM 偏移量,再备份数据库
2439
- if (db && dbPath) {
683
+ if (getDb() && getDbPath()) {
2440
684
  try {
2441
- saveLlmSampleOffsets();
685
+ offsetStore.save();
2442
686
  } catch (e) {
2443
687
  console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
2444
688
  }
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
2452
696
  * @returns {string|null} 备份文件路径,失败返回 null
2453
697
  */
2454
698
  function backupDatabase(maxBackups = 3) {
2455
- if (!db || !dbPath) {
699
+ if (!getDb() || !getDbPath()) {
2456
700
  console.error("[data-store] 数据库未初始化,跳过备份");
2457
701
  return null;
2458
702
  }
@@ -2464,16 +708,16 @@ export function createStore(filePath, options = {}) {
2464
708
  .toISOString()
2465
709
  .replace(/[-:T.]/g, "")
2466
710
  .slice(0, 15); // YYYYMMDDHHmmss
2467
- const baseName = path.basename(dbPath, ".db");
711
+ const baseName = path.basename(getDbPath(), ".db");
2468
712
  const backupName = `${baseName}-${timestamp}.db`;
2469
- const backupDir = path.dirname(dbPath);
713
+ const backupDir = path.dirname(getDbPath());
2470
714
  const backupPath = path.join(backupDir, backupName);
2471
715
 
2472
716
  console.error(`[data-store] 正在备份数据库: ${backupName}`);
2473
717
 
2474
718
  // 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
2475
719
  const backupDb = new Database(backupPath);
2476
- db.backup("main", backupDb, "main");
720
+ getDb().backup("main", backupDb, "main");
2477
721
  backupDb.close();
2478
722
 
2479
723
  // 验证备份文件大小
@@ -2523,7 +767,7 @@ export function createStore(filePath, options = {}) {
2523
767
 
2524
768
  function stopBackup() {
2525
769
  // 退出时执行备份
2526
- if (db && dbPath) {
770
+ if (getDb() && getDbPath()) {
2527
771
  backupDatabase();
2528
772
  }
2529
773
  }
@@ -2531,7 +775,7 @@ export function createStore(filePath, options = {}) {
2531
775
  function getUser(uid) {
2532
776
  const idx = uidIndex.get(uid);
2533
777
  if (idx !== undefined) return data[idx];
2534
- if (db) return getJob(uid);
778
+ if (getDb()) return getJob(uid);
2535
779
  return undefined;
2536
780
  }
2537
781
 
@@ -2549,12 +793,25 @@ export function createStore(filePath, options = {}) {
2549
793
 
2550
794
  function addUser(user, append) {
2551
795
  const memoryIdx = uidIndex.get(user.uniqueId);
2552
- if (db && memoryIdx === undefined) {
796
+ if (getDb() && memoryIdx === undefined) {
2553
797
  // 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
2554
798
  if (hasUserInDb(user.uniqueId)) {
2555
799
  return;
2556
800
  }
2557
- addJob(user);
801
+ const now = Date.now();
802
+ const writeTxn = getDb().transaction((job) => {
803
+ addUserToDb({
804
+ ...job,
805
+ createdAt: job.createdAt || now,
806
+ updatedAt: job.updatedAt || now,
807
+ });
808
+ addJobBaseToDb({
809
+ ...job,
810
+ createdAt: job.createdAt || now,
811
+ updatedAt: job.updatedAt || now,
812
+ });
813
+ });
814
+ writeTxn(user);
2558
815
  return;
2559
816
  }
2560
817
 
@@ -2616,7 +873,7 @@ export function createStore(filePath, options = {}) {
2616
873
  createdAt: now,
2617
874
  updatedAt: now,
2618
875
  };
2619
- const writeTxn = db.transaction((job) => {
876
+ const writeTxn = getDb().transaction((job) => {
2620
877
  addUserToDb(job);
2621
878
  addJobBaseToDb(job);
2622
879
  });
@@ -2628,195 +885,26 @@ export function createStore(filePath, options = {}) {
2628
885
  }
2629
886
 
2630
887
  function getPendingUsers() {
2631
- if (db) {
888
+ if (getDb()) {
2632
889
  return getAllJobs().filter((u) => u.status === "pending");
2633
890
  }
2634
891
  return data.filter((u) => u.status === "pending");
2635
892
  }
2636
893
 
2637
894
  function getProcessedUsers() {
2638
- if (db) {
895
+ if (getDb()) {
2639
896
  return getAllJobs().filter((u) => u.status === "done");
2640
897
  }
2641
898
  return data.filter((u) => u.status === "done");
2642
899
  }
2643
900
 
2644
901
  function getAllUsers() {
2645
- if (db) {
902
+ if (getDb()) {
2646
903
  return getAllJobs();
2647
904
  }
2648
905
  return data;
2649
906
  }
2650
907
 
2651
- /**
2652
- * 使用 LLM 对单个 job 的国家匹配度打分(0-100)
2653
- * @param {Object} job - raw_jobs 中的一条记录
2654
- * @param {string[]} targetLocations - 目标国家列表
2655
- * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
2656
- */
2657
- async function scoreJobLocation(job, targetLocations) {
2658
- const { fetch: undiciFetch } = await import("undici");
2659
-
2660
- const prompt = `
2661
- 你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
2662
-
2663
- 目标国家列表: ${targetLocations.join(", ")}
2664
-
2665
- 重要:
2666
- - 用户只要来自上述**任意一个**国家就算匹配。
2667
- - guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
2668
- - 请综合用户名、昵称、签名、位置等信息做判断。
2669
-
2670
- 用户信息:
2671
- - 用户名: ${job.unique_id || "未知"}
2672
- - 昵称: ${job.nickname || "未知"}
2673
- - 签名: ${job.signature || "未知"}
2674
- - 地区: ${job.region || "未知"}
2675
- - 猜测国家(参考): ${job.guessed_location || "未知"}
2676
- - 位置信息: ${job.location_created || "未知"}
2677
- - 主页链接: ${job.bio_link || "未知"}
2678
-
2679
- 返回 JSON(仅返回 JSON,无其他内容):
2680
- {"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
2681
-
2682
- Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
2683
- `;
2684
-
2685
- try {
2686
- const apiKey = process.env.APIKEY || "";
2687
- const response = await undiciFetch(
2688
- "http://82.156.52.214:18000/v1/chat/completions",
2689
- {
2690
- method: "POST",
2691
- headers: {
2692
- "Content-Type": "application/json",
2693
- Authorization: `Bearer ${apiKey}`,
2694
- },
2695
- body: JSON.stringify({
2696
- model: "zc-fast",
2697
- messages: [{ role: "user", content: prompt }],
2698
- max_tokens: 512,
2699
- temperature: 0.1,
2700
- }),
2701
- },
2702
- );
2703
-
2704
- const result = await response.json();
2705
- const content = result.choices?.[0]?.message?.content || "";
2706
-
2707
- // 解析 JSON 响应(多层容错)
2708
- let parsed = null;
2709
-
2710
- // 尝试 1: 直接解析
2711
- try {
2712
- parsed = JSON.parse(content);
2713
- } catch {
2714
- // 尝试 2: 提取 {} 包裹的内容
2715
- const match = content.match(/\{[\s\S]*\}/);
2716
- if (match) {
2717
- try {
2718
- parsed = JSON.parse(match[0]);
2719
- } catch {
2720
- // 尝试 3: 清理常见问题后解析
2721
- const cleaned = match[0]
2722
- .replace(/"/g, '"') // 弯引号 → 直引号
2723
- .replace(/\s+/g, " ") // 多余空白
2724
- .trim();
2725
- try {
2726
- parsed = JSON.parse(cleaned);
2727
- } catch {
2728
- // 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
2729
- const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
2730
- if (scoreMatch) {
2731
- let reason = "解析降级";
2732
- // 找 "reason": 的位置,取到最后一个 } 前的内容
2733
- const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
2734
- if (reasonKeyPos !== -1) {
2735
- const afterKey = content.substring(reasonKeyPos);
2736
- const colonPos = afterKey.indexOf(":");
2737
- const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
2738
- const rawValue = afterKey.substring(valueStart);
2739
- // 取到原始 content 最后一个 } 前
2740
- const lastBrace = content.lastIndexOf("}");
2741
- const reasonEnd = lastBrace - reasonKeyPos - valueStart;
2742
- if (reasonEnd > 0) {
2743
- reason = rawValue.substring(0, reasonEnd).trim();
2744
- // 去掉首尾的引号
2745
- if (reason.startsWith('"')) reason = reason.substring(1);
2746
- if (reason.endsWith('"'))
2747
- reason = reason.substring(0, reason.length - 1);
2748
- }
2749
- }
2750
- parsed = {
2751
- score: parseInt(scoreMatch[1]) || 50,
2752
- reason,
2753
- };
2754
- }
2755
- }
2756
- }
2757
- }
2758
-
2759
- // 尝试 5: 如果以上都失败,用更宽松的正则提取
2760
- if (!parsed) {
2761
- const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
2762
- const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
2763
- if (scoreMatch) {
2764
- parsed = {
2765
- score: parseInt(scoreMatch[1]) || 50,
2766
- reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
2767
- };
2768
- }
2769
- }
2770
- }
2771
-
2772
- if (parsed && typeof parsed.score === "number") {
2773
- return {
2774
- uniqueId: job.unique_id,
2775
- score: Math.max(0, Math.min(100, parsed.score)),
2776
- reason: parsed.reason || "",
2777
- };
2778
- }
2779
-
2780
- // 所有解析都失败,返回默认分
2781
- console.error(
2782
- `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
2783
- );
2784
- return {
2785
- uniqueId: job.unique_id,
2786
- score: 50,
2787
- reason: "LLM 响应解析失败,使用默认分",
2788
- };
2789
- } catch (e) {
2790
- console.error(
2791
- `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
2792
- );
2793
- return {
2794
- uniqueId: job.unique_id,
2795
- score: 50,
2796
- reason: `LLM 调用异常: ${e.message}`,
2797
- };
2798
- }
2799
- }
2800
-
2801
- /**
2802
- * 批量对 jobs 进行 LLM 国家匹配度打分
2803
- * @param {Object[]} jobs - raw_jobs 记录数组
2804
- * @param {string[]} targetLocations - 目标国家列表
2805
- * @param {number} batchSize - 每批处理数量(并发),默认 10
2806
- * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
2807
- */
2808
- async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
2809
- const results = [];
2810
- for (let i = 0; i < jobs.length; i += batchSize) {
2811
- const batch = jobs.slice(i, i + batchSize);
2812
- const batchResults = await Promise.all(
2813
- batch.map((job) => scoreJobLocation(job, targetLocations)),
2814
- );
2815
- results.push(...batchResults);
2816
- }
2817
- return results;
2818
- }
2819
-
2820
908
  /**
2821
909
  * 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
2822
910
  * @param {string[]} locations - 目标国家列表(null 表示不限制)
@@ -2828,8 +916,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2828
916
  * @returns {{ moved: number }} 实际移动的数量
2829
917
  */
2830
918
  function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
2831
- if (!db) {
2832
- return { moved: 0, error: "db not ready" };
919
+ if (!getDb()) {
920
+ return { moved: 0, error: "getDb() not ready" };
2833
921
  }
2834
922
 
2835
923
  const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
@@ -2860,7 +948,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2860
948
 
2861
949
  // 统计符合条件的数量
2862
950
  const count =
2863
- db
951
+ getDb()
2864
952
  .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
2865
953
  .get(...args)?.c || 0;
2866
954
 
@@ -2868,149 +956,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
2868
956
  return { moved: 0 };
2869
957
  }
2870
958
 
2871
- // 如果启用 LLM 打分,先采样一批进行评分(累积模式:按猜测国家分组,使用偏移量记忆避免重复采样)
959
+ // 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
2872
960
  if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
2873
- const llmMinReturn = options.llmMinReturn ?? 60; // 最少返回合格数
2874
- const maxBatches = options.llmMaxBatches ?? 10; // 最多采样轮次,防止无限循环
961
+ const llmTotal = options.llmTotal ?? 200; // 总条数
962
+ const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
963
+ const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
964
+ const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
965
+ const llmMinScore = options.llmMinScore ?? 60;
966
+ const llmSampleSize = options.llmSampleSize ?? 100;
967
+ const maxBatches = options.llmMaxBatches ?? 10;
2875
968
 
2876
- // 打印当前偏移量状态
2877
- const offsetSummary = Array.from(llmSampleOffsets.entries())
2878
- .map(([k, v]) => `${k}:${v}`)
2879
- .join(", ");
2880
969
  console.error(
2881
- `[data-store] LLM 打分开始: 符合条件 ${count} 条,每批 ${llmSampleSize} 条,最低分 ${llmMinScore},最少返回 ${llmMinReturn} 条`,
970
+ `[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
2882
971
  );
2883
- if (offsetSummary) {
2884
- console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
2885
- }
2886
972
 
2887
973
  // 返回 Promise,调用方需要 await
2888
974
  return (async () => {
2889
- const allQualified = [];
975
+ const allTagQualified = [];
976
+ const allNonTagQualified = [];
2890
977
  const allScores = [];
2891
978
 
2892
- // 按猜测国家分组处理,每个国家使用独立的偏移量
2893
- const locationGroups = normalizedLocations;
2894
- let totalBatches = 0;
979
+ // ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
980
+ let tagOffset = offsetStore.get("_tag") || 0;
981
+ const tagGlobalCount =
982
+ getDb()
983
+ .prepare(
984
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
985
+ )
986
+ .get(...args)?.c || 0;
2895
987
 
2896
- for (const location of locationGroups) {
2897
- // 获取该国家上次的偏移量
2898
- let offset = llmSampleOffsets.get(location) || 0;
988
+ if (tagOffset >= tagGlobalCount) {
989
+ tagOffset = 0;
990
+ offsetStore.set("_tag", 0);
991
+ }
2899
992
 
2900
- // 查询该国家的总数量
2901
- const locationCountSql = `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?`;
2902
- const locationArgs = [...args, location];
2903
- const locationCount =
2904
- db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
993
+ console.error(
994
+ `[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
995
+ );
2905
996
 
2906
- if (locationCount === 0) {
2907
- console.error(
2908
- `[data-store] 国家 ${location}: raw_jobs 中无数据,跳过`,
997
+ while (
998
+ allTagQualified.length < llmTagLimit &&
999
+ tagOffset < tagGlobalCount
1000
+ ) {
1001
+ const batch = getDb()
1002
+ .prepare(
1003
+ `
1004
+ SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
1005
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
1006
+ LIMIT ? OFFSET ?
1007
+ `,
1008
+ )
1009
+ .all(
1010
+ ...args,
1011
+ Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
1012
+ tagOffset,
2909
1013
  );
1014
+
1015
+ if (!batch.length) break;
1016
+
1017
+ allTagQualified.push(...batch.map((s) => s.unique_id));
1018
+ tagOffset += batch.length;
1019
+
1020
+ console.error(
1021
+ `[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
1022
+ );
1023
+ }
1024
+
1025
+ offsetStore.set("_tag", tagOffset);
1026
+
1027
+ // ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
1028
+ for (const location of normalizedLocations) {
1029
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
1030
+
1031
+ const nonTagOffsetKey = `${location}:nonTag`;
1032
+ let offset = offsetStore.get(nonTagOffsetKey) || 0;
1033
+
1034
+ const locationArgs = [...args, location];
1035
+ const nonTagCount =
1036
+ getDb()
1037
+ .prepare(
1038
+ `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
1039
+ )
1040
+ .get(...locationArgs)?.c || 0;
1041
+
1042
+ if (nonTagCount === 0) {
1043
+ console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
2910
1044
  continue;
2911
1045
  }
2912
1046
 
2913
- // 如果偏移量超过总数,重置为 0(一轮结束,重新开始)
2914
- if (offset >= locationCount) {
1047
+ if (offset >= nonTagCount) {
2915
1048
  offset = 0;
2916
- llmSampleOffsets.set(location, 0);
1049
+ offsetStore.set(nonTagOffsetKey, 0);
2917
1050
  }
2918
1051
 
2919
1052
  console.error(
2920
- `[data-store] 国家 ${location}: 共 ${locationCount} 条,从偏移量 ${offset} 开始`,
1053
+ `[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
2921
1054
  );
2922
1055
 
2923
1056
  for (let batch = 0; batch < maxBatches; batch++) {
2924
- const remaining = locationCount - offset;
2925
- if (remaining <= 0) break;
1057
+ if (allNonTagQualified.length >= llmNonTagTarget) break;
2926
1058
 
2927
- const sampleLimit = Math.min(llmSampleSize, remaining);
2928
- const samples = db
1059
+ const samples = getDb()
2929
1060
  .prepare(
2930
1061
  `
2931
1062
  SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
2932
- ORDER BY
2933
- CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
2934
- COALESCE(video_count, 0) DESC, created_at DESC
1063
+ AND (sources NOT LIKE '%tag%' OR sources IS NULL)
1064
+ ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
2935
1065
  LIMIT ? OFFSET ?
2936
1066
  `,
2937
1067
  )
2938
- .all(...locationArgs, sampleLimit, offset);
1068
+ .all(...locationArgs, llmSampleSize, offset);
2939
1069
 
2940
- if (samples.length === 0) break;
1070
+ if (!samples.length) break;
2941
1071
 
2942
- // 分离 tag 来源和非 tag 来源:tag 来源跳过 LLM 打分直接合格
2943
- const tagSamples = samples.filter((s) =>
2944
- (s.sources || "").includes("tag"),
1072
+ const scores = await scoreJobsBatch(
1073
+ samples,
1074
+ DEFAULT_TARGET_LOCATIONS,
2945
1075
  );
2946
- const nonTagSamples = samples.filter(
2947
- (s) => !(s.sources || "").includes("tag"),
2948
- );
2949
-
2950
- // tag 来源直接加入合格列表
2951
- if (tagSamples.length > 0) {
2952
- allQualified.push(...tagSamples.map((s) => s.unique_id));
2953
- console.error(
2954
- `[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
2955
- );
2956
- }
2957
-
2958
- // 非 tag 来源走 LLM 打分
2959
- let batchQualified = [];
2960
- let scores = [];
2961
- if (nonTagSamples.length > 0) {
2962
- scores = await scoreJobsBatch(
2963
- nonTagSamples,
2964
- DEFAULT_TARGET_LOCATIONS,
2965
- );
2966
- batchQualified = scores.filter((s) => s.score >= llmMinScore);
2967
- }
2968
-
1076
+ const qualified = scores.filter((s) => s.score >= llmMinScore);
1077
+ allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
2969
1078
  allScores.push(...scores);
2970
- allQualified.push(...batchQualified.map((s) => s.uniqueId));
2971
1079
 
2972
- totalBatches++;
2973
- console.error(
2974
- `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${batchQualified.length} 条,累计合格 ${allQualified.length} 条`,
2975
- );
2976
-
2977
- // 更新偏移量记忆
2978
1080
  offset += samples.length;
2979
- llmSampleOffsets.set(location, offset);
1081
+ offsetStore.set(nonTagOffsetKey, offset);
2980
1082
 
2981
- // 合格数已达到最小返回阈值,停止采样
2982
- if (allQualified.length >= llmMinReturn) break;
1083
+ console.error(
1084
+ `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
1085
+ );
2983
1086
  }
2984
-
2985
- // 合格数已达到最小返回阈值,停止所有国家的采样
2986
- if (allQualified.length >= llmMinReturn) break;
2987
1087
  }
2988
1088
 
2989
- // 分离 tag 合格和非 tag 合格
2990
- // tag 任务直接合格(不在 allScores 中),非 tag 任务走 LLM 打分
2991
- const tagQualified = allQualified.filter(
2992
- (uid) => !allScores.find((s) => s.uniqueId === uid),
2993
- );
2994
- const nonTagQualifiedScores = allScores
2995
- .filter((s) => s.score >= llmMinScore)
2996
- .sort((a, b) => b.score - a.score);
2997
- const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
2998
-
2999
- // 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
3000
- const tagMaxCount = Math.floor(safeLimit * 0.7);
3001
- const tagCount = Math.min(tagQualified.length, tagMaxCount);
3002
- const nonTagMaxCount = safeLimit - tagCount;
3003
- const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
3004
-
3005
- // 最终合格列表:tag 优先 + 非 tag 按分数排序
3006
- const qualified = [
3007
- ...tagQualified.slice(0, tagCount),
3008
- ...finalNonTagQualified,
3009
- ];
1089
+ // ===== 最终结果 =====
1090
+ const qualified = [...allTagQualified, ...allNonTagQualified];
3010
1091
 
3011
1092
  if (!qualified.length) {
3012
1093
  console.error(
3013
- `[data-store] LLM 打分后无符合条件的任务(阈值: ${llmMinScore},共采样 ${allScores.length} 条)`,
1094
+ `[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
3014
1095
  );
3015
1096
  return {
3016
1097
  moved: 0,
@@ -3020,11 +1101,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3020
1101
  };
3021
1102
  }
3022
1103
 
1104
+ console.error(
1105
+ `[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
1106
+ );
1107
+
3023
1108
  // 移动符合条件的记录
3024
1109
  const placeholders = qualified.map(() => "?").join(", ");
3025
- const moveTxn = db.transaction(() => {
3026
- db.prepare(
3027
- `
1110
+ const moveTxn = getDb().transaction(() => {
1111
+ getDb()
1112
+ .prepare(
1113
+ `
3028
1114
  INSERT OR IGNORE INTO jobs (
3029
1115
  unique_id, nickname, status, sources, pinned,
3030
1116
  tt_seller, verified, video_count, comment_count,
@@ -3043,41 +1129,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3043
1129
  FROM raw_jobs
3044
1130
  WHERE unique_id IN (${placeholders})
3045
1131
  `,
3046
- ).run(...qualified);
1132
+ )
1133
+ .run(...qualified);
3047
1134
 
3048
- db.prepare(
3049
- `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
3050
- ).run(...qualified);
1135
+ getDb()
1136
+ .prepare(
1137
+ `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
1138
+ )
1139
+ .run(...qualified);
3051
1140
  });
3052
-
3053
1141
  moveTxn();
3054
1142
  markStatsDirty();
3055
1143
 
3056
1144
  // 持久化偏移量到数据库
3057
- saveLlmSampleOffsets();
1145
+ offsetStore.save();
3058
1146
 
3059
1147
  // 打印最终偏移量状态
3060
- const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
1148
+ const finalOffsetSummary = Array.from(offsetStore.entries())
3061
1149
  .map(([k, v]) => `${k}:${v}`)
3062
1150
  .join(", ");
3063
- console.error(
3064
- `[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
3065
- );
3066
1151
  console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
3067
- const scoresDetail = allScores.map((s) => s);
1152
+
3068
1153
  return {
3069
1154
  moved: qualified.length,
3070
1155
  scored: allScores.length,
3071
1156
  qualified: qualified.length,
3072
- scores: scoresDetail,
1157
+ scores: allScores,
3073
1158
  };
3074
1159
  })();
3075
1160
  }
3076
1161
 
3077
1162
  // 常规移动:INSERT + DELETE 事务
3078
- const moveTxn = db.transaction(() => {
3079
- db.prepare(
3080
- `
1163
+ const moveTxn = getDb().transaction(() => {
1164
+ getDb()
1165
+ .prepare(
1166
+ `
3081
1167
  INSERT OR IGNORE INTO jobs (
3082
1168
  unique_id, nickname, status, sources, pinned,
3083
1169
  tt_seller, verified, video_count, comment_count,
@@ -3100,11 +1186,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3100
1186
  COALESCE(video_count, 0) DESC, created_at DESC
3101
1187
  LIMIT ?
3102
1188
  `,
3103
- ).run(...args, safeLimit);
1189
+ )
1190
+ .run(...args, safeLimit);
3104
1191
 
3105
1192
  // 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
3106
- db.prepare(
3107
- `
1193
+ getDb()
1194
+ .prepare(
1195
+ `
3108
1196
  DELETE FROM raw_jobs
3109
1197
  WHERE unique_id IN (
3110
1198
  SELECT unique_id FROM raw_jobs
@@ -3115,7 +1203,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3115
1203
  LIMIT ?
3116
1204
  )
3117
1205
  `,
3118
- ).run(...args, safeLimit);
1206
+ )
1207
+ .run(...args, safeLimit);
3119
1208
  });
3120
1209
 
3121
1210
  moveTxn();
@@ -3133,9 +1222,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3133
1222
  ) {
3134
1223
  // 记录客户端登录状态
3135
1224
  clientLoginStatus.set(userId, !!loggedIn);
3136
- if (db) {
1225
+ if (getDb()) {
3137
1226
  const now = Date.now();
3138
- const ongoingRow = db
1227
+ const ongoingRow = getDb()
3139
1228
  .prepare(
3140
1229
  `
3141
1230
  SELECT *
@@ -3150,10 +1239,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3150
1239
  )
3151
1240
  .get(userId, now, expireMs);
3152
1241
  if (ongoingRow) {
3153
- db.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?").run(
3154
- now,
3155
- ongoingRow.unique_id,
3156
- );
1242
+ getDb()
1243
+ .prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
1244
+ .run(now, ongoingRow.unique_id);
3157
1245
  return {
3158
1246
  uniqueId: ongoingRow.unique_id,
3159
1247
  nickname: ongoingRow.nickname,
@@ -3235,7 +1323,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3235
1323
  for (const filter of filters) {
3236
1324
  where.push(filter);
3237
1325
  }
3238
- return db
1326
+ return getDb()
3239
1327
  .prepare(
3240
1328
  `
3241
1329
  SELECT *
@@ -3278,7 +1366,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3278
1366
  );
3279
1367
  args.push(...normalizedLocations);
3280
1368
  }
3281
- return db
1369
+ return getDb()
3282
1370
  .prepare(
3283
1371
  `
3284
1372
  SELECT *
@@ -3348,9 +1436,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3348
1436
 
3349
1437
  function claimRow(row) {
3350
1438
  if (!row) return null;
3351
- db.prepare(
3352
- "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
3353
- ).run(now, userId, row.unique_id);
1439
+ getDb()
1440
+ .prepare(
1441
+ "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
1442
+ )
1443
+ .run(now, userId, row.unique_id);
3354
1444
  markStatsDirty();
3355
1445
  return {
3356
1446
  uniqueId: row.unique_id,
@@ -3360,7 +1450,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3360
1450
  };
3361
1451
  }
3362
1452
 
3363
- const expiredRow = db
1453
+ const expiredRow = getDb()
3364
1454
  .prepare(
3365
1455
  `
3366
1456
  SELECT *
@@ -3375,9 +1465,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3375
1465
  .get(now, expireMs);
3376
1466
  let expiredCandidate = null;
3377
1467
  if (expiredRow) {
3378
- db.prepare(
3379
- "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
3380
- ).run(expiredRow.unique_id);
1468
+ getDb()
1469
+ .prepare(
1470
+ "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
1471
+ )
1472
+ .run(expiredRow.unique_id);
3381
1473
  expiredCandidate = mapJobRow({
3382
1474
  ...expiredRow,
3383
1475
  status: "pending",
@@ -3469,7 +1561,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3469
1561
  return null;
3470
1562
  }
3471
1563
 
3472
- if (!db) {
1564
+ if (!getDb()) {
3473
1565
  const now = Date.now();
3474
1566
 
3475
1567
  // 0. 该客户端有未过期的任务,续期返回
@@ -3607,16 +1699,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3607
1699
  locations = null,
3608
1700
  loggedIn = true,
3609
1701
  ) {
3610
- if (db) {
1702
+ if (getDb()) {
3611
1703
  const now = Date.now();
3612
1704
  const info = {
3613
- path: "db",
1705
+ path: "getDb()",
3614
1706
  userId,
3615
1707
  expireMs,
3616
1708
  loggedIn,
3617
1709
  };
3618
1710
 
3619
- const ongoingRow = db
1711
+ const ongoingRow = getDb()
3620
1712
  .prepare(
3621
1713
  `
3622
1714
  SELECT *
@@ -3716,7 +1808,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3716
1808
  ORDER BY follower_count DESC, created_at ASC, unique_id ASC
3717
1809
  LIMIT 1
3718
1810
  `;
3719
- const row = db.prepare(sql).get(...args);
1811
+ const row = getDb()
1812
+ .prepare(sql)
1813
+ .get(...args);
3720
1814
  return { row, sql, args };
3721
1815
  }
3722
1816
 
@@ -3757,7 +1851,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3757
1851
  ORDER BY created_at ASC, unique_id ASC
3758
1852
  LIMIT 1
3759
1853
  `;
3760
- const row = db.prepare(sql).get(...args);
1854
+ const row = getDb()
1855
+ .prepare(sql)
1856
+ .get(...args);
3761
1857
  return { row, sql, args };
3762
1858
  }
3763
1859
 
@@ -3770,7 +1866,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
3770
1866
  ORDER BY claimed_at ASC
3771
1867
  LIMIT 1
3772
1868
  `;
3773
- const expiredRow = db.prepare(expiredSql).get(now, expireMs);
1869
+ const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
3774
1870
  info.expired = expiredRow
3775
1871
  ? {
3776
1872
  uniqueId: expiredRow.unique_id,
@@ -4096,7 +2192,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4096
2192
  }
4097
2193
 
4098
2194
  function commitJob(uniqueId, result) {
4099
- if (db) {
2195
+ if (getDb()) {
4100
2196
  const user = getJob(uniqueId);
4101
2197
  if (!user) return { saved: false, error: "user not found" };
4102
2198
 
@@ -4122,7 +2218,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4122
2218
  }
4123
2219
 
4124
2220
  function commitNewExplore(uniqueId, result) {
4125
- if (db) {
2221
+ if (getDb()) {
4126
2222
  const existing = getJob(uniqueId);
4127
2223
  if (existing) {
4128
2224
  updateUserFromResult(existing, result);
@@ -4172,7 +2268,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4172
2268
  }
4173
2269
 
4174
2270
  function resetJob(uniqueId) {
4175
- if (db) {
2271
+ if (getDb()) {
4176
2272
  const user = getJob(uniqueId);
4177
2273
  if (!user) return { saved: false, error: "user not found" };
4178
2274
  user.status = "pending";
@@ -4203,7 +2299,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4203
2299
  }
4204
2300
 
4205
2301
  function togglePin(uniqueId) {
4206
- if (db) {
2302
+ if (getDb()) {
4207
2303
  const user = getJob(uniqueId);
4208
2304
  if (!user) return { saved: false, error: "user not found" };
4209
2305
  const nextPinned = !user.pinned;
@@ -4220,13 +2316,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4220
2316
  }
4221
2317
 
4222
2318
  function getNextRedoJob(userId, maxAgeSeconds = 43200) {
4223
- if (db) {
2319
+ if (getDb()) {
4224
2320
  const now = Date.now();
4225
2321
  const threshold = now - maxAgeSeconds * 1000;
4226
2322
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
4227
2323
  const targetLocations = DEFAULT_TARGET_LOCATIONS;
4228
2324
  const placeholders = targetLocations.map(() => "?").join(",");
4229
- const row = db
2325
+ const row = getDb()
4230
2326
  .prepare(
4231
2327
  `
4232
2328
  SELECT *
@@ -4241,9 +2337,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4241
2337
  )
4242
2338
  .get(...targetLocations, defaultTime, threshold, defaultTime);
4243
2339
  if (!row) return null;
4244
- db.prepare(
4245
- "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
4246
- ).run(now, now, row.unique_id);
2340
+ getDb()
2341
+ .prepare(
2342
+ "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
2343
+ )
2344
+ .run(now, now, row.unique_id);
4247
2345
  return {
4248
2346
  uniqueId: row.unique_id,
4249
2347
  nickname: row.nickname,
@@ -4292,7 +2390,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4292
2390
  }
4293
2391
 
4294
2392
  function commitRedoJob(uniqueId, result) {
4295
- if (db) {
2393
+ if (getDb()) {
4296
2394
  const user = getJob(uniqueId);
4297
2395
  if (!user) return { saved: false, error: "user not found" };
4298
2396
  user.refreshTime = Date.now();
@@ -4436,13 +2534,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4436
2534
  : [];
4437
2535
  const hasCountryFilter = targetCountries.length > 0;
4438
2536
 
4439
- if (db) {
2537
+ if (getDb()) {
4440
2538
  const l = Math.max(1, parseInt(limit) || 5);
4441
2539
 
4442
2540
  let sql = `
4443
2541
  SELECT *
4444
2542
  FROM jobs_base
4445
- WHERE COALESCE(tt_seller, '') = ''
2543
+ WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
4446
2544
  AND COALESCE(user_update_count, 0) <= 0
4447
2545
  `;
4448
2546
  const sqlParams = [];
@@ -4453,18 +2551,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4453
2551
  sqlParams.push(...targetCountries);
4454
2552
  }
4455
2553
 
4456
- // 优先级:sources 包含 "tag" 的任务优先,其余按 created_at 排序
2554
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
4457
2555
  sql += ` ORDER BY
2556
+ CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
4458
2557
  CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
4459
2558
  created_at ASC,
4460
2559
  unique_id ASC
4461
2560
  LIMIT ?`;
4462
2561
  sqlParams.push(l);
4463
2562
 
4464
- const rows = db.prepare(sql).all(...sqlParams);
2563
+ const rows = getDb()
2564
+ .prepare(sql)
2565
+ .all(...sqlParams);
4465
2566
  if (rows.length === 0) return [];
4466
2567
  const now = Date.now();
4467
- const bumpStmt = db.prepare(
2568
+ const bumpStmt = getDb().prepare(
4468
2569
  `
4469
2570
  UPDATE jobs_base
4470
2571
  SET user_update_count = COALESCE(user_update_count, 0) + 1,
@@ -4472,7 +2573,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4472
2573
  WHERE unique_id = ?
4473
2574
  `,
4474
2575
  );
4475
- const bumpTxn = db.transaction((items) => {
2576
+ const bumpTxn = getDb().transaction((items) => {
4476
2577
  for (const item of items) {
4477
2578
  bumpStmt.run(now, item.unique_id);
4478
2579
  }
@@ -4490,9 +2591,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4490
2591
  const pending = data
4491
2592
  .filter((u) => {
4492
2593
  const updateCount = u.userUpdateCount;
4493
- const ttSellerEmpty =
4494
- u.ttSeller === null || u.ttSeller === undefined || u.ttSeller === "";
4495
- if (!ttSellerEmpty) return false;
2594
+ // ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
2595
+ const ttSellerEligible =
2596
+ u.ttSeller === null ||
2597
+ u.ttSeller === undefined ||
2598
+ u.ttSeller === "" ||
2599
+ u.ttSeller === 1;
2600
+ if (!ttSellerEligible) return false;
4496
2601
  if (
4497
2602
  updateCount === null ||
4498
2603
  updateCount === undefined ||
@@ -4507,7 +2612,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4507
2612
  return false;
4508
2613
  })
4509
2614
  .sort((a, b) => {
4510
- // 优先级:sources 包含 "tag" 的任务优先
2615
+ // 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
2616
+ const aIsSeller = a.ttSeller === 1 ? 0 : 1;
2617
+ const bIsSeller = b.ttSeller === 1 ? 0 : 1;
2618
+ if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
4511
2619
  const aIsTag = (a.sources || "").includes("tag");
4512
2620
  const bIsTag = (b.sources || "").includes("tag");
4513
2621
  if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
@@ -4524,7 +2632,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4524
2632
  }
4525
2633
 
4526
2634
  function updateUserInfo(uniqueId, info) {
4527
- if (db) {
2635
+ if (getDb()) {
4528
2636
  return updateJobInfo(uniqueId, info, true);
4529
2637
  }
4530
2638
 
@@ -4543,15 +2651,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4543
2651
  }
4544
2652
 
4545
2653
  function updateUserLocation(uniqueId, location) {
4546
- if (db) {
4547
- const existing = db
2654
+ if (getDb()) {
2655
+ const existing = getDb()
4548
2656
  .prepare("SELECT * FROM jobs WHERE unique_id = ?")
4549
2657
  .get(uniqueId);
4550
2658
  if (!existing) return { error: "user not found" };
4551
2659
  const now = Date.now();
4552
- db.prepare(
4553
- "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
4554
- ).run(location, now, now, uniqueId);
2660
+ getDb()
2661
+ .prepare(
2662
+ "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
2663
+ )
2664
+ .run(location, now, now, uniqueId);
4555
2665
  return { ok: true, location, modifiedAt: now };
4556
2666
  }
4557
2667
 
@@ -4567,13 +2677,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4567
2677
 
4568
2678
  // 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
4569
2679
  function moveJobToRaw(uniqueId) {
4570
- if (!db) return false;
2680
+ if (!getDb()) return false;
4571
2681
  const safeId = String(uniqueId).trim();
4572
2682
  if (!safeId) return false;
4573
2683
 
4574
- const moveSingleTxn = db.transaction(() => {
4575
- db.prepare(
4576
- `
2684
+ const moveSingleTxn = getDb().transaction(() => {
2685
+ getDb()
2686
+ .prepare(
2687
+ `
4577
2688
  INSERT OR REPLACE INTO raw_jobs (
4578
2689
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4579
2690
  error, pinned, no_video, restricted, user_update_count,
@@ -4595,21 +2706,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4595
2706
  user_create_time
4596
2707
  FROM jobs WHERE unique_id = ?
4597
2708
  `,
4598
- ).run(safeId);
2709
+ )
2710
+ .run(safeId);
4599
2711
 
4600
- db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
2712
+ getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
4601
2713
  });
4602
2714
  moveSingleTxn();
4603
2715
  return true;
4604
2716
  }
4605
2717
 
4606
2718
  function batchUpdateUserInfo(updates) {
4607
- if (db) {
2719
+ if (getDb()) {
4608
2720
  const results = [];
4609
2721
  const rawMoveList = [];
4610
2722
  const sellerMoveList = [];
4611
2723
 
4612
- const txn = db.transaction((items) => {
2724
+ const txn = getDb().transaction((items) => {
4613
2725
  items.forEach((item) => {
4614
2726
  const uniqueId = item.uniqueId;
4615
2727
  // 处理 { error: true, statusCode: xxx } 的情况
@@ -4661,8 +2773,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4661
2773
  // 批量移动商家用户到 jobs
4662
2774
  if (sellerMoveList.length > 0) {
4663
2775
  const placeholders = sellerMoveList.map(() => "?").join(",");
4664
- db.prepare(
4665
- `
2776
+ getDb()
2777
+ .prepare(
2778
+ `
4666
2779
  INSERT OR REPLACE INTO jobs (
4667
2780
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4668
2781
  error, pinned, no_video, restricted, user_update_count,
@@ -4684,18 +2797,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4684
2797
  user_create_time
4685
2798
  FROM jobs_base WHERE unique_id IN (${placeholders})
4686
2799
  `,
4687
- ).run(...sellerMoveList);
2800
+ )
2801
+ .run(...sellerMoveList);
4688
2802
 
4689
- db.prepare(
4690
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4691
- ).run(...sellerMoveList);
2803
+ getDb()
2804
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2805
+ .run(...sellerMoveList);
4692
2806
  }
4693
2807
 
4694
2808
  // 批量移动非商家用户到 raw_jobs
4695
2809
  if (rawMoveList.length > 0) {
4696
2810
  const placeholders = rawMoveList.map(() => "?").join(",");
4697
- db.prepare(
4698
- `
2811
+ getDb()
2812
+ .prepare(
2813
+ `
4699
2814
  INSERT OR REPLACE INTO raw_jobs (
4700
2815
  unique_id, nickname, status, sources, claimed_by, claimed_at,
4701
2816
  error, pinned, no_video, restricted, user_update_count,
@@ -4717,11 +2832,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4717
2832
  user_create_time
4718
2833
  FROM jobs_base WHERE unique_id IN (${placeholders})
4719
2834
  `,
4720
- ).run(...rawMoveList);
2835
+ )
2836
+ .run(...rawMoveList);
4721
2837
 
4722
- db.prepare(
4723
- `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
4724
- ).run(...rawMoveList);
2838
+ getDb()
2839
+ .prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
2840
+ .run(...rawMoveList);
4725
2841
  }
4726
2842
 
4727
2843
  // 清理内部标记
@@ -4773,8 +2889,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4773
2889
  return { registered: 0, skipped: 0 };
4774
2890
  }
4775
2891
 
4776
- if (db) {
4777
- const insertStmt = db.prepare(`
2892
+ if (getDb()) {
2893
+ const insertStmt = getDb().prepare(`
4778
2894
  INSERT OR IGNORE INTO videos (
4779
2895
  id,
4780
2896
  href,
@@ -4790,7 +2906,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4790
2906
  let registered = 0;
4791
2907
  let skipped = 0;
4792
2908
  const now = Date.now();
4793
- const txn = db.transaction((items) => {
2909
+ const txn = getDb().transaction((items) => {
4794
2910
  for (const item of items) {
4795
2911
  const result = insertStmt.run(
4796
2912
  item.id,
@@ -4837,7 +2953,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4837
2953
  }
4838
2954
 
4839
2955
  function getVideos() {
4840
- if (db) {
2956
+ if (getDb()) {
4841
2957
  return getAllVideoRows().map(mapVideoRow);
4842
2958
  }
4843
2959
  return videos;
@@ -4845,7 +2961,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4845
2961
 
4846
2962
  function getVideo(videoId) {
4847
2963
  if (!videoId) return null;
4848
- if (db) {
2964
+ if (getDb()) {
4849
2965
  return mapVideoRow(getVideoRow(videoId));
4850
2966
  }
4851
2967
  return videos.find((video) => video.id === videoId) || null;
@@ -4855,8 +2971,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4855
2971
  const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
4856
2972
  const safeOffset = Math.max(0, parseInt(offset) || 0);
4857
2973
 
4858
- if (db) {
4859
- const rows = db
2974
+ if (getDb()) {
2975
+ const rows = getDb()
4860
2976
  .prepare(
4861
2977
  `
4862
2978
  SELECT *
@@ -4866,7 +2982,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4866
2982
  `,
4867
2983
  )
4868
2984
  .all(safeLimit, safeOffset);
4869
- const total = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
2985
+ const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4870
2986
  return {
4871
2987
  total,
4872
2988
  limit: safeLimit,
@@ -4884,16 +3000,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4884
3000
  }
4885
3001
 
4886
3002
  function getVideoCount() {
4887
- if (db) {
4888
- return db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
3003
+ if (getDb()) {
3004
+ return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
4889
3005
  }
4890
3006
  return videos.length;
4891
3007
  }
4892
3008
 
4893
3009
  function getPendingCommentTasks(limit) {
4894
- if (db) {
3010
+ if (getDb()) {
4895
3011
  const l = Math.max(1, parseInt(limit) || 1);
4896
- const rows = db
3012
+ const rows = getDb()
4897
3013
  .prepare(
4898
3014
  `
4899
3015
  SELECT *
@@ -4905,14 +3021,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4905
3021
  )
4906
3022
  .all(l);
4907
3023
  if (rows.length === 0) return [];
4908
- const bumpStmt = db.prepare(
3024
+ const bumpStmt = getDb().prepare(
4909
3025
  `
4910
3026
  UPDATE videos
4911
3027
  SET user_update_count = COALESCE(user_update_count, 0) + 1
4912
3028
  WHERE id = ?
4913
3029
  `,
4914
3030
  );
4915
- const bumpTxn = db.transaction((items) => {
3031
+ const bumpTxn = getDb().transaction((items) => {
4916
3032
  for (const item of items) bumpStmt.run(item.id);
4917
3033
  });
4918
3034
  bumpTxn(rows);
@@ -4942,17 +3058,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4942
3058
  }
4943
3059
 
4944
3060
  function commitCommentTask(videoId) {
4945
- if (db) {
3061
+ if (getDb()) {
4946
3062
  const video = getVideoRow(videoId);
4947
3063
  if (!video) return { ok: false, error: "video not found" };
4948
3064
  const nextCount = (video.user_update_count || 0) + 1;
4949
- db.prepare(
4950
- `
3065
+ getDb()
3066
+ .prepare(
3067
+ `
4951
3068
  UPDATE videos
4952
3069
  SET user_update_count = ?
4953
3070
  WHERE id = ?
4954
3071
  `,
4955
- ).run(nextCount, videoId);
3072
+ )
3073
+ .run(nextCount, videoId);
4956
3074
  return { ok: true, userUpdateCount: nextCount };
4957
3075
  }
4958
3076
 
@@ -5017,6 +3135,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5017
3135
  getClientLoginStatus,
5018
3136
  trackClient,
5019
3137
  getActiveClients,
3138
+ moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
5020
3139
  registerVideos,
5021
3140
  getVideo,
5022
3141
  getVideos,
@@ -5044,6 +3163,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
5044
3163
 
5045
3164
  // 辅助函数:获取 LLM 采样偏移量
5046
3165
  function getLlmSampleOffsets() {
5047
- return Object.fromEntries(llmSampleOffsets);
3166
+ return Object.fromEntries(offsetStore.entries());
3167
+ }
3168
+
3169
+ // ===== 将 jobs/raw_jobs 中商家用户移动到 jobs_base =====
3170
+
3171
+ /**
3172
+ * 将 jobs 和 raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base,
3173
+ * 并重置 user_update_count=0,保留 tt_seller=1 以便后续优先级判断。
3174
+ */
3175
+ function moveSellerJobsToBase() {
3176
+ const db = getDb();
3177
+ if (!db) return { ok: false, error: "db not ready" };
3178
+
3179
+ const COLUMNS = [
3180
+ "unique_id",
3181
+ "nickname",
3182
+ "status",
3183
+ "sources",
3184
+ "claimed_by",
3185
+ "claimed_at",
3186
+ "error",
3187
+ "pinned",
3188
+ "no_video",
3189
+ "restricted",
3190
+ "user_update_count",
3191
+ "tt_seller",
3192
+ "verified",
3193
+ "video_count",
3194
+ "comment_count",
3195
+ "guessed_location",
3196
+ "location_created",
3197
+ "confirmed_location",
3198
+ "modified_at",
3199
+ "follower_count",
3200
+ "following_count",
3201
+ "heart_count",
3202
+ "refresh_time",
3203
+ "processed",
3204
+ "processed_at",
3205
+ "created_at",
3206
+ "updated_at",
3207
+ "region",
3208
+ "signature",
3209
+ "sec_uid",
3210
+ "status_code",
3211
+ "latest_video_time",
3212
+ "bio_link",
3213
+ ];
3214
+ const cols = COLUMNS.join(",");
3215
+ const insertSql = `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM `;
3216
+ const condition = "WHERE tt_seller = 1 AND video_count = 0";
3217
+
3218
+ let fromJobs = 0;
3219
+ let fromRawJobs = 0;
3220
+
3221
+ try {
3222
+ // 1. jobs → jobs_base
3223
+ const result1 = db.prepare(insertSql + "jobs " + condition).run();
3224
+ fromJobs = result1.changes || 0;
3225
+
3226
+ // 2. raw_jobs → jobs_base
3227
+ const result2 = db.prepare(insertSql + "raw_jobs " + condition).run();
3228
+ fromRawJobs = result2.changes || 0;
3229
+ } catch (e) {
3230
+ return { ok: false, error: e.message };
3231
+ }
3232
+
3233
+ // 3. 重置 user_update_count=0,保留 tt_seller=1 以便后续优先级判断
3234
+ // 包括:新移动过来的记录 + jobs_base 中已有的滞留记录
3235
+ let resetCount = 0;
3236
+ try {
3237
+ const resetStmt = db.prepare(
3238
+ `UPDATE jobs_base
3239
+ SET user_update_count = 0
3240
+ WHERE video_count = 0
3241
+ AND tt_seller = 1`,
3242
+ );
3243
+ resetStmt.run();
3244
+ resetCount = resetStmt.changes || 0;
3245
+ } catch (e) {
3246
+ return {
3247
+ ok: false,
3248
+ error: `reset failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
3249
+ };
3250
+ }
3251
+
3252
+ // 4. 删除 jobs 和 raw_jobs 中已移动的记录
3253
+ try {
3254
+ db.prepare("DELETE FROM jobs " + condition).run();
3255
+ db.prepare("DELETE FROM raw_jobs " + condition).run();
3256
+ } catch (e) {
3257
+ return {
3258
+ ok: false,
3259
+ error: `delete failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
3260
+ };
3261
+ }
3262
+
3263
+ // 5. 验证:统计 jobs_base 中可被 attach 领取的记录数(tt_seller=1 且 user_update_count<=0)
3264
+ let available = 0;
3265
+ try {
3266
+ const row = db
3267
+ .prepare(
3268
+ `SELECT COUNT(*) as total FROM jobs_base
3269
+ WHERE tt_seller = 1
3270
+ AND COALESCE(user_update_count, 0) <= 0
3271
+ AND video_count = 0`,
3272
+ )
3273
+ .get();
3274
+ available = row.total;
3275
+ } catch (_) {
3276
+ // ignore
3277
+ }
3278
+
3279
+ return {
3280
+ ok: true,
3281
+ fromJobs,
3282
+ fromRawJobs,
3283
+ totalInserted: fromJobs + fromRawJobs,
3284
+ resetCount,
3285
+ availableInBase: available,
3286
+ };
5048
3287
  }
5049
3288
  }