tt-help-cli-ycl 1.3.93 → 1.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -91
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +4 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +631 -2399
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +24 -0
- package/src/watch/tag-service.js +142 -11
package/src/watch/data-store.js
CHANGED
|
@@ -1,1580 +1,190 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
if (path.extname(resolved).toLowerCase() !== ".db") {
|
|
19
|
-
throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
|
|
20
|
-
}
|
|
21
|
-
return resolved;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
function resetDbConnection() {
|
|
25
|
-
if (db) {
|
|
26
|
-
db.close();
|
|
27
|
-
db = null;
|
|
28
|
-
}
|
|
29
|
-
dbPath = null;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
|
|
33
|
-
const merged = new Map();
|
|
34
|
-
|
|
35
|
-
const tryLoad = (targetPath, label) => {
|
|
36
|
-
if (!targetPath) return;
|
|
37
|
-
if (!fs.existsSync(targetPath)) return;
|
|
38
|
-
try {
|
|
39
|
-
const parsed = JSON.parse(fs.readFileSync(targetPath, "utf-8"));
|
|
40
|
-
if (!Array.isArray(parsed)) return;
|
|
41
|
-
for (const item of parsed) {
|
|
42
|
-
const uniqueId = item?.uniqueId || item?.unique_id;
|
|
43
|
-
if (!uniqueId) continue;
|
|
44
|
-
merged.set(uniqueId, {
|
|
45
|
-
...merged.get(uniqueId),
|
|
46
|
-
...item,
|
|
47
|
-
uniqueId,
|
|
48
|
-
});
|
|
49
|
-
}
|
|
50
|
-
} catch (e) {
|
|
51
|
-
console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
|
|
52
|
-
}
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
tryLoad(userFilePath, "result.json");
|
|
56
|
-
tryLoad(doneFilePath, "result-done.json");
|
|
57
|
-
|
|
58
|
-
return [...merged.values()];
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
function loadLegacyVideosFromFile(videoPath) {
|
|
62
|
-
if (!videoPath) return [];
|
|
63
|
-
if (!fs.existsSync(videoPath)) return [];
|
|
64
|
-
|
|
65
|
-
try {
|
|
66
|
-
const parsed = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
|
|
67
|
-
return Array.isArray(parsed) ? parsed : [];
|
|
68
|
-
} catch (e) {
|
|
69
|
-
console.error(
|
|
70
|
-
`[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
|
|
71
|
-
);
|
|
72
|
-
return [];
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
function initUserDb(filePath) {
|
|
77
|
-
dbPath = normalizeDbFilePath(filePath);
|
|
78
|
-
fs.mkdirSync(path.dirname(dbPath), { recursive: true });
|
|
79
|
-
db = new Database(dbPath);
|
|
80
|
-
db.pragma("journal_mode = WAL");
|
|
81
|
-
db.exec(`
|
|
82
|
-
CREATE TABLE IF NOT EXISTS users (
|
|
83
|
-
unique_id TEXT PRIMARY KEY,
|
|
84
|
-
tt_seller TEXT,
|
|
85
|
-
verified INTEGER,
|
|
86
|
-
location_created TEXT,
|
|
87
|
-
created_at TEXT,
|
|
88
|
-
updated_at TEXT
|
|
89
|
-
)
|
|
90
|
-
`);
|
|
91
|
-
db.exec(`
|
|
92
|
-
CREATE TABLE IF NOT EXISTS jobs (
|
|
93
|
-
unique_id TEXT PRIMARY KEY,
|
|
94
|
-
nickname TEXT,
|
|
95
|
-
status TEXT DEFAULT 'pending',
|
|
96
|
-
sources TEXT,
|
|
97
|
-
claimed_by TEXT,
|
|
98
|
-
claimed_at INTEGER,
|
|
99
|
-
error TEXT,
|
|
100
|
-
pinned INTEGER DEFAULT 0,
|
|
101
|
-
no_video INTEGER DEFAULT 0,
|
|
102
|
-
restricted INTEGER DEFAULT 0,
|
|
103
|
-
user_update_count INTEGER DEFAULT 0,
|
|
104
|
-
tt_seller INTEGER,
|
|
105
|
-
verified INTEGER,
|
|
106
|
-
video_count INTEGER DEFAULT 0,
|
|
107
|
-
comment_count INTEGER DEFAULT 0,
|
|
108
|
-
guessed_location TEXT,
|
|
109
|
-
location_created TEXT,
|
|
110
|
-
confirmed_location TEXT,
|
|
111
|
-
modified_at INTEGER,
|
|
112
|
-
follower_count INTEGER DEFAULT 0,
|
|
113
|
-
following_count INTEGER DEFAULT 0,
|
|
114
|
-
heart_count INTEGER DEFAULT 0,
|
|
115
|
-
refresh_time INTEGER,
|
|
116
|
-
processed INTEGER DEFAULT 0,
|
|
117
|
-
processed_at INTEGER,
|
|
118
|
-
created_at INTEGER,
|
|
119
|
-
updated_at INTEGER,
|
|
120
|
-
region TEXT,
|
|
121
|
-
signature TEXT,
|
|
122
|
-
sec_uid TEXT,
|
|
123
|
-
status_code INTEGER
|
|
124
|
-
)
|
|
125
|
-
`);
|
|
126
|
-
|
|
127
|
-
// 迁移:为已存在的 jobs 表添加 status_code 列
|
|
128
|
-
const existingJobColumns = new Set(
|
|
129
|
-
db
|
|
130
|
-
.prepare("PRAGMA table_info(jobs)")
|
|
131
|
-
.all()
|
|
132
|
-
.map((c) => c.name),
|
|
133
|
-
);
|
|
134
|
-
if (!existingJobColumns.has("status_code")) {
|
|
135
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN status_code INTEGER`);
|
|
136
|
-
}
|
|
137
|
-
if (!existingJobColumns.has("latest_video_time")) {
|
|
138
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN latest_video_time INTEGER`);
|
|
139
|
-
}
|
|
140
|
-
if (!existingJobColumns.has("confirmed_location")) {
|
|
141
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN confirmed_location TEXT`);
|
|
142
|
-
}
|
|
143
|
-
if (!existingJobColumns.has("modified_at")) {
|
|
144
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN modified_at INTEGER`);
|
|
145
|
-
}
|
|
146
|
-
if (!existingJobColumns.has("bio_link")) {
|
|
147
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
|
|
148
|
-
}
|
|
149
|
-
if (!existingJobColumns.has("top_video_play_count")) {
|
|
150
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
|
|
151
|
-
}
|
|
152
|
-
if (!existingJobColumns.has("top_video_href")) {
|
|
153
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
|
-
}
|
|
155
|
-
if (!existingJobColumns.has("user_create_time")) {
|
|
156
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
|
|
157
|
-
}
|
|
158
|
-
db.exec(`
|
|
159
|
-
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
160
|
-
unique_id TEXT PRIMARY KEY,
|
|
161
|
-
nickname TEXT,
|
|
162
|
-
status TEXT DEFAULT 'pending',
|
|
163
|
-
sources TEXT,
|
|
164
|
-
claimed_by TEXT,
|
|
165
|
-
claimed_at INTEGER,
|
|
166
|
-
error TEXT,
|
|
167
|
-
pinned INTEGER DEFAULT 0,
|
|
168
|
-
no_video INTEGER DEFAULT 0,
|
|
169
|
-
restricted INTEGER DEFAULT 0,
|
|
170
|
-
user_update_count INTEGER DEFAULT 0,
|
|
171
|
-
tt_seller INTEGER,
|
|
172
|
-
verified INTEGER,
|
|
173
|
-
video_count INTEGER DEFAULT 0,
|
|
174
|
-
comment_count INTEGER DEFAULT 0,
|
|
175
|
-
guessed_location TEXT,
|
|
176
|
-
location_created TEXT,
|
|
177
|
-
confirmed_location TEXT,
|
|
178
|
-
modified_at INTEGER,
|
|
179
|
-
follower_count INTEGER DEFAULT 0,
|
|
180
|
-
following_count INTEGER DEFAULT 0,
|
|
181
|
-
heart_count INTEGER DEFAULT 0,
|
|
182
|
-
refresh_time INTEGER,
|
|
183
|
-
processed INTEGER DEFAULT 0,
|
|
184
|
-
processed_at INTEGER,
|
|
185
|
-
created_at INTEGER,
|
|
186
|
-
updated_at INTEGER,
|
|
187
|
-
region TEXT,
|
|
188
|
-
signature TEXT,
|
|
189
|
-
sec_uid TEXT,
|
|
190
|
-
status_code INTEGER,
|
|
191
|
-
latest_video_time INTEGER,
|
|
192
|
-
bio_link TEXT
|
|
193
|
-
)
|
|
194
|
-
`);
|
|
195
|
-
|
|
196
|
-
// 迁移:为已存在的 jobs_base 表补全列
|
|
197
|
-
const existingJobBaseColumns = new Set(
|
|
198
|
-
db
|
|
199
|
-
.prepare("PRAGMA table_info(jobs_base)")
|
|
200
|
-
.all()
|
|
201
|
-
.map((c) => c.name),
|
|
202
|
-
);
|
|
203
|
-
if (!existingJobBaseColumns.has("status_code")) {
|
|
204
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
|
|
205
|
-
}
|
|
206
|
-
if (!existingJobBaseColumns.has("latest_video_time")) {
|
|
207
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
|
|
208
|
-
}
|
|
209
|
-
if (!existingJobBaseColumns.has("confirmed_location")) {
|
|
210
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
|
|
211
|
-
}
|
|
212
|
-
if (!existingJobBaseColumns.has("modified_at")) {
|
|
213
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
|
|
214
|
-
}
|
|
215
|
-
if (!existingJobBaseColumns.has("bio_link")) {
|
|
216
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
|
|
217
|
-
}
|
|
218
|
-
if (!existingJobBaseColumns.has("user_create_time")) {
|
|
219
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
|
|
220
|
-
}
|
|
221
|
-
db.exec(`
|
|
222
|
-
CREATE TABLE IF NOT EXISTS raw_jobs (
|
|
223
|
-
unique_id TEXT PRIMARY KEY,
|
|
224
|
-
nickname TEXT,
|
|
225
|
-
status TEXT DEFAULT 'pending',
|
|
226
|
-
sources TEXT,
|
|
227
|
-
claimed_by TEXT,
|
|
228
|
-
claimed_at INTEGER,
|
|
229
|
-
error TEXT,
|
|
230
|
-
pinned INTEGER DEFAULT 0,
|
|
231
|
-
no_video INTEGER DEFAULT 0,
|
|
232
|
-
restricted INTEGER DEFAULT 0,
|
|
233
|
-
user_update_count INTEGER DEFAULT 0,
|
|
234
|
-
tt_seller INTEGER,
|
|
235
|
-
verified INTEGER,
|
|
236
|
-
video_count INTEGER DEFAULT 0,
|
|
237
|
-
comment_count INTEGER DEFAULT 0,
|
|
238
|
-
guessed_location TEXT,
|
|
239
|
-
location_created TEXT,
|
|
240
|
-
confirmed_location TEXT,
|
|
241
|
-
modified_at INTEGER,
|
|
242
|
-
follower_count INTEGER DEFAULT 0,
|
|
243
|
-
following_count INTEGER DEFAULT 0,
|
|
244
|
-
heart_count INTEGER DEFAULT 0,
|
|
245
|
-
refresh_time INTEGER,
|
|
246
|
-
processed INTEGER DEFAULT 0,
|
|
247
|
-
processed_at INTEGER,
|
|
248
|
-
created_at INTEGER,
|
|
249
|
-
updated_at INTEGER,
|
|
250
|
-
region TEXT,
|
|
251
|
-
signature TEXT,
|
|
252
|
-
sec_uid TEXT,
|
|
253
|
-
status_code INTEGER,
|
|
254
|
-
latest_video_time INTEGER
|
|
255
|
-
)
|
|
256
|
-
`);
|
|
257
|
-
|
|
258
|
-
// 迁移:为已存在的 raw_jobs 表添加 status_code 列
|
|
259
|
-
const existingRawJobColumns = new Set(
|
|
260
|
-
db
|
|
261
|
-
.prepare("PRAGMA table_info(raw_jobs)")
|
|
262
|
-
.all()
|
|
263
|
-
.map((c) => c.name),
|
|
264
|
-
);
|
|
265
|
-
if (!existingRawJobColumns.has("status_code")) {
|
|
266
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN status_code INTEGER`);
|
|
267
|
-
}
|
|
268
|
-
if (!existingRawJobColumns.has("latest_video_time")) {
|
|
269
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN latest_video_time INTEGER`);
|
|
270
|
-
}
|
|
271
|
-
if (!existingRawJobColumns.has("confirmed_location")) {
|
|
272
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN confirmed_location TEXT`);
|
|
273
|
-
}
|
|
274
|
-
if (!existingRawJobColumns.has("modified_at")) {
|
|
275
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN modified_at INTEGER`);
|
|
276
|
-
}
|
|
277
|
-
if (!existingRawJobColumns.has("bio_link")) {
|
|
278
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
|
|
279
|
-
}
|
|
280
|
-
if (!existingRawJobColumns.has("user_create_time")) {
|
|
281
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
|
|
282
|
-
}
|
|
283
|
-
db.exec(`
|
|
284
|
-
CREATE TABLE IF NOT EXISTS videos (
|
|
285
|
-
id TEXT PRIMARY KEY,
|
|
286
|
-
href TEXT,
|
|
287
|
-
author_unique_id TEXT,
|
|
288
|
-
location_created TEXT,
|
|
289
|
-
tt_seller INTEGER DEFAULT 0,
|
|
290
|
-
registered_at INTEGER,
|
|
291
|
-
user_update_count INTEGER DEFAULT 0,
|
|
292
|
-
play_count INTEGER,
|
|
293
|
-
digg_count INTEGER,
|
|
294
|
-
comment_count INTEGER,
|
|
295
|
-
share_count INTEGER,
|
|
296
|
-
collect_count INTEGER,
|
|
297
|
-
stats_updated_at INTEGER,
|
|
298
|
-
create_time INTEGER
|
|
299
|
-
)
|
|
300
|
-
`);
|
|
301
|
-
db.exec(`
|
|
302
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_status_video
|
|
303
|
-
ON jobs(status, video_count DESC)
|
|
304
|
-
`);
|
|
305
|
-
db.exec(`
|
|
306
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
|
|
307
|
-
ON jobs(claimed_by, status, claimed_at)
|
|
308
|
-
`);
|
|
309
|
-
db.exec(`
|
|
310
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
|
|
311
|
-
ON jobs(status, claimed_at)
|
|
312
|
-
`);
|
|
313
|
-
db.exec(`
|
|
314
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
|
|
315
|
-
ON jobs(tt_seller, verified, location_created, refresh_time)
|
|
316
|
-
`);
|
|
317
|
-
db.exec(`
|
|
318
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
|
|
319
|
-
ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
|
|
320
|
-
`);
|
|
321
|
-
db.exec(`
|
|
322
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
|
|
323
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
324
|
-
WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
|
|
325
|
-
`);
|
|
326
|
-
db.exec(`
|
|
327
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
|
|
328
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
329
|
-
WHERE status = 'pending'
|
|
330
|
-
AND COALESCE(pinned, 0) = 0
|
|
331
|
-
AND tt_seller = 1
|
|
332
|
-
AND verified = 0
|
|
333
|
-
`);
|
|
334
|
-
db.exec(`
|
|
335
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
|
|
336
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
337
|
-
WHERE status = 'pending'
|
|
338
|
-
AND COALESCE(pinned, 0) = 0
|
|
339
|
-
AND (
|
|
340
|
-
instr(COALESCE(sources, ''), '"following"') > 0
|
|
341
|
-
OR instr(COALESCE(sources, ''), '"follower"') > 0
|
|
342
|
-
)
|
|
343
|
-
`);
|
|
344
|
-
db.exec(`
|
|
345
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
|
|
346
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
347
|
-
WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
|
|
348
|
-
`);
|
|
349
|
-
db.exec(`
|
|
350
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
|
|
351
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
352
|
-
WHERE (tt_seller IS NULL OR tt_seller = '')
|
|
353
|
-
AND (user_update_count IS NULL OR user_update_count <= 0)
|
|
354
|
-
`);
|
|
355
|
-
db.exec(`
|
|
356
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
|
|
357
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
358
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
359
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
360
|
-
`);
|
|
361
|
-
db.exec(`
|
|
362
|
-
CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
|
|
363
|
-
ON videos(user_update_count, tt_seller DESC, registered_at ASC)
|
|
364
|
-
`);
|
|
365
|
-
db.exec(`
|
|
366
|
-
CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
|
|
367
|
-
ON videos(tt_seller DESC, registered_at ASC, id)
|
|
368
|
-
WHERE user_update_count IS NULL OR user_update_count <= 0
|
|
369
|
-
`);
|
|
370
|
-
|
|
371
|
-
const existingVideoColumns = new Set(
|
|
372
|
-
db
|
|
373
|
-
.prepare("PRAGMA table_info(videos)")
|
|
374
|
-
.all()
|
|
375
|
-
.map((column) => column.name),
|
|
376
|
-
);
|
|
377
|
-
const requiredVideoColumns = {
|
|
378
|
-
play_count: "INTEGER",
|
|
379
|
-
digg_count: "INTEGER",
|
|
380
|
-
comment_count: "INTEGER",
|
|
381
|
-
share_count: "INTEGER",
|
|
382
|
-
collect_count: "INTEGER",
|
|
383
|
-
stats_updated_at: "INTEGER",
|
|
384
|
-
};
|
|
385
|
-
for (const [column, type] of Object.entries(requiredVideoColumns)) {
|
|
386
|
-
if (!existingVideoColumns.has(column)) {
|
|
387
|
-
db.exec(`ALTER TABLE videos ADD COLUMN ${column} ${type}`);
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// 迁移:videos 表添加 create_time 列
|
|
392
|
-
if (!existingVideoColumns.has("create_time")) {
|
|
393
|
-
db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
// tags 表:标签发现与打分系统
|
|
397
|
-
db.exec(`
|
|
398
|
-
CREATE TABLE IF NOT EXISTS tags (
|
|
399
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
400
|
-
tag TEXT NOT NULL UNIQUE,
|
|
401
|
-
status TEXT NOT NULL DEFAULT 'new',
|
|
402
|
-
score REAL NOT NULL DEFAULT 0,
|
|
403
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
404
|
-
scored_at TEXT,
|
|
405
|
-
score_count INTEGER NOT NULL DEFAULT 0,
|
|
406
|
-
countries TEXT NOT NULL DEFAULT '[]',
|
|
407
|
-
matched_countries TEXT DEFAULT '[]',
|
|
408
|
-
total_posts INTEGER DEFAULT 0,
|
|
409
|
-
author_count INTEGER DEFAULT 0,
|
|
410
|
-
matched_authors INTEGER DEFAULT 0,
|
|
411
|
-
pushed_users INTEGER DEFAULT 0,
|
|
412
|
-
source TEXT NOT NULL DEFAULT 'llm',
|
|
413
|
-
user_prompt TEXT,
|
|
414
|
-
last_error TEXT
|
|
415
|
-
)
|
|
416
|
-
`);
|
|
417
|
-
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
|
|
418
|
-
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
|
|
419
|
-
|
|
420
|
-
const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
421
|
-
console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
export function importLegacyJsonToDb({
|
|
425
|
-
dbFilePath,
|
|
426
|
-
usersFilePath,
|
|
427
|
-
doneFilePath,
|
|
428
|
-
videosFilePath,
|
|
429
|
-
}) {
|
|
430
|
-
resetDbConnection();
|
|
431
|
-
initUserDb(dbFilePath);
|
|
432
|
-
|
|
433
|
-
const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
|
|
434
|
-
const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
|
|
435
|
-
|
|
436
|
-
const beforeUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
437
|
-
const beforeJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
438
|
-
const beforeVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
439
|
-
|
|
440
|
-
const insertUserStmt = db.prepare(`
|
|
441
|
-
INSERT OR IGNORE INTO users (unique_id) VALUES (?)
|
|
442
|
-
`);
|
|
443
|
-
const insertVideoStmt = db.prepare(`
|
|
444
|
-
INSERT OR IGNORE INTO videos (
|
|
445
|
-
id,
|
|
446
|
-
href,
|
|
447
|
-
author_unique_id,
|
|
448
|
-
location_created,
|
|
449
|
-
tt_seller,
|
|
450
|
-
registered_at,
|
|
451
|
-
user_update_count,
|
|
452
|
-
create_time
|
|
453
|
-
)
|
|
454
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
455
|
-
`);
|
|
456
|
-
|
|
457
|
-
const importUsersTxn = db.transaction((items) => {
|
|
458
|
-
for (const item of items) {
|
|
459
|
-
const uniqueId = item.uniqueId || item.unique_id;
|
|
460
|
-
if (!uniqueId) continue;
|
|
461
|
-
insertUserStmt.run(uniqueId);
|
|
462
|
-
addJobToDb({ ...item, uniqueId });
|
|
463
|
-
}
|
|
464
|
-
});
|
|
465
|
-
|
|
466
|
-
const importVideosTxn = db.transaction((items) => {
|
|
467
|
-
for (const item of items) {
|
|
468
|
-
if (!item?.id) continue;
|
|
469
|
-
insertVideoStmt.run(
|
|
470
|
-
item.id,
|
|
471
|
-
item.href || null,
|
|
472
|
-
item.authorUniqueId || item.author_unique_id || null,
|
|
473
|
-
item.locationCreated || item.location_created || null,
|
|
474
|
-
item.ttSeller ? 1 : 0,
|
|
475
|
-
item.registeredAt || item.registered_at || Date.now(),
|
|
476
|
-
item.userUpdateCount || item.user_update_count || 0,
|
|
477
|
-
item.createTime || item.create_time || null,
|
|
478
|
-
);
|
|
479
|
-
}
|
|
480
|
-
});
|
|
481
|
-
|
|
482
|
-
importUsersTxn(legacyUsers);
|
|
483
|
-
importVideosTxn(legacyVideos);
|
|
484
|
-
|
|
485
|
-
const afterUsers = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
486
|
-
const afterJobs = db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
487
|
-
const afterVideos = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
488
|
-
|
|
489
|
-
return {
|
|
490
|
-
dbPath,
|
|
491
|
-
usersImported: afterUsers - beforeUsers,
|
|
492
|
-
jobsImported: afterJobs - beforeJobs,
|
|
493
|
-
videosImported: afterVideos - beforeVideos,
|
|
494
|
-
totalUsers: afterUsers,
|
|
495
|
-
totalJobs: afterJobs,
|
|
496
|
-
totalVideos: afterVideos,
|
|
497
|
-
};
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
export function closeStoreDb() {
|
|
501
|
-
resetDbConnection();
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
function hasUserInDb(uid) {
|
|
505
|
-
if (!db) return false;
|
|
506
|
-
const row = db.prepare("SELECT 1 FROM users WHERE unique_id = ?").get(uid);
|
|
507
|
-
return !!row;
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
function addUserToDb(user) {
|
|
511
|
-
if (!db) return;
|
|
512
|
-
db.prepare(
|
|
513
|
-
`
|
|
514
|
-
INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
|
|
515
|
-
VALUES (?, ?, ?, ?, ?, ?)
|
|
516
|
-
`,
|
|
517
|
-
).run(
|
|
518
|
-
user.uniqueId,
|
|
519
|
-
user.ttSeller === undefined ||
|
|
520
|
-
user.ttSeller === null ||
|
|
521
|
-
user.ttSeller === ""
|
|
522
|
-
? null
|
|
523
|
-
: user.ttSeller
|
|
524
|
-
? 1
|
|
525
|
-
: 0,
|
|
526
|
-
user.verified === undefined ||
|
|
527
|
-
user.verified === null ||
|
|
528
|
-
user.verified === ""
|
|
529
|
-
? null
|
|
530
|
-
: user.verified
|
|
531
|
-
? 1
|
|
532
|
-
: 0,
|
|
533
|
-
user.locationCreated || null,
|
|
534
|
-
new Date().toISOString(),
|
|
535
|
-
new Date().toISOString(),
|
|
536
|
-
);
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
function addJobToDb(user) {
|
|
540
|
-
if (!db) return;
|
|
541
|
-
const now = Date.now();
|
|
542
|
-
db.prepare(
|
|
543
|
-
`
|
|
544
|
-
INSERT OR IGNORE INTO jobs (
|
|
545
|
-
unique_id,
|
|
546
|
-
nickname,
|
|
547
|
-
status,
|
|
548
|
-
sources,
|
|
549
|
-
claimed_by,
|
|
550
|
-
claimed_at,
|
|
551
|
-
error,
|
|
552
|
-
pinned,
|
|
553
|
-
no_video,
|
|
554
|
-
restricted,
|
|
555
|
-
user_update_count,
|
|
556
|
-
tt_seller,
|
|
557
|
-
verified,
|
|
558
|
-
video_count,
|
|
559
|
-
comment_count,
|
|
560
|
-
guessed_location,
|
|
561
|
-
location_created,
|
|
562
|
-
follower_count,
|
|
563
|
-
following_count,
|
|
564
|
-
heart_count,
|
|
565
|
-
refresh_time,
|
|
566
|
-
processed,
|
|
567
|
-
processed_at,
|
|
568
|
-
created_at,
|
|
569
|
-
updated_at,
|
|
570
|
-
region,
|
|
571
|
-
signature,
|
|
572
|
-
bio_link,
|
|
573
|
-
sec_uid
|
|
574
|
-
)
|
|
575
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
576
|
-
`,
|
|
577
|
-
).run(
|
|
578
|
-
user.uniqueId,
|
|
579
|
-
user.nickname || null,
|
|
580
|
-
user.status || inferStatus(user),
|
|
581
|
-
JSON.stringify(
|
|
582
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
583
|
-
),
|
|
584
|
-
user.claimedBy || null,
|
|
585
|
-
user.claimedAt || null,
|
|
586
|
-
user.error || null,
|
|
587
|
-
user.pinned ? 1 : 0,
|
|
588
|
-
user.noVideo ? 1 : 0,
|
|
589
|
-
user.restricted ? 1 : 0,
|
|
590
|
-
user.userUpdateCount || 0,
|
|
591
|
-
user.ttSeller === undefined ||
|
|
592
|
-
user.ttSeller === null ||
|
|
593
|
-
user.ttSeller === ""
|
|
594
|
-
? null
|
|
595
|
-
: user.ttSeller
|
|
596
|
-
? 1
|
|
597
|
-
: 0,
|
|
598
|
-
user.verified === undefined ||
|
|
599
|
-
user.verified === null ||
|
|
600
|
-
user.verified === ""
|
|
601
|
-
? null
|
|
602
|
-
: user.verified
|
|
603
|
-
? 1
|
|
604
|
-
: 0,
|
|
605
|
-
user.videoCount || 0,
|
|
606
|
-
user.commentCount || 0,
|
|
607
|
-
user.guessedLocation || null,
|
|
608
|
-
user.locationCreated || null,
|
|
609
|
-
user.followerCount || 0,
|
|
610
|
-
user.followingCount || 0,
|
|
611
|
-
user.heartCount || 0,
|
|
612
|
-
user.refreshTime || null,
|
|
613
|
-
user.processed ? 1 : 0,
|
|
614
|
-
user.processedAt || null,
|
|
615
|
-
user.createdAt || now,
|
|
616
|
-
user.updatedAt || now,
|
|
617
|
-
user.region || null,
|
|
618
|
-
user.signature || null,
|
|
619
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
620
|
-
user.secUid || null,
|
|
621
|
-
);
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
function getUserDbCount() {
|
|
625
|
-
if (!db) return 0;
|
|
626
|
-
return db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
function getJobsCount() {
|
|
630
|
-
if (!db) return 0;
|
|
631
|
-
return db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
632
|
-
}
|
|
633
|
-
|
|
634
|
-
function getPendingJobsCount() {
|
|
635
|
-
if (!db) return 0;
|
|
636
|
-
return db
|
|
637
|
-
.prepare("SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'")
|
|
638
|
-
.get().c;
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
function getPendingJobsUserUpdateCount() {
|
|
642
|
-
if (!db) return 0;
|
|
643
|
-
return db
|
|
644
|
-
.prepare(
|
|
645
|
-
`
|
|
646
|
-
SELECT COUNT(*) as c
|
|
647
|
-
FROM jobs
|
|
648
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
649
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
650
|
-
`,
|
|
651
|
-
)
|
|
652
|
-
.get().c;
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
function getRawJobsCount() {
|
|
656
|
-
if (!db) return 0;
|
|
657
|
-
return db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get().c;
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
function getDashboardStatsFromDb(targetLocations = []) {
|
|
661
|
-
if (!db) return null;
|
|
662
|
-
|
|
663
|
-
const targetPlaceholders = targetLocations.map(() => "?").join(", ");
|
|
664
|
-
const targetParams = targetLocations.length ? targetLocations : [];
|
|
665
|
-
|
|
666
|
-
// 合并所有 jobs 表的聚合统计为单次扫描
|
|
667
|
-
const aggregateRow = db
|
|
668
|
-
.prepare(
|
|
669
|
-
`
|
|
670
|
-
SELECT
|
|
671
|
-
COUNT(*) as total,
|
|
672
|
-
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
|
|
673
|
-
SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
|
|
674
|
-
SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
|
|
675
|
-
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
|
|
676
|
-
SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
|
|
677
|
-
SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${
|
|
678
|
-
targetLocations.length
|
|
679
|
-
? `AND location_created IN (${targetPlaceholders})`
|
|
680
|
-
: "AND 1 = 0"
|
|
681
|
-
} THEN 1 ELSE 0 END) as targetUsers,
|
|
682
|
-
SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
|
|
683
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
|
|
684
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
|
|
685
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
|
|
686
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
|
|
687
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
|
|
688
|
-
SUM(CASE
|
|
689
|
-
WHEN status != 'done'
|
|
690
|
-
AND instr(COALESCE(sources, ''), '"video"') = 0
|
|
691
|
-
AND instr(COALESCE(sources, ''), '"comment"') = 0
|
|
692
|
-
AND instr(COALESCE(sources, ''), '"guess"') = 0
|
|
693
|
-
AND instr(COALESCE(sources, ''), '"following"') = 0
|
|
694
|
-
AND instr(COALESCE(sources, ''), '"follower"') = 0
|
|
695
|
-
THEN 1 ELSE 0 END) as seed
|
|
696
|
-
FROM jobs
|
|
697
|
-
`,
|
|
698
|
-
)
|
|
699
|
-
.get(...targetParams);
|
|
700
|
-
|
|
701
|
-
// userUpdateTasks 单独从 jobs_base 统计
|
|
702
|
-
const userUpdateTasksRow = db
|
|
703
|
-
.prepare(
|
|
704
|
-
`
|
|
705
|
-
SELECT COUNT(*) as userUpdateTasks
|
|
706
|
-
FROM jobs_base
|
|
707
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
708
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
709
|
-
`,
|
|
710
|
-
)
|
|
711
|
-
.get();
|
|
712
|
-
|
|
713
|
-
// countryStats 和 targetCountryStats 需要 GROUP BY,保留为独立查询
|
|
714
|
-
const countryStats = db
|
|
715
|
-
.prepare(
|
|
716
|
-
`
|
|
717
|
-
SELECT
|
|
718
|
-
COALESCE(location_created, '未知') as country,
|
|
719
|
-
COUNT(*) as count,
|
|
720
|
-
SUM(CASE
|
|
721
|
-
WHEN tt_seller = 1 AND verified = 0 ${
|
|
722
|
-
targetLocations.length
|
|
723
|
-
? `AND location_created IN (${targetPlaceholders})`
|
|
724
|
-
: "AND 1 = 0"
|
|
725
|
-
}
|
|
726
|
-
THEN 1 ELSE 0 END) as targetCount
|
|
727
|
-
FROM jobs
|
|
728
|
-
WHERE status = 'done'
|
|
729
|
-
GROUP BY COALESCE(location_created, '未知')
|
|
730
|
-
ORDER BY count DESC
|
|
731
|
-
`,
|
|
732
|
-
)
|
|
733
|
-
.all(...targetParams);
|
|
734
|
-
|
|
735
|
-
const targetCountryStats = targetLocations.length
|
|
736
|
-
? db
|
|
737
|
-
.prepare(
|
|
738
|
-
`
|
|
739
|
-
SELECT location_created as country, COUNT(*) as count
|
|
740
|
-
FROM jobs
|
|
741
|
-
WHERE tt_seller = 1
|
|
742
|
-
AND verified = 0
|
|
743
|
-
AND location_created IN (${targetPlaceholders})
|
|
744
|
-
GROUP BY location_created
|
|
745
|
-
ORDER BY count DESC
|
|
746
|
-
`,
|
|
747
|
-
)
|
|
748
|
-
.all(...targetLocations)
|
|
749
|
-
: [];
|
|
750
|
-
|
|
751
|
-
const jobsBaseCount = db
|
|
752
|
-
.prepare("SELECT COUNT(*) as total FROM jobs_base")
|
|
753
|
-
.get().total;
|
|
754
|
-
|
|
755
|
-
return {
|
|
756
|
-
totalUsers: aggregateRow.total,
|
|
757
|
-
rawJobs: getRawJobsCount(),
|
|
758
|
-
dbTotalUsers: getUserDbCount(),
|
|
759
|
-
jobsTotal: aggregateRow.total,
|
|
760
|
-
jobsBaseTotal: jobsBaseCount,
|
|
761
|
-
jobsPending: aggregateRow.pending,
|
|
762
|
-
processedUsers: aggregateRow.done,
|
|
763
|
-
pendingUsers: aggregateRow.pending,
|
|
764
|
-
processingUsers: aggregateRow.processing,
|
|
765
|
-
restrictedUsers: aggregateRow.restricted,
|
|
766
|
-
errorUsers: aggregateRow.error,
|
|
767
|
-
targetUsers: aggregateRow.targetUsers,
|
|
768
|
-
userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
|
|
769
|
-
targetCountryStats,
|
|
770
|
-
countryStats,
|
|
771
|
-
sourceStats: {
|
|
772
|
-
seed: aggregateRow.seed || 0,
|
|
773
|
-
video: aggregateRow.video || 0,
|
|
774
|
-
comment: aggregateRow.comment || 0,
|
|
775
|
-
guess: aggregateRow.guess || 0,
|
|
776
|
-
following: aggregateRow.following || 0,
|
|
777
|
-
follower: aggregateRow.follower || 0,
|
|
778
|
-
processed: aggregateRow.done,
|
|
779
|
-
restricted: aggregateRow.restricted,
|
|
780
|
-
error: aggregateRow.error,
|
|
781
|
-
noVideo: aggregateRow.noVideo || 0,
|
|
782
|
-
},
|
|
783
|
-
};
|
|
784
|
-
}
|
|
785
|
-
|
|
786
|
-
function getPendingByCountryFromDb() {
|
|
787
|
-
if (!db) return [];
|
|
788
|
-
|
|
789
|
-
// 按 guessed_location 分组统计待处理任务
|
|
790
|
-
const rows = db
|
|
791
|
-
.prepare(
|
|
792
|
-
`
|
|
793
|
-
SELECT
|
|
794
|
-
COALESCE(guessed_location, '未知') as country,
|
|
795
|
-
COUNT(*) as count
|
|
796
|
-
FROM jobs
|
|
797
|
-
WHERE status = 'pending'
|
|
798
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
799
|
-
ORDER BY count DESC
|
|
800
|
-
`,
|
|
801
|
-
)
|
|
802
|
-
.all();
|
|
803
|
-
|
|
804
|
-
return rows;
|
|
805
|
-
}
|
|
806
|
-
|
|
807
|
-
function getUserUpdateByCountryFromDb() {
|
|
808
|
-
if (!db) return [];
|
|
809
|
-
|
|
810
|
-
// 按 guessed_location 分组统计待补资料任务
|
|
811
|
-
const rows = db
|
|
812
|
-
.prepare(
|
|
813
|
-
`
|
|
814
|
-
SELECT
|
|
815
|
-
COALESCE(guessed_location, '未知') as country,
|
|
816
|
-
COUNT(*) as count
|
|
817
|
-
FROM jobs_base
|
|
818
|
-
WHERE tt_seller IS NULL
|
|
819
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
820
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
821
|
-
ORDER BY count DESC
|
|
822
|
-
`,
|
|
823
|
-
)
|
|
824
|
-
.all();
|
|
825
|
-
|
|
826
|
-
return rows;
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
function getAttachStuckByCountryFromDb() {
|
|
830
|
-
if (!db) return [];
|
|
831
|
-
|
|
832
|
-
return db
|
|
833
|
-
.prepare(
|
|
834
|
-
`
|
|
835
|
-
SELECT
|
|
836
|
-
COALESCE(guessed_location, '未知') as country,
|
|
837
|
-
COUNT(*) as count
|
|
838
|
-
FROM jobs_base
|
|
839
|
-
WHERE tt_seller IS NULL
|
|
840
|
-
AND COALESCE(user_update_count, 0) = 1
|
|
841
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
842
|
-
ORDER BY count DESC
|
|
843
|
-
`,
|
|
844
|
-
)
|
|
845
|
-
.all();
|
|
846
|
-
}
|
|
847
|
-
|
|
848
|
-
function restoreAttachStuckByCountry(country) {
|
|
849
|
-
if (!db) {
|
|
850
|
-
return { restored: 0, country, error: "db not ready" };
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
854
|
-
if (!normalizedCountry) {
|
|
855
|
-
return {
|
|
856
|
-
restored: 0,
|
|
857
|
-
country: normalizedCountry,
|
|
858
|
-
error: "country is required",
|
|
859
|
-
};
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
const whereSql = `
|
|
863
|
-
COALESCE(tt_seller, '') = ''
|
|
864
|
-
AND COALESCE(user_update_count, 0) = 1
|
|
865
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
866
|
-
`;
|
|
867
|
-
const count =
|
|
868
|
-
db
|
|
869
|
-
.prepare(
|
|
870
|
-
`
|
|
871
|
-
SELECT COUNT(*) as c
|
|
872
|
-
FROM jobs_base
|
|
873
|
-
WHERE ${whereSql}
|
|
874
|
-
`,
|
|
875
|
-
)
|
|
876
|
-
.get(normalizedCountry)?.c || 0;
|
|
877
|
-
|
|
878
|
-
if (!count) {
|
|
879
|
-
return { restored: 0, country: normalizedCountry };
|
|
880
|
-
}
|
|
881
|
-
|
|
882
|
-
db.prepare(
|
|
883
|
-
`
|
|
884
|
-
UPDATE jobs_base
|
|
885
|
-
SET user_update_count = 0,
|
|
886
|
-
updated_at = ?,
|
|
887
|
-
claimed_by = NULL,
|
|
888
|
-
claimed_at = NULL
|
|
889
|
-
WHERE ${whereSql}
|
|
890
|
-
`,
|
|
891
|
-
).run(Date.now(), normalizedCountry);
|
|
892
|
-
|
|
893
|
-
return { restored: count, country: normalizedCountry };
|
|
894
|
-
}
|
|
895
|
-
|
|
896
|
-
function resetPendingByCountry(country) {
|
|
897
|
-
if (!db) {
|
|
898
|
-
return { reset: 0, country, error: "db not ready" };
|
|
899
|
-
}
|
|
900
|
-
|
|
901
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
902
|
-
if (!normalizedCountry) {
|
|
903
|
-
return {
|
|
904
|
-
reset: 0,
|
|
905
|
-
country: normalizedCountry,
|
|
906
|
-
error: "country is required",
|
|
907
|
-
};
|
|
908
|
-
}
|
|
909
|
-
|
|
910
|
-
const whereSql = `
|
|
911
|
-
status = 'pending'
|
|
912
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
913
|
-
`;
|
|
914
|
-
const count =
|
|
915
|
-
db
|
|
916
|
-
.prepare(
|
|
917
|
-
`
|
|
918
|
-
SELECT COUNT(*) as c
|
|
919
|
-
FROM jobs
|
|
920
|
-
WHERE ${whereSql}
|
|
921
|
-
`,
|
|
922
|
-
)
|
|
923
|
-
.get(normalizedCountry)?.c || 0;
|
|
924
|
-
|
|
925
|
-
if (!count) {
|
|
926
|
-
return { reset: 0, country: normalizedCountry };
|
|
927
|
-
}
|
|
928
|
-
|
|
929
|
-
db.prepare(
|
|
930
|
-
`
|
|
931
|
-
UPDATE jobs
|
|
932
|
-
SET user_update_count = 0,
|
|
933
|
-
updated_at = ?,
|
|
934
|
-
claimed_by = NULL,
|
|
935
|
-
claimed_at = NULL
|
|
936
|
-
WHERE ${whereSql}
|
|
937
|
-
`,
|
|
938
|
-
).run(Date.now(), normalizedCountry);
|
|
939
|
-
|
|
940
|
-
return { reset: count, country: normalizedCountry };
|
|
941
|
-
}
|
|
942
|
-
|
|
943
|
-
function getRawByCountryFromDb() {
|
|
944
|
-
if (!db) return [];
|
|
945
|
-
|
|
946
|
-
return db
|
|
947
|
-
.prepare(
|
|
948
|
-
`
|
|
949
|
-
SELECT
|
|
950
|
-
COALESCE(guessed_location, '未知') as country,
|
|
951
|
-
COUNT(*) as count
|
|
952
|
-
FROM raw_jobs
|
|
953
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
954
|
-
ORDER BY count DESC
|
|
955
|
-
`,
|
|
956
|
-
)
|
|
957
|
-
.all();
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
function moveJobsToRawByCountry(scope, country) {
|
|
961
|
-
if (!db) {
|
|
962
|
-
return { moved: 0, scope, country, error: "db not ready" };
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
const normalizedScope = String(scope || "").trim();
|
|
966
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
967
|
-
if (!normalizedCountry) {
|
|
968
|
-
return {
|
|
969
|
-
moved: 0,
|
|
970
|
-
scope: normalizedScope,
|
|
971
|
-
country: normalizedCountry,
|
|
972
|
-
error: "country is required",
|
|
973
|
-
};
|
|
974
|
-
}
|
|
975
|
-
|
|
976
|
-
// pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
|
|
977
|
-
// userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
|
|
978
|
-
let sourceTable = "";
|
|
979
|
-
let scopeWhere = "";
|
|
980
|
-
let columns = "";
|
|
981
|
-
|
|
982
|
-
if (normalizedScope === "pending") {
|
|
983
|
-
sourceTable = "jobs";
|
|
984
|
-
scopeWhere = `status = 'pending'`;
|
|
985
|
-
columns = `
|
|
986
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
987
|
-
error, pinned, no_video, restricted, user_update_count,
|
|
988
|
-
tt_seller, verified, video_count, comment_count,
|
|
989
|
-
guessed_location, location_created, follower_count,
|
|
990
|
-
following_count, heart_count, refresh_time, processed,
|
|
991
|
-
processed_at, created_at, updated_at, region, signature,
|
|
992
|
-
sec_uid, latest_video_time, user_create_time
|
|
993
|
-
`;
|
|
994
|
-
} else if (normalizedScope === "userUpdate") {
|
|
995
|
-
sourceTable = "jobs_base";
|
|
996
|
-
scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
|
|
997
|
-
columns = `
|
|
998
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
999
|
-
error, pinned, no_video, restricted, user_update_count,
|
|
1000
|
-
tt_seller, verified, video_count, comment_count,
|
|
1001
|
-
guessed_location, location_created, follower_count,
|
|
1002
|
-
following_count, heart_count, refresh_time, processed,
|
|
1003
|
-
processed_at, created_at, updated_at, region, signature,
|
|
1004
|
-
sec_uid, latest_video_time, user_create_time
|
|
1005
|
-
`;
|
|
1006
|
-
} else {
|
|
1007
|
-
return {
|
|
1008
|
-
moved: 0,
|
|
1009
|
-
scope: normalizedScope,
|
|
1010
|
-
country: normalizedCountry,
|
|
1011
|
-
error: "unsupported scope",
|
|
1012
|
-
};
|
|
1013
|
-
}
|
|
1014
|
-
|
|
1015
|
-
const whereSql = `
|
|
1016
|
-
${scopeWhere}
|
|
1017
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
1018
|
-
`;
|
|
1019
|
-
const count =
|
|
1020
|
-
db
|
|
1021
|
-
.prepare(
|
|
1022
|
-
`
|
|
1023
|
-
SELECT COUNT(*) as c
|
|
1024
|
-
FROM ${sourceTable}
|
|
1025
|
-
WHERE ${whereSql}
|
|
1026
|
-
`,
|
|
1027
|
-
)
|
|
1028
|
-
.get(normalizedCountry)?.c || 0;
|
|
1029
|
-
|
|
1030
|
-
if (!count) {
|
|
1031
|
-
return { moved: 0, scope: normalizedScope, country: normalizedCountry };
|
|
1032
|
-
}
|
|
1033
|
-
|
|
1034
|
-
const moveTxn = db.transaction((targetCountry) => {
|
|
1035
|
-
db.prepare(
|
|
1036
|
-
`
|
|
1037
|
-
INSERT OR REPLACE INTO raw_jobs (
|
|
1038
|
-
${columns}
|
|
1039
|
-
)
|
|
1040
|
-
SELECT
|
|
1041
|
-
${columns}
|
|
1042
|
-
FROM ${sourceTable}
|
|
1043
|
-
WHERE ${whereSql}
|
|
1044
|
-
`,
|
|
1045
|
-
).run(targetCountry);
|
|
1046
|
-
|
|
1047
|
-
db.prepare(
|
|
1048
|
-
`
|
|
1049
|
-
DELETE FROM ${sourceTable}
|
|
1050
|
-
WHERE ${whereSql}
|
|
1051
|
-
`,
|
|
1052
|
-
).run(targetCountry);
|
|
1053
|
-
});
|
|
1054
|
-
|
|
1055
|
-
moveTxn(normalizedCountry);
|
|
1056
|
-
return { moved: count, scope: normalizedScope, country: normalizedCountry };
|
|
1057
|
-
}
|
|
1058
|
-
|
|
1059
|
-
function restoreRawJobsByCountry(country) {
|
|
1060
|
-
if (!db) {
|
|
1061
|
-
return { restored: 0, country, error: "db not ready" };
|
|
1062
|
-
}
|
|
1063
|
-
|
|
1064
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
1065
|
-
if (!normalizedCountry) {
|
|
1066
|
-
return {
|
|
1067
|
-
restored: 0,
|
|
1068
|
-
country: normalizedCountry,
|
|
1069
|
-
error: "country is required",
|
|
1070
|
-
};
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
const whereSql = `COALESCE(guessed_location, '未知') = ?`;
|
|
1074
|
-
const count =
|
|
1075
|
-
db
|
|
1076
|
-
.prepare(
|
|
1077
|
-
`
|
|
1078
|
-
SELECT COUNT(*) as c
|
|
1079
|
-
FROM raw_jobs
|
|
1080
|
-
WHERE ${whereSql}
|
|
1081
|
-
`,
|
|
1082
|
-
)
|
|
1083
|
-
.get(normalizedCountry)?.c || 0;
|
|
1084
|
-
|
|
1085
|
-
if (!count) {
|
|
1086
|
-
return { restored: 0, country: normalizedCountry };
|
|
1087
|
-
}
|
|
1088
|
-
|
|
1089
|
-
const restoreTxn = db.transaction((targetCountry) => {
|
|
1090
|
-
db.prepare(
|
|
1091
|
-
`
|
|
1092
|
-
INSERT OR REPLACE INTO jobs (
|
|
1093
|
-
unique_id,
|
|
1094
|
-
nickname,
|
|
1095
|
-
status,
|
|
1096
|
-
sources,
|
|
1097
|
-
claimed_by,
|
|
1098
|
-
claimed_at,
|
|
1099
|
-
error,
|
|
1100
|
-
pinned,
|
|
1101
|
-
no_video,
|
|
1102
|
-
restricted,
|
|
1103
|
-
user_update_count,
|
|
1104
|
-
tt_seller,
|
|
1105
|
-
verified,
|
|
1106
|
-
video_count,
|
|
1107
|
-
comment_count,
|
|
1108
|
-
guessed_location,
|
|
1109
|
-
location_created,
|
|
1110
|
-
follower_count,
|
|
1111
|
-
following_count,
|
|
1112
|
-
heart_count,
|
|
1113
|
-
refresh_time,
|
|
1114
|
-
processed,
|
|
1115
|
-
processed_at,
|
|
1116
|
-
created_at,
|
|
1117
|
-
updated_at,
|
|
1118
|
-
region,
|
|
1119
|
-
signature,
|
|
1120
|
-
sec_uid
|
|
1121
|
-
)
|
|
1122
|
-
SELECT
|
|
1123
|
-
unique_id,
|
|
1124
|
-
nickname,
|
|
1125
|
-
status,
|
|
1126
|
-
sources,
|
|
1127
|
-
claimed_by,
|
|
1128
|
-
claimed_at,
|
|
1129
|
-
error,
|
|
1130
|
-
pinned,
|
|
1131
|
-
no_video,
|
|
1132
|
-
restricted,
|
|
1133
|
-
user_update_count,
|
|
1134
|
-
tt_seller,
|
|
1135
|
-
verified,
|
|
1136
|
-
video_count,
|
|
1137
|
-
comment_count,
|
|
1138
|
-
guessed_location,
|
|
1139
|
-
location_created,
|
|
1140
|
-
follower_count,
|
|
1141
|
-
following_count,
|
|
1142
|
-
heart_count,
|
|
1143
|
-
refresh_time,
|
|
1144
|
-
processed,
|
|
1145
|
-
processed_at,
|
|
1146
|
-
created_at,
|
|
1147
|
-
updated_at,
|
|
1148
|
-
region,
|
|
1149
|
-
signature,
|
|
1150
|
-
sec_uid
|
|
1151
|
-
FROM raw_jobs
|
|
1152
|
-
WHERE ${whereSql}
|
|
1153
|
-
`,
|
|
1154
|
-
).run(targetCountry);
|
|
1155
|
-
|
|
1156
|
-
db.prepare(
|
|
1157
|
-
`
|
|
1158
|
-
DELETE FROM raw_jobs
|
|
1159
|
-
WHERE ${whereSql}
|
|
1160
|
-
`,
|
|
1161
|
-
).run(targetCountry);
|
|
1162
|
-
});
|
|
1163
|
-
|
|
1164
|
-
restoreTxn(normalizedCountry);
|
|
1165
|
-
return { restored: count, country: normalizedCountry };
|
|
1166
|
-
}
|
|
1167
|
-
|
|
1168
|
-
function restoreRawJobById(uniqueId) {
|
|
1169
|
-
if (!db) {
|
|
1170
|
-
return { restored: 0, uniqueId, error: "db not ready" };
|
|
1171
|
-
}
|
|
1172
|
-
|
|
1173
|
-
const safeId = String(uniqueId).trim();
|
|
1174
|
-
if (!safeId) {
|
|
1175
|
-
return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
|
|
1176
|
-
}
|
|
1177
|
-
|
|
1178
|
-
const exists =
|
|
1179
|
-
db
|
|
1180
|
-
.prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
|
|
1181
|
-
.get(safeId)?.c || 0;
|
|
1182
|
-
|
|
1183
|
-
if (!exists) {
|
|
1184
|
-
return { restored: 0, uniqueId: safeId };
|
|
1185
|
-
}
|
|
1186
|
-
|
|
1187
|
-
const restoreTxn = db.transaction(() => {
|
|
1188
|
-
db.prepare(
|
|
1189
|
-
`
|
|
1190
|
-
INSERT OR REPLACE INTO jobs (
|
|
1191
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1192
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1193
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1194
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1195
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1196
|
-
)
|
|
1197
|
-
SELECT
|
|
1198
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1199
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1200
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1201
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1202
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1203
|
-
FROM raw_jobs WHERE unique_id = ?
|
|
1204
|
-
`,
|
|
1205
|
-
).run(safeId);
|
|
1206
|
-
|
|
1207
|
-
db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
|
|
1208
|
-
});
|
|
1209
|
-
|
|
1210
|
-
restoreTxn();
|
|
1211
|
-
return { restored: 1, uniqueId: safeId };
|
|
1212
|
-
}
|
|
1213
|
-
|
|
1214
|
-
function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
|
|
1215
|
-
if (!db) {
|
|
1216
|
-
return { restored: 0, error: "db not ready" };
|
|
1217
|
-
}
|
|
1218
|
-
|
|
1219
|
-
const where = [];
|
|
1220
|
-
const args = [];
|
|
1221
|
-
|
|
1222
|
-
if (search) {
|
|
1223
|
-
where.push(
|
|
1224
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1225
|
-
);
|
|
1226
|
-
const likeVal = `%${search.toLowerCase()}%`;
|
|
1227
|
-
args.push(likeVal, likeVal);
|
|
1228
|
-
}
|
|
1229
|
-
|
|
1230
|
-
if (location) {
|
|
1231
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1232
|
-
args.push(location);
|
|
1233
|
-
}
|
|
1234
|
-
|
|
1235
|
-
if (hasVideo) {
|
|
1236
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1237
|
-
}
|
|
1238
|
-
|
|
1239
|
-
if (hasFollower) {
|
|
1240
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1241
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* 数据存储主模块 — createStore() 编排器
|
|
3
|
+
*
|
|
4
|
+
* 本文件是数据存储的入口点,负责编排各子模块:
|
|
5
|
+
* - db-schema.js: 建表、迁移、全局连接管理
|
|
6
|
+
* - db-columns.js: 共享列名常量和 SQL 生成
|
|
7
|
+
* - db-crud.js: 基础 CRUD(增删改查、行映射)
|
|
8
|
+
* - db-stats.js: 仪表盘统计、按国家分组
|
|
9
|
+
* - db-raw-jobs.js: raw_jobs 移入/恢复
|
|
10
|
+
* - db-tags.js: Tag 发现与打分
|
|
11
|
+
* - llm-scoring.js: LLM 国家匹配度打分
|
|
12
|
+
*
|
|
13
|
+
* createStore() 保留为运行时编排器,管理:
|
|
14
|
+
* - 任务认领/提交(claimNextJob/commitJob)
|
|
15
|
+
* - 客户端追踪、视频管理、备份
|
|
16
|
+
* - 内存索引、stats 缓存
|
|
17
|
+
*/
|
|
1242
18
|
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
db
|
|
1251
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
1252
|
-
.get(...args)?.c || 0;
|
|
1253
|
-
|
|
1254
|
-
if (!count) {
|
|
1255
|
-
return { restored: 0 };
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
const restoreTxn = db.transaction(() => {
|
|
1259
|
-
db.prepare(
|
|
1260
|
-
`
|
|
1261
|
-
INSERT OR REPLACE INTO jobs (
|
|
1262
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1263
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1264
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1265
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1266
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1267
|
-
)
|
|
1268
|
-
SELECT
|
|
1269
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1270
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1271
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1272
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1273
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1274
|
-
FROM raw_jobs WHERE ${whereSql}
|
|
1275
|
-
`,
|
|
1276
|
-
).run(...args);
|
|
19
|
+
import fs from "fs";
|
|
20
|
+
import path from "path";
|
|
21
|
+
import Database from "better-sqlite3";
|
|
22
|
+
import {
|
|
23
|
+
isLocationInList,
|
|
24
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
25
|
+
} from "../lib/target-locations.js";
|
|
1277
26
|
|
|
1278
|
-
|
|
1279
|
-
|
|
27
|
+
// Schema 与连接管理
|
|
28
|
+
import {
|
|
29
|
+
getDb,
|
|
30
|
+
getDbPath,
|
|
31
|
+
initDb,
|
|
32
|
+
resetDbConnection,
|
|
33
|
+
loadLegacyUsersFromFiles,
|
|
34
|
+
loadLegacyVideosFromFile,
|
|
35
|
+
} from "./db-schema.js";
|
|
36
|
+
|
|
37
|
+
// CRUD 操作
|
|
38
|
+
import {
|
|
39
|
+
snakeToCamel,
|
|
40
|
+
camelToSnake,
|
|
41
|
+
normalizeJobValue,
|
|
42
|
+
mapJobRow,
|
|
43
|
+
mapVideoRow,
|
|
44
|
+
inferStatus,
|
|
45
|
+
hasUserInDb,
|
|
46
|
+
addUserToDb,
|
|
47
|
+
addJobToDb,
|
|
48
|
+
addJobBaseToDb,
|
|
49
|
+
addJob,
|
|
50
|
+
getJobRow,
|
|
51
|
+
getJobBaseRow,
|
|
52
|
+
getJob,
|
|
53
|
+
getAllJobs,
|
|
54
|
+
getVideoRow,
|
|
55
|
+
getAllVideoRows,
|
|
56
|
+
updateJobInfo,
|
|
57
|
+
updateJobBaseInfo,
|
|
58
|
+
getUserDbCount,
|
|
59
|
+
getJobsCount,
|
|
60
|
+
getPendingJobsCount,
|
|
61
|
+
getPendingJobsUserUpdateCount,
|
|
62
|
+
getRawJobsCount,
|
|
63
|
+
} from "./db-crud.js";
|
|
64
|
+
|
|
65
|
+
// 统计查询
|
|
66
|
+
import {
|
|
67
|
+
getDashboardStatsFromDb,
|
|
68
|
+
getPendingByCountryFromDb,
|
|
69
|
+
getUserUpdateByCountryFromDb,
|
|
70
|
+
getAttachStuckByCountryFromDb,
|
|
71
|
+
getRawByCountryFromDb,
|
|
72
|
+
restoreAttachStuckByCountry,
|
|
73
|
+
resetPendingByCountry,
|
|
74
|
+
} from "./db-stats.js";
|
|
75
|
+
|
|
76
|
+
// Raw Jobs 管理
|
|
77
|
+
import {
|
|
78
|
+
moveJobsToRawByCountry,
|
|
79
|
+
restoreRawJobsByCountry,
|
|
80
|
+
restoreRawJobById,
|
|
81
|
+
restoreRawJobsByFilter,
|
|
82
|
+
getRawJobsPageFromDb,
|
|
83
|
+
} from "./db-raw-jobs.js";
|
|
84
|
+
|
|
85
|
+
// Tag CRUD
|
|
86
|
+
import {
|
|
87
|
+
insertTag,
|
|
88
|
+
getTagsByStatus,
|
|
89
|
+
getTagsByCountry,
|
|
90
|
+
getDeadTags,
|
|
91
|
+
claimTag,
|
|
92
|
+
reportTagScore,
|
|
93
|
+
getAllTags,
|
|
94
|
+
rawQuery,
|
|
95
|
+
normalizeTags,
|
|
96
|
+
clearTags,
|
|
97
|
+
} from "./db-tags.js";
|
|
98
|
+
|
|
99
|
+
// LLM 打分
|
|
100
|
+
import {
|
|
101
|
+
scoreJobLocation,
|
|
102
|
+
scoreJobsBatch,
|
|
103
|
+
createLlmOffsetStore,
|
|
104
|
+
} from "./llm-scoring.js";
|
|
1280
105
|
|
|
1281
|
-
|
|
1282
|
-
return { restored: count };
|
|
1283
|
-
}
|
|
106
|
+
// ===== 薄包装函数(保持外部 API 不变)=====
|
|
1284
107
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
108
|
+
/**
|
|
109
|
+
* 导入历史 JSON 数据到 SQLite
|
|
110
|
+
*/
|
|
111
|
+
export function importLegacyJsonToDb({
|
|
112
|
+
dbFilePath,
|
|
113
|
+
usersFilePath,
|
|
114
|
+
doneFilePath,
|
|
115
|
+
videosFilePath,
|
|
1292
116
|
}) {
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1296
|
-
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
1297
|
-
const where = [];
|
|
1298
|
-
const args = [];
|
|
1299
|
-
|
|
1300
|
-
if (search) {
|
|
1301
|
-
where.push(
|
|
1302
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1303
|
-
);
|
|
1304
|
-
const pattern = `%${String(search).toLowerCase()}%`;
|
|
1305
|
-
args.push(pattern, pattern);
|
|
1306
|
-
}
|
|
1307
|
-
if (location) {
|
|
1308
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1309
|
-
args.push(location);
|
|
1310
|
-
}
|
|
1311
|
-
if (hasVideo) {
|
|
1312
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1313
|
-
}
|
|
1314
|
-
if (hasFollower) {
|
|
1315
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1316
|
-
}
|
|
1317
|
-
|
|
1318
|
-
const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
|
|
1319
|
-
const total = db
|
|
1320
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
|
|
1321
|
-
.get(...args).c;
|
|
1322
|
-
|
|
1323
|
-
const rows = db
|
|
1324
|
-
.prepare(
|
|
1325
|
-
`
|
|
1326
|
-
SELECT *
|
|
1327
|
-
FROM raw_jobs
|
|
1328
|
-
${whereSql}
|
|
1329
|
-
ORDER BY created_at DESC, unique_id ASC
|
|
1330
|
-
LIMIT ? OFFSET ?
|
|
1331
|
-
`,
|
|
1332
|
-
)
|
|
1333
|
-
.all(...args, safeLimit, safeOffset);
|
|
1334
|
-
|
|
1335
|
-
return {
|
|
1336
|
-
total,
|
|
1337
|
-
limit: safeLimit,
|
|
1338
|
-
offset: safeOffset,
|
|
1339
|
-
users: rows.map(mapJobRow),
|
|
1340
|
-
};
|
|
1341
|
-
}
|
|
1342
|
-
|
|
1343
|
-
// ====== Tag 发现与打分 CRUD ======
|
|
1344
|
-
|
|
1345
|
-
function insertTag(tag, countries, source = "llm") {
|
|
1346
|
-
if (!db) return { inserted: false, error: "db not ready" };
|
|
1347
|
-
// 防止存入带 # 前缀的 tag
|
|
1348
|
-
const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1349
|
-
if (!normalized || normalized.length < 2) {
|
|
1350
|
-
return { inserted: false, error: "invalid tag" };
|
|
1351
|
-
}
|
|
1352
|
-
try {
|
|
1353
|
-
const result = db
|
|
1354
|
-
.prepare(
|
|
1355
|
-
`
|
|
1356
|
-
INSERT OR IGNORE INTO tags (tag, countries, source)
|
|
1357
|
-
VALUES (?, ?, ?)
|
|
1358
|
-
`,
|
|
1359
|
-
)
|
|
1360
|
-
.run(normalized, JSON.stringify(countries), source);
|
|
1361
|
-
return { inserted: result.changes > 0, tag: normalized };
|
|
1362
|
-
} catch (e) {
|
|
1363
|
-
return { inserted: false, error: e.message };
|
|
1364
|
-
}
|
|
1365
|
-
}
|
|
1366
|
-
|
|
1367
|
-
function getTagsByStatus(status, limit = 100) {
|
|
1368
|
-
if (!db) return [];
|
|
1369
|
-
const rows = db
|
|
1370
|
-
.prepare(
|
|
1371
|
-
`
|
|
1372
|
-
SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
|
|
1373
|
-
`,
|
|
1374
|
-
)
|
|
1375
|
-
.all(status, limit);
|
|
1376
|
-
return rows.map((r) => ({
|
|
1377
|
-
...r,
|
|
1378
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1379
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1380
|
-
}));
|
|
1381
|
-
}
|
|
1382
|
-
|
|
1383
|
-
function getTagsByCountry(country, minScore = 0) {
|
|
1384
|
-
if (!db) return [];
|
|
1385
|
-
const rows = db
|
|
1386
|
-
.prepare(
|
|
1387
|
-
`
|
|
1388
|
-
SELECT * FROM tags WHERE status != 'dead'
|
|
1389
|
-
ORDER BY score DESC
|
|
1390
|
-
`,
|
|
1391
|
-
)
|
|
1392
|
-
.all();
|
|
1393
|
-
// Filter in JS since countries is JSON
|
|
1394
|
-
return rows
|
|
1395
|
-
.map((r) => ({
|
|
1396
|
-
...r,
|
|
1397
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1398
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1399
|
-
}))
|
|
1400
|
-
.filter((r) => r.countries.includes(country) && r.score >= minScore);
|
|
1401
|
-
}
|
|
1402
|
-
|
|
1403
|
-
function getDeadTags(country) {
|
|
1404
|
-
if (!db) return [];
|
|
1405
|
-
const rows = db
|
|
1406
|
-
.prepare(
|
|
1407
|
-
`
|
|
1408
|
-
SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
|
|
1409
|
-
`,
|
|
1410
|
-
)
|
|
1411
|
-
.all();
|
|
1412
|
-
return rows
|
|
1413
|
-
.map((r) => ({
|
|
1414
|
-
...r,
|
|
1415
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1416
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1417
|
-
}))
|
|
1418
|
-
.filter((r) => r.countries.includes(country));
|
|
1419
|
-
}
|
|
117
|
+
resetDbConnection();
|
|
118
|
+
initDb(dbFilePath);
|
|
1420
119
|
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
// 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
|
|
1424
|
-
const result = db
|
|
1425
|
-
.prepare(
|
|
1426
|
-
"UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
|
|
1427
|
-
)
|
|
1428
|
-
.run(tag);
|
|
1429
|
-
if (result.changes === 0) {
|
|
1430
|
-
// 检查是否不存在 vs 已被别人锁定
|
|
1431
|
-
const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
|
|
1432
|
-
if (!row) return { ok: false, error: "tag not found" };
|
|
1433
|
-
return { ok: false, error: `tag status is ${row.status}, already claimed` };
|
|
1434
|
-
}
|
|
1435
|
-
return { ok: true, tag };
|
|
1436
|
-
}
|
|
120
|
+
const db = getDb();
|
|
121
|
+
const dbPath = getDbPath();
|
|
1437
122
|
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
const {
|
|
1441
|
-
score,
|
|
1442
|
-
status,
|
|
1443
|
-
totalPosts,
|
|
1444
|
-
authorCount,
|
|
1445
|
-
matchedAuthors,
|
|
1446
|
-
matchedCountries,
|
|
1447
|
-
pushedUsers,
|
|
1448
|
-
error,
|
|
1449
|
-
} = fields;
|
|
1450
|
-
const matchedCountriesJson = matchedCountries
|
|
1451
|
-
? JSON.stringify(matchedCountries)
|
|
1452
|
-
: null;
|
|
1453
|
-
const now = new Date().toISOString();
|
|
1454
|
-
|
|
1455
|
-
try {
|
|
1456
|
-
const result = db
|
|
1457
|
-
.prepare(
|
|
1458
|
-
`
|
|
1459
|
-
UPDATE tags SET
|
|
1460
|
-
score = COALESCE(?, score),
|
|
1461
|
-
status = COALESCE(?, status),
|
|
1462
|
-
total_posts = COALESCE(?, total_posts),
|
|
1463
|
-
author_count = COALESCE(?, author_count),
|
|
1464
|
-
matched_authors = COALESCE(?, matched_authors),
|
|
1465
|
-
matched_countries = COALESCE(?, matched_countries),
|
|
1466
|
-
pushed_users = COALESCE(?, pushed_users),
|
|
1467
|
-
last_error = COALESCE(?, last_error),
|
|
1468
|
-
scored_at = ?,
|
|
1469
|
-
score_count = score_count + 1
|
|
1470
|
-
WHERE tag = ?
|
|
1471
|
-
`,
|
|
1472
|
-
)
|
|
1473
|
-
.run(
|
|
1474
|
-
score ?? null,
|
|
1475
|
-
status ?? null,
|
|
1476
|
-
totalPosts ?? null,
|
|
1477
|
-
authorCount ?? null,
|
|
1478
|
-
matchedAuthors ?? null,
|
|
1479
|
-
matchedCountriesJson,
|
|
1480
|
-
pushedUsers ?? null,
|
|
1481
|
-
error ?? null,
|
|
1482
|
-
now,
|
|
1483
|
-
tag,
|
|
1484
|
-
);
|
|
1485
|
-
return { ok: result.changes > 0, tag };
|
|
1486
|
-
} catch (e) {
|
|
1487
|
-
return { ok: false, error: e.message };
|
|
1488
|
-
}
|
|
1489
|
-
}
|
|
123
|
+
const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
|
|
124
|
+
const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
|
|
1490
125
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
SELECT *
|
|
1497
|
-
|
|
1498
|
-
)
|
|
1499
|
-
.all(limit);
|
|
1500
|
-
return rows.map((r) => ({
|
|
1501
|
-
...r,
|
|
1502
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1503
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1504
|
-
}));
|
|
1505
|
-
}
|
|
126
|
+
const beforeUsers = getDb()
|
|
127
|
+
.prepare("SELECT COUNT(*) as c FROM users")
|
|
128
|
+
.get().c;
|
|
129
|
+
const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
130
|
+
const beforeVideos = getDb()
|
|
131
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
132
|
+
.get().c;
|
|
1506
133
|
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
} catch (e) {
|
|
1514
|
-
return { error: e.message };
|
|
1515
|
-
}
|
|
1516
|
-
}
|
|
134
|
+
const insertUserStmt = getDb().prepare(
|
|
135
|
+
`INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
|
|
136
|
+
);
|
|
137
|
+
const insertVideoStmt = getDb().prepare(
|
|
138
|
+
`INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
139
|
+
);
|
|
1517
140
|
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
const fixed = [];
|
|
1525
|
-
const merged = [];
|
|
1526
|
-
const skipped = [];
|
|
1527
|
-
|
|
1528
|
-
for (const row of dirtyRows) {
|
|
1529
|
-
const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1530
|
-
if (!cleanTag || cleanTag.length < 2) {
|
|
1531
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1532
|
-
skipped.push({
|
|
1533
|
-
dirty: row.tag,
|
|
1534
|
-
reason: "empty after normalize, deleted",
|
|
1535
|
-
});
|
|
1536
|
-
continue;
|
|
141
|
+
const importUsersTxn = getDb().transaction((items) => {
|
|
142
|
+
for (const item of items) {
|
|
143
|
+
const uniqueId = item.uniqueId || item.unique_id;
|
|
144
|
+
if (!uniqueId) continue;
|
|
145
|
+
insertUserStmt.run(uniqueId);
|
|
146
|
+
addJobToDb({ ...item, uniqueId });
|
|
1537
147
|
}
|
|
148
|
+
});
|
|
1538
149
|
|
|
1539
|
-
|
|
1540
|
-
const
|
|
1541
|
-
|
|
1542
|
-
.
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
JSON.stringify(mergedCountries),
|
|
1552
|
-
cleanTag,
|
|
150
|
+
const importVideosTxn = getDb().transaction((items) => {
|
|
151
|
+
for (const item of items) {
|
|
152
|
+
if (!item?.id) continue;
|
|
153
|
+
insertVideoStmt.run(
|
|
154
|
+
item.id,
|
|
155
|
+
item.href || null,
|
|
156
|
+
item.authorUniqueId || item.author_unique_id || null,
|
|
157
|
+
item.locationCreated || item.location_created || null,
|
|
158
|
+
item.ttSeller ? 1 : 0,
|
|
159
|
+
item.registeredAt || item.registered_at || Date.now(),
|
|
160
|
+
item.userUpdateCount || item.user_update_count || 0,
|
|
161
|
+
item.createTime || item.create_time || null,
|
|
1553
162
|
);
|
|
1554
|
-
// 删除脏数据
|
|
1555
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1556
|
-
merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1557
|
-
} else {
|
|
1558
|
-
// 直接重命名
|
|
1559
|
-
db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
|
|
1560
|
-
fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1561
163
|
}
|
|
1562
|
-
}
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
importUsersTxn(legacyUsers);
|
|
167
|
+
importVideosTxn(legacyVideos);
|
|
168
|
+
|
|
169
|
+
const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
170
|
+
const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
171
|
+
const afterVideos = getDb()
|
|
172
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
173
|
+
.get().c;
|
|
1563
174
|
|
|
1564
175
|
return {
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
176
|
+
dbPath: getDbPath(),
|
|
177
|
+
usersImported: afterUsers - beforeUsers,
|
|
178
|
+
jobsImported: afterJobs - beforeJobs,
|
|
179
|
+
videosImported: afterVideos - beforeVideos,
|
|
180
|
+
totalUsers: afterUsers,
|
|
181
|
+
totalJobs: afterJobs,
|
|
182
|
+
totalVideos: afterVideos,
|
|
1570
183
|
};
|
|
1571
184
|
}
|
|
1572
185
|
|
|
1573
|
-
function
|
|
1574
|
-
|
|
1575
|
-
const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
|
|
1576
|
-
db.exec("DELETE FROM tags");
|
|
1577
|
-
return { ok: true, deleted: count };
|
|
186
|
+
export function closeStoreDb() {
|
|
187
|
+
resetDbConnection();
|
|
1578
188
|
}
|
|
1579
189
|
|
|
1580
190
|
function getUsersPageFromDb({
|
|
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
|
|
|
1587
197
|
offset,
|
|
1588
198
|
targetLocations = [],
|
|
1589
199
|
}) {
|
|
1590
|
-
if (!
|
|
200
|
+
if (!getDb()) return null;
|
|
1591
201
|
|
|
1592
202
|
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1593
203
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
|
|
|
1636
246
|
if (cachedCount && Date.now() - cachedCount.time < 5000) {
|
|
1637
247
|
total = cachedCount.c;
|
|
1638
248
|
} else {
|
|
1639
|
-
total =
|
|
249
|
+
total = getDb()
|
|
1640
250
|
.prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
|
|
1641
251
|
.get(...args).c;
|
|
1642
252
|
getUsersPageFromDb._countCache.set(cacheKey, {
|
|
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
|
|
|
1646
256
|
}
|
|
1647
257
|
|
|
1648
258
|
// 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
|
|
1649
|
-
const rows =
|
|
259
|
+
const rows = getDb()
|
|
1650
260
|
.prepare(
|
|
1651
261
|
`
|
|
1652
262
|
SELECT
|
|
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
|
|
|
1688
298
|
}
|
|
1689
299
|
|
|
1690
300
|
function getTargetUsersFromDb(targetLocations = []) {
|
|
1691
|
-
if (!
|
|
301
|
+
if (!getDb()) return null;
|
|
1692
302
|
if (!targetLocations.length) {
|
|
1693
303
|
return { total: 0, users: [] };
|
|
1694
304
|
}
|
|
1695
305
|
|
|
1696
306
|
const placeholders = targetLocations.map(() => "?").join(", ");
|
|
1697
|
-
const rows =
|
|
307
|
+
const rows = getDb()
|
|
1698
308
|
.prepare(
|
|
1699
309
|
`
|
|
1700
310
|
SELECT
|
|
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1703
313
|
location_created, latest_video_time, refresh_time,
|
|
1704
314
|
guessed_location, pinned, processed_at, video_count,
|
|
1705
315
|
no_video, claimed_by, claimed_at, created_at, updated_at
|
|
316
|
+
FROM jobs
|
|
317
|
+
WHERE tt_seller = 1
|
|
1706
318
|
AND verified = 0
|
|
1707
319
|
AND location_created IN (${placeholders})
|
|
1708
320
|
ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
|
|
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1718
330
|
}
|
|
1719
331
|
|
|
1720
332
|
function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
1721
|
-
if (!
|
|
333
|
+
if (!getDb()) return null;
|
|
1722
334
|
if (!targetLocations.length) {
|
|
1723
335
|
return { countries: [] };
|
|
1724
336
|
}
|
|
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1735
347
|
|
|
1736
348
|
// 摘要模式:只返回各国统计数,不返回用户数据
|
|
1737
349
|
if (summaryOnly) {
|
|
1738
|
-
const statsRows =
|
|
350
|
+
const statsRows = getDb()
|
|
1739
351
|
.prepare(
|
|
1740
352
|
`
|
|
1741
353
|
SELECT location_created as country, COUNT(*) as count
|
|
@@ -1803,397 +415,79 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1803
415
|
/SELECT[^FROM]*FROM/,
|
|
1804
416
|
"SELECT COUNT(*) as cnt FROM",
|
|
1805
417
|
);
|
|
1806
|
-
const total =
|
|
418
|
+
const total =
|
|
419
|
+
getDb()
|
|
420
|
+
.prepare(countSql)
|
|
421
|
+
.get(...params)?.cnt || 0;
|
|
1807
422
|
|
|
1808
423
|
sql += ` LIMIT ? OFFSET ?`;
|
|
1809
424
|
const safeLimit = Math.min(Math.floor(limit), 10000);
|
|
1810
425
|
const safeOffset = Math.max(Math.floor(offset), 0);
|
|
1811
426
|
|
|
1812
|
-
const rows =
|
|
1813
|
-
.prepare(sql)
|
|
1814
|
-
.all(...params, safeLimit, safeOffset)
|
|
1815
|
-
.map(mapJobRow);
|
|
1816
|
-
|
|
1817
|
-
return {
|
|
1818
|
-
total,
|
|
1819
|
-
limit: safeLimit,
|
|
1820
|
-
offset: safeOffset,
|
|
1821
|
-
users: rows,
|
|
1822
|
-
};
|
|
1823
|
-
}
|
|
1824
|
-
|
|
1825
|
-
const rows = db
|
|
1826
|
-
.prepare(
|
|
1827
|
-
`
|
|
1828
|
-
SELECT
|
|
1829
|
-
unique_id,
|
|
1830
|
-
nickname,
|
|
1831
|
-
follower_count,
|
|
1832
|
-
video_count,
|
|
1833
|
-
tt_seller,
|
|
1834
|
-
verified,
|
|
1835
|
-
location_created,
|
|
1836
|
-
confirmed_location,
|
|
1837
|
-
modified_at,
|
|
1838
|
-
latest_video_time,
|
|
1839
|
-
refresh_time,
|
|
1840
|
-
status,
|
|
1841
|
-
sources
|
|
1842
|
-
FROM jobs
|
|
1843
|
-
WHERE tt_seller = 1
|
|
1844
|
-
AND verified = 0
|
|
1845
|
-
AND location_created IN (${placeholders})
|
|
1846
|
-
ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
|
|
1847
|
-
`,
|
|
1848
|
-
)
|
|
1849
|
-
.all(...targetLocations)
|
|
1850
|
-
.map(mapJobRow);
|
|
1851
|
-
|
|
1852
|
-
const countryMap = new Map();
|
|
1853
|
-
for (const row of rows) {
|
|
1854
|
-
const country = row.locationCreated || "未知";
|
|
1855
|
-
if (!countryMap.has(country)) {
|
|
1856
|
-
countryMap.set(country, []);
|
|
1857
|
-
}
|
|
1858
|
-
countryMap.get(country).push(row);
|
|
1859
|
-
}
|
|
1860
|
-
|
|
1861
|
-
const countries = [];
|
|
1862
|
-
for (const [country, users] of countryMap) {
|
|
1863
|
-
countries.push({
|
|
1864
|
-
country,
|
|
1865
|
-
count: users.length,
|
|
1866
|
-
users,
|
|
1867
|
-
});
|
|
1868
|
-
}
|
|
1869
|
-
|
|
1870
|
-
return {
|
|
1871
|
-
total: rows.length,
|
|
1872
|
-
countries,
|
|
1873
|
-
};
|
|
1874
|
-
}
|
|
1875
|
-
|
|
1876
|
-
function snakeToCamel(key) {
|
|
1877
|
-
return key.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
|
|
1878
|
-
}
|
|
1879
|
-
|
|
1880
|
-
function camelToSnake(key) {
|
|
1881
|
-
return key.replace(/[A-Z]/g, (ch) => `_${ch.toLowerCase()}`);
|
|
1882
|
-
}
|
|
1883
|
-
|
|
1884
|
-
const jobBooleanColumns = new Set([
|
|
1885
|
-
"pinned",
|
|
1886
|
-
"no_video",
|
|
1887
|
-
"restricted",
|
|
1888
|
-
"processed",
|
|
1889
|
-
"tt_seller",
|
|
1890
|
-
"verified",
|
|
1891
|
-
"error",
|
|
1892
|
-
]);
|
|
1893
|
-
|
|
1894
|
-
const videoBooleanColumns = new Set(["tt_seller"]);
|
|
1895
|
-
|
|
1896
|
-
const writableJobColumns = new Set([
|
|
1897
|
-
"nickname",
|
|
1898
|
-
"status",
|
|
1899
|
-
"sources",
|
|
1900
|
-
"claimed_by",
|
|
1901
|
-
"claimed_at",
|
|
1902
|
-
"error",
|
|
1903
|
-
"pinned",
|
|
1904
|
-
"no_video",
|
|
1905
|
-
"restricted",
|
|
1906
|
-
"user_update_count",
|
|
1907
|
-
"tt_seller",
|
|
1908
|
-
"verified",
|
|
1909
|
-
"video_count",
|
|
1910
|
-
"comment_count",
|
|
1911
|
-
"guessed_location",
|
|
1912
|
-
"location_created",
|
|
1913
|
-
"confirmed_location",
|
|
1914
|
-
"modified_at",
|
|
1915
|
-
"follower_count",
|
|
1916
|
-
"following_count",
|
|
1917
|
-
"heart_count",
|
|
1918
|
-
"refresh_time",
|
|
1919
|
-
"processed",
|
|
1920
|
-
"processed_at",
|
|
1921
|
-
"updated_at",
|
|
1922
|
-
"region",
|
|
1923
|
-
"signature",
|
|
1924
|
-
"bio_link",
|
|
1925
|
-
"sec_uid",
|
|
1926
|
-
"status_code",
|
|
1927
|
-
"latest_video_time",
|
|
1928
|
-
"top_video_play_count",
|
|
1929
|
-
"top_video_href",
|
|
1930
|
-
"user_create_time",
|
|
1931
|
-
]);
|
|
1932
|
-
|
|
1933
|
-
function normalizeJobValue(column, value) {
|
|
1934
|
-
if (value === undefined || value === null) return null;
|
|
1935
|
-
if (column === "sources") {
|
|
1936
|
-
if (!Array.isArray(value)) return JSON.stringify([]);
|
|
1937
|
-
return JSON.stringify([...new Set(value)]);
|
|
1938
|
-
}
|
|
1939
|
-
if (jobBooleanColumns.has(column)) {
|
|
1940
|
-
return value ? 1 : 0;
|
|
1941
|
-
}
|
|
1942
|
-
// 防御:如果值是对象或数组,转为 JSON 字符串
|
|
1943
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1944
|
-
return value;
|
|
1945
|
-
}
|
|
1946
|
-
|
|
1947
|
-
function mapJobRow(row) {
|
|
1948
|
-
if (!row) return undefined;
|
|
1949
|
-
const mapped = {};
|
|
1950
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1951
|
-
const camelKey = snakeToCamel(key);
|
|
1952
|
-
if (key === "sources") {
|
|
1953
|
-
try {
|
|
1954
|
-
mapped[camelKey] = value ? JSON.parse(value) : [];
|
|
1955
|
-
} catch {
|
|
1956
|
-
mapped[camelKey] = [];
|
|
1957
|
-
}
|
|
1958
|
-
continue;
|
|
1959
|
-
}
|
|
1960
|
-
if (jobBooleanColumns.has(key)) {
|
|
1961
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1962
|
-
continue;
|
|
1963
|
-
}
|
|
1964
|
-
mapped[camelKey] = value;
|
|
1965
|
-
}
|
|
1966
|
-
return mapped;
|
|
1967
|
-
}
|
|
1968
|
-
|
|
1969
|
-
function getJobRow(uniqueId) {
|
|
1970
|
-
if (!db) return null;
|
|
1971
|
-
return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
|
|
1972
|
-
}
|
|
1973
|
-
|
|
1974
|
-
function getJobBaseRow(uniqueId) {
|
|
1975
|
-
if (!db) return null;
|
|
1976
|
-
return db
|
|
1977
|
-
.prepare("SELECT * FROM jobs_base WHERE unique_id = ?")
|
|
1978
|
-
.get(uniqueId);
|
|
1979
|
-
}
|
|
1980
|
-
|
|
1981
|
-
function getJob(uniqueId) {
|
|
1982
|
-
return mapJobRow(getJobRow(uniqueId));
|
|
1983
|
-
}
|
|
1984
|
-
|
|
1985
|
-
function getAllJobs() {
|
|
1986
|
-
if (!db) return [];
|
|
1987
|
-
return db.prepare("SELECT * FROM jobs").all().map(mapJobRow);
|
|
1988
|
-
}
|
|
1989
|
-
|
|
1990
|
-
function mapVideoRow(row) {
|
|
1991
|
-
if (!row) return undefined;
|
|
1992
|
-
const mapped = {};
|
|
1993
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1994
|
-
const camelKey = snakeToCamel(key);
|
|
1995
|
-
if (videoBooleanColumns.has(key)) {
|
|
1996
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1997
|
-
continue;
|
|
1998
|
-
}
|
|
1999
|
-
mapped[camelKey] = value;
|
|
2000
|
-
}
|
|
2001
|
-
return mapped;
|
|
2002
|
-
}
|
|
2003
|
-
|
|
2004
|
-
function getVideoRow(videoId) {
|
|
2005
|
-
if (!db) return null;
|
|
2006
|
-
return db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId);
|
|
2007
|
-
}
|
|
2008
|
-
|
|
2009
|
-
function getAllVideoRows() {
|
|
2010
|
-
if (!db) return [];
|
|
2011
|
-
return db.prepare("SELECT * FROM videos").all();
|
|
2012
|
-
}
|
|
2013
|
-
|
|
2014
|
-
function updateJobInfo(uniqueId, info, incrementCount = true) {
|
|
2015
|
-
if (!db) return { error: "db not initialized" };
|
|
2016
|
-
const existing = getJobRow(uniqueId);
|
|
2017
|
-
if (!existing) return { error: "user not found" };
|
|
2018
|
-
|
|
2019
|
-
const nextValues = {};
|
|
2020
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2021
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2022
|
-
if (value === undefined || value === "") continue;
|
|
2023
|
-
let column = camelToSnake(key);
|
|
2024
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2025
|
-
if (column === "bio") column = "signature";
|
|
2026
|
-
if (column === "create_time") column = "user_create_time";
|
|
2027
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2028
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
nextValues.updated_at = Date.now();
|
|
2032
|
-
if (incrementCount) {
|
|
2033
|
-
nextValues.user_update_count = (existing.user_update_count || 0) + 1;
|
|
2034
|
-
}
|
|
2035
|
-
|
|
2036
|
-
const columns = Object.keys(nextValues);
|
|
2037
|
-
if (columns.length > 0) {
|
|
2038
|
-
const sql = `UPDATE jobs SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
|
|
2039
|
-
db.prepare(sql).run(
|
|
2040
|
-
...columns.map((column) => nextValues[column]),
|
|
2041
|
-
uniqueId,
|
|
2042
|
-
);
|
|
2043
|
-
}
|
|
2044
|
-
|
|
2045
|
-
return {
|
|
2046
|
-
ok: true,
|
|
2047
|
-
userUpdateCount:
|
|
2048
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
2049
|
-
};
|
|
2050
|
-
}
|
|
2051
|
-
|
|
2052
|
-
function inferStatus(u) {
|
|
2053
|
-
if (u.restricted) return "restricted";
|
|
2054
|
-
if (u.error) return "error";
|
|
2055
|
-
if (u.processed) return "done";
|
|
2056
|
-
return "pending";
|
|
2057
|
-
}
|
|
427
|
+
const rows = getDb()
|
|
428
|
+
.prepare(sql)
|
|
429
|
+
.all(...params, safeLimit, safeOffset)
|
|
430
|
+
.map(mapJobRow);
|
|
2058
431
|
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2066
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2067
|
-
if (value === undefined || value === "") continue;
|
|
2068
|
-
let column = camelToSnake(key);
|
|
2069
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2070
|
-
if (column === "bio") column = "signature";
|
|
2071
|
-
if (column === "create_time") column = "user_create_time";
|
|
2072
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2073
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
432
|
+
return {
|
|
433
|
+
total,
|
|
434
|
+
limit: safeLimit,
|
|
435
|
+
offset: safeOffset,
|
|
436
|
+
users: rows,
|
|
437
|
+
};
|
|
2074
438
|
}
|
|
2075
439
|
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
440
|
+
const rows = getDb()
|
|
441
|
+
.prepare(
|
|
442
|
+
`
|
|
443
|
+
SELECT
|
|
444
|
+
unique_id,
|
|
445
|
+
nickname,
|
|
446
|
+
follower_count,
|
|
447
|
+
video_count,
|
|
448
|
+
tt_seller,
|
|
449
|
+
verified,
|
|
450
|
+
location_created,
|
|
451
|
+
confirmed_location,
|
|
452
|
+
modified_at,
|
|
453
|
+
latest_video_time,
|
|
454
|
+
refresh_time,
|
|
455
|
+
status,
|
|
456
|
+
sources
|
|
457
|
+
FROM jobs
|
|
458
|
+
WHERE tt_seller = 1
|
|
459
|
+
AND verified = 0
|
|
460
|
+
AND location_created IN (${placeholders})
|
|
461
|
+
ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
|
|
462
|
+
`,
|
|
463
|
+
)
|
|
464
|
+
.all(...targetLocations)
|
|
465
|
+
.map(mapJobRow);
|
|
466
|
+
|
|
467
|
+
const countryMap = new Map();
|
|
468
|
+
for (const row of rows) {
|
|
469
|
+
const country = row.locationCreated || "未知";
|
|
470
|
+
if (!countryMap.has(country)) {
|
|
471
|
+
countryMap.set(country, []);
|
|
472
|
+
}
|
|
473
|
+
countryMap.get(country).push(row);
|
|
2079
474
|
}
|
|
2080
475
|
|
|
2081
|
-
const
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
);
|
|
476
|
+
const countries = [];
|
|
477
|
+
for (const [country, users] of countryMap) {
|
|
478
|
+
countries.push({
|
|
479
|
+
country,
|
|
480
|
+
count: users.length,
|
|
481
|
+
users,
|
|
482
|
+
});
|
|
2088
483
|
}
|
|
2089
484
|
|
|
2090
485
|
return {
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
486
|
+
total: rows.length,
|
|
487
|
+
countries,
|
|
2094
488
|
};
|
|
2095
489
|
}
|
|
2096
490
|
|
|
2097
|
-
function addJobBaseToDb(user) {
|
|
2098
|
-
if (!db) return;
|
|
2099
|
-
const now = Date.now();
|
|
2100
|
-
db.prepare(
|
|
2101
|
-
`
|
|
2102
|
-
INSERT OR IGNORE INTO jobs_base (
|
|
2103
|
-
unique_id,
|
|
2104
|
-
nickname,
|
|
2105
|
-
status,
|
|
2106
|
-
sources,
|
|
2107
|
-
claimed_by,
|
|
2108
|
-
claimed_at,
|
|
2109
|
-
error,
|
|
2110
|
-
pinned,
|
|
2111
|
-
no_video,
|
|
2112
|
-
restricted,
|
|
2113
|
-
user_update_count,
|
|
2114
|
-
tt_seller,
|
|
2115
|
-
verified,
|
|
2116
|
-
video_count,
|
|
2117
|
-
comment_count,
|
|
2118
|
-
guessed_location,
|
|
2119
|
-
location_created,
|
|
2120
|
-
follower_count,
|
|
2121
|
-
following_count,
|
|
2122
|
-
heart_count,
|
|
2123
|
-
refresh_time,
|
|
2124
|
-
processed,
|
|
2125
|
-
processed_at,
|
|
2126
|
-
created_at,
|
|
2127
|
-
updated_at,
|
|
2128
|
-
region,
|
|
2129
|
-
signature,
|
|
2130
|
-
bio_link,
|
|
2131
|
-
sec_uid
|
|
2132
|
-
)
|
|
2133
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2134
|
-
`,
|
|
2135
|
-
).run(
|
|
2136
|
-
user.uniqueId,
|
|
2137
|
-
user.nickname || null,
|
|
2138
|
-
user.status || inferStatus(user),
|
|
2139
|
-
JSON.stringify(
|
|
2140
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
2141
|
-
),
|
|
2142
|
-
user.claimedBy || null,
|
|
2143
|
-
user.claimedAt || null,
|
|
2144
|
-
user.error || null,
|
|
2145
|
-
user.pinned ? 1 : 0,
|
|
2146
|
-
user.noVideo ? 1 : 0,
|
|
2147
|
-
user.restricted ? 1 : 0,
|
|
2148
|
-
user.userUpdateCount || 0,
|
|
2149
|
-
user.ttSeller === undefined ||
|
|
2150
|
-
user.ttSeller === null ||
|
|
2151
|
-
user.ttSeller === ""
|
|
2152
|
-
? null
|
|
2153
|
-
: user.ttSeller
|
|
2154
|
-
? 1
|
|
2155
|
-
: 0,
|
|
2156
|
-
user.verified === undefined ||
|
|
2157
|
-
user.verified === null ||
|
|
2158
|
-
user.verified === ""
|
|
2159
|
-
? null
|
|
2160
|
-
: user.verified
|
|
2161
|
-
? 1
|
|
2162
|
-
: 0,
|
|
2163
|
-
user.videoCount || 0,
|
|
2164
|
-
user.commentCount || 0,
|
|
2165
|
-
user.guessedLocation || null,
|
|
2166
|
-
user.locationCreated || null,
|
|
2167
|
-
user.followerCount || 0,
|
|
2168
|
-
user.followingCount || 0,
|
|
2169
|
-
user.heartCount || 0,
|
|
2170
|
-
user.refreshTime || null,
|
|
2171
|
-
user.processed ? 1 : 0,
|
|
2172
|
-
user.processedAt || null,
|
|
2173
|
-
user.createdAt || now,
|
|
2174
|
-
user.updatedAt || now,
|
|
2175
|
-
user.region || null,
|
|
2176
|
-
user.signature || null,
|
|
2177
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
2178
|
-
user.secUid || null,
|
|
2179
|
-
);
|
|
2180
|
-
}
|
|
2181
|
-
|
|
2182
|
-
function addJob(user) {
|
|
2183
|
-
if (!db) {
|
|
2184
|
-
addUserToDb(user);
|
|
2185
|
-
return;
|
|
2186
|
-
}
|
|
2187
|
-
if (!user.status) user.status = inferStatus(user);
|
|
2188
|
-
if (!user.createdAt) user.createdAt = Date.now();
|
|
2189
|
-
if (!user.updatedAt) user.updatedAt = user.createdAt;
|
|
2190
|
-
const writeTxn = db.transaction((job) => {
|
|
2191
|
-
addUserToDb(job);
|
|
2192
|
-
addJobToDb(job);
|
|
2193
|
-
});
|
|
2194
|
-
writeTxn(user);
|
|
2195
|
-
}
|
|
2196
|
-
|
|
2197
491
|
export function createStore(filePath, options = {}) {
|
|
2198
492
|
if (!filePath) {
|
|
2199
493
|
throw new Error("createStore requires an explicit .db path");
|
|
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
|
|
|
2219
513
|
let refillLock = null; // Promise | null
|
|
2220
514
|
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
2221
515
|
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
2222
|
-
|
|
516
|
+
const offsetStore = createLlmOffsetStore();
|
|
2223
517
|
if (filePath) {
|
|
2224
518
|
// 初始化 SQLite 用户表(用于判重)
|
|
2225
|
-
|
|
519
|
+
initDb(filePath);
|
|
2226
520
|
// 从数据库恢复偏移量
|
|
2227
|
-
|
|
2228
|
-
}
|
|
2229
|
-
|
|
2230
|
-
/**
|
|
2231
|
-
* 从数据库加载 LLM 采样偏移量
|
|
2232
|
-
*/
|
|
2233
|
-
function loadLlmSampleOffsets() {
|
|
2234
|
-
try {
|
|
2235
|
-
const row = db
|
|
2236
|
-
.prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
|
|
2237
|
-
.get();
|
|
2238
|
-
if (row && row.offsets) {
|
|
2239
|
-
const parsed = JSON.parse(row.offsets);
|
|
2240
|
-
if (parsed && typeof parsed === "object") {
|
|
2241
|
-
Object.entries(parsed).forEach(([k, v]) => {
|
|
2242
|
-
llmSampleOffsets.set(k, v);
|
|
2243
|
-
});
|
|
2244
|
-
console.error(
|
|
2245
|
-
`[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
|
|
2246
|
-
llmSampleOffsets.entries(),
|
|
2247
|
-
)
|
|
2248
|
-
.map(([k, v]) => `${k}:${v}`)
|
|
2249
|
-
.join(", ")}`,
|
|
2250
|
-
);
|
|
2251
|
-
}
|
|
2252
|
-
}
|
|
2253
|
-
} catch (e) {
|
|
2254
|
-
// 表不存在或解析失败,使用空偏移量
|
|
2255
|
-
console.error(
|
|
2256
|
-
`[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
|
|
2257
|
-
);
|
|
2258
|
-
}
|
|
2259
|
-
}
|
|
2260
|
-
|
|
2261
|
-
/**
|
|
2262
|
-
* 将 LLM 采样偏移量持久化到数据库
|
|
2263
|
-
*/
|
|
2264
|
-
function saveLlmSampleOffsets() {
|
|
2265
|
-
try {
|
|
2266
|
-
const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
|
|
2267
|
-
// 表不存在则创建
|
|
2268
|
-
db.prepare(
|
|
2269
|
-
`CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
|
|
2270
|
-
).run();
|
|
2271
|
-
// 插入或更新
|
|
2272
|
-
db.prepare(
|
|
2273
|
-
`INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
|
|
2274
|
-
).run(offsetsJson);
|
|
2275
|
-
} catch (e) {
|
|
2276
|
-
console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
|
|
2277
|
-
}
|
|
521
|
+
offsetStore.load();
|
|
2278
522
|
}
|
|
2279
523
|
|
|
2280
524
|
// stats 缓存
|
|
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2287
531
|
}
|
|
2288
532
|
|
|
2289
533
|
function computeStatsInternal() {
|
|
2290
|
-
if (
|
|
534
|
+
if (getDb()) {
|
|
2291
535
|
const total = getJobsCount();
|
|
2292
536
|
const statusCounts = {
|
|
2293
537
|
pending: 0,
|
|
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2296
540
|
error: 0,
|
|
2297
541
|
restricted: 0,
|
|
2298
542
|
};
|
|
2299
|
-
const rows =
|
|
543
|
+
const rows = getDb()
|
|
2300
544
|
.prepare(
|
|
2301
545
|
`
|
|
2302
546
|
SELECT status, COUNT(*) as count
|
|
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2372
616
|
}
|
|
2373
617
|
|
|
2374
618
|
function rebuildStatusGroups() {
|
|
2375
|
-
if (
|
|
619
|
+
if (getDb()) {
|
|
2376
620
|
statusGroups = {
|
|
2377
621
|
pending: [],
|
|
2378
622
|
processing: [],
|
|
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
|
|
|
2436
680
|
|
|
2437
681
|
function flushSave() {
|
|
2438
682
|
// 数据库模式:先保存 LLM 偏移量,再备份数据库
|
|
2439
|
-
if (
|
|
683
|
+
if (getDb() && getDbPath()) {
|
|
2440
684
|
try {
|
|
2441
|
-
|
|
685
|
+
offsetStore.save();
|
|
2442
686
|
} catch (e) {
|
|
2443
687
|
console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
|
|
2444
688
|
}
|
|
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2452
696
|
* @returns {string|null} 备份文件路径,失败返回 null
|
|
2453
697
|
*/
|
|
2454
698
|
function backupDatabase(maxBackups = 3) {
|
|
2455
|
-
if (!
|
|
699
|
+
if (!getDb() || !getDbPath()) {
|
|
2456
700
|
console.error("[data-store] 数据库未初始化,跳过备份");
|
|
2457
701
|
return null;
|
|
2458
702
|
}
|
|
@@ -2464,16 +708,16 @@ export function createStore(filePath, options = {}) {
|
|
|
2464
708
|
.toISOString()
|
|
2465
709
|
.replace(/[-:T.]/g, "")
|
|
2466
710
|
.slice(0, 15); // YYYYMMDDHHmmss
|
|
2467
|
-
const baseName = path.basename(
|
|
711
|
+
const baseName = path.basename(getDbPath(), ".db");
|
|
2468
712
|
const backupName = `${baseName}-${timestamp}.db`;
|
|
2469
|
-
const backupDir = path.dirname(
|
|
713
|
+
const backupDir = path.dirname(getDbPath());
|
|
2470
714
|
const backupPath = path.join(backupDir, backupName);
|
|
2471
715
|
|
|
2472
716
|
console.error(`[data-store] 正在备份数据库: ${backupName}`);
|
|
2473
717
|
|
|
2474
718
|
// 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
|
|
2475
719
|
const backupDb = new Database(backupPath);
|
|
2476
|
-
|
|
720
|
+
getDb().backup("main", backupDb, "main");
|
|
2477
721
|
backupDb.close();
|
|
2478
722
|
|
|
2479
723
|
// 验证备份文件大小
|
|
@@ -2523,7 +767,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2523
767
|
|
|
2524
768
|
function stopBackup() {
|
|
2525
769
|
// 退出时执行备份
|
|
2526
|
-
if (
|
|
770
|
+
if (getDb() && getDbPath()) {
|
|
2527
771
|
backupDatabase();
|
|
2528
772
|
}
|
|
2529
773
|
}
|
|
@@ -2531,7 +775,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2531
775
|
function getUser(uid) {
|
|
2532
776
|
const idx = uidIndex.get(uid);
|
|
2533
777
|
if (idx !== undefined) return data[idx];
|
|
2534
|
-
if (
|
|
778
|
+
if (getDb()) return getJob(uid);
|
|
2535
779
|
return undefined;
|
|
2536
780
|
}
|
|
2537
781
|
|
|
@@ -2549,12 +793,25 @@ export function createStore(filePath, options = {}) {
|
|
|
2549
793
|
|
|
2550
794
|
function addUser(user, append) {
|
|
2551
795
|
const memoryIdx = uidIndex.get(user.uniqueId);
|
|
2552
|
-
if (
|
|
796
|
+
if (getDb() && memoryIdx === undefined) {
|
|
2553
797
|
// 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
|
|
2554
798
|
if (hasUserInDb(user.uniqueId)) {
|
|
2555
799
|
return;
|
|
2556
800
|
}
|
|
2557
|
-
|
|
801
|
+
const now = Date.now();
|
|
802
|
+
const writeTxn = getDb().transaction((job) => {
|
|
803
|
+
addUserToDb({
|
|
804
|
+
...job,
|
|
805
|
+
createdAt: job.createdAt || now,
|
|
806
|
+
updatedAt: job.updatedAt || now,
|
|
807
|
+
});
|
|
808
|
+
addJobBaseToDb({
|
|
809
|
+
...job,
|
|
810
|
+
createdAt: job.createdAt || now,
|
|
811
|
+
updatedAt: job.updatedAt || now,
|
|
812
|
+
});
|
|
813
|
+
});
|
|
814
|
+
writeTxn(user);
|
|
2558
815
|
return;
|
|
2559
816
|
}
|
|
2560
817
|
|
|
@@ -2616,7 +873,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2616
873
|
createdAt: now,
|
|
2617
874
|
updatedAt: now,
|
|
2618
875
|
};
|
|
2619
|
-
const writeTxn =
|
|
876
|
+
const writeTxn = getDb().transaction((job) => {
|
|
2620
877
|
addUserToDb(job);
|
|
2621
878
|
addJobBaseToDb(job);
|
|
2622
879
|
});
|
|
@@ -2628,195 +885,26 @@ export function createStore(filePath, options = {}) {
|
|
|
2628
885
|
}
|
|
2629
886
|
|
|
2630
887
|
function getPendingUsers() {
|
|
2631
|
-
if (
|
|
888
|
+
if (getDb()) {
|
|
2632
889
|
return getAllJobs().filter((u) => u.status === "pending");
|
|
2633
890
|
}
|
|
2634
891
|
return data.filter((u) => u.status === "pending");
|
|
2635
892
|
}
|
|
2636
893
|
|
|
2637
894
|
function getProcessedUsers() {
|
|
2638
|
-
if (
|
|
895
|
+
if (getDb()) {
|
|
2639
896
|
return getAllJobs().filter((u) => u.status === "done");
|
|
2640
897
|
}
|
|
2641
898
|
return data.filter((u) => u.status === "done");
|
|
2642
899
|
}
|
|
2643
900
|
|
|
2644
901
|
function getAllUsers() {
|
|
2645
|
-
if (
|
|
902
|
+
if (getDb()) {
|
|
2646
903
|
return getAllJobs();
|
|
2647
904
|
}
|
|
2648
905
|
return data;
|
|
2649
906
|
}
|
|
2650
907
|
|
|
2651
|
-
/**
|
|
2652
|
-
* 使用 LLM 对单个 job 的国家匹配度打分(0-100)
|
|
2653
|
-
* @param {Object} job - raw_jobs 中的一条记录
|
|
2654
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2655
|
-
* @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
|
|
2656
|
-
*/
|
|
2657
|
-
async function scoreJobLocation(job, targetLocations) {
|
|
2658
|
-
const { fetch: undiciFetch } = await import("undici");
|
|
2659
|
-
|
|
2660
|
-
const prompt = `
|
|
2661
|
-
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
|
|
2662
|
-
|
|
2663
|
-
目标国家列表: ${targetLocations.join(", ")}
|
|
2664
|
-
|
|
2665
|
-
重要:
|
|
2666
|
-
- 用户只要来自上述**任意一个**国家就算匹配。
|
|
2667
|
-
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
|
|
2668
|
-
- 请综合用户名、昵称、签名、位置等信息做判断。
|
|
2669
|
-
|
|
2670
|
-
用户信息:
|
|
2671
|
-
- 用户名: ${job.unique_id || "未知"}
|
|
2672
|
-
- 昵称: ${job.nickname || "未知"}
|
|
2673
|
-
- 签名: ${job.signature || "未知"}
|
|
2674
|
-
- 地区: ${job.region || "未知"}
|
|
2675
|
-
- 猜测国家(参考): ${job.guessed_location || "未知"}
|
|
2676
|
-
- 位置信息: ${job.location_created || "未知"}
|
|
2677
|
-
- 主页链接: ${job.bio_link || "未知"}
|
|
2678
|
-
|
|
2679
|
-
返回 JSON(仅返回 JSON,无其他内容):
|
|
2680
|
-
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
|
|
2681
|
-
|
|
2682
|
-
Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
|
|
2683
|
-
`;
|
|
2684
|
-
|
|
2685
|
-
try {
|
|
2686
|
-
const apiKey = process.env.APIKEY || "";
|
|
2687
|
-
const response = await undiciFetch(
|
|
2688
|
-
"http://82.156.52.214:18000/v1/chat/completions",
|
|
2689
|
-
{
|
|
2690
|
-
method: "POST",
|
|
2691
|
-
headers: {
|
|
2692
|
-
"Content-Type": "application/json",
|
|
2693
|
-
Authorization: `Bearer ${apiKey}`,
|
|
2694
|
-
},
|
|
2695
|
-
body: JSON.stringify({
|
|
2696
|
-
model: "zc-fast",
|
|
2697
|
-
messages: [{ role: "user", content: prompt }],
|
|
2698
|
-
max_tokens: 512,
|
|
2699
|
-
temperature: 0.1,
|
|
2700
|
-
}),
|
|
2701
|
-
},
|
|
2702
|
-
);
|
|
2703
|
-
|
|
2704
|
-
const result = await response.json();
|
|
2705
|
-
const content = result.choices?.[0]?.message?.content || "";
|
|
2706
|
-
|
|
2707
|
-
// 解析 JSON 响应(多层容错)
|
|
2708
|
-
let parsed = null;
|
|
2709
|
-
|
|
2710
|
-
// 尝试 1: 直接解析
|
|
2711
|
-
try {
|
|
2712
|
-
parsed = JSON.parse(content);
|
|
2713
|
-
} catch {
|
|
2714
|
-
// 尝试 2: 提取 {} 包裹的内容
|
|
2715
|
-
const match = content.match(/\{[\s\S]*\}/);
|
|
2716
|
-
if (match) {
|
|
2717
|
-
try {
|
|
2718
|
-
parsed = JSON.parse(match[0]);
|
|
2719
|
-
} catch {
|
|
2720
|
-
// 尝试 3: 清理常见问题后解析
|
|
2721
|
-
const cleaned = match[0]
|
|
2722
|
-
.replace(/"/g, '"') // 弯引号 → 直引号
|
|
2723
|
-
.replace(/\s+/g, " ") // 多余空白
|
|
2724
|
-
.trim();
|
|
2725
|
-
try {
|
|
2726
|
-
parsed = JSON.parse(cleaned);
|
|
2727
|
-
} catch {
|
|
2728
|
-
// 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
|
|
2729
|
-
const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
|
|
2730
|
-
if (scoreMatch) {
|
|
2731
|
-
let reason = "解析降级";
|
|
2732
|
-
// 找 "reason": 的位置,取到最后一个 } 前的内容
|
|
2733
|
-
const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
|
|
2734
|
-
if (reasonKeyPos !== -1) {
|
|
2735
|
-
const afterKey = content.substring(reasonKeyPos);
|
|
2736
|
-
const colonPos = afterKey.indexOf(":");
|
|
2737
|
-
const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
|
|
2738
|
-
const rawValue = afterKey.substring(valueStart);
|
|
2739
|
-
// 取到原始 content 最后一个 } 前
|
|
2740
|
-
const lastBrace = content.lastIndexOf("}");
|
|
2741
|
-
const reasonEnd = lastBrace - reasonKeyPos - valueStart;
|
|
2742
|
-
if (reasonEnd > 0) {
|
|
2743
|
-
reason = rawValue.substring(0, reasonEnd).trim();
|
|
2744
|
-
// 去掉首尾的引号
|
|
2745
|
-
if (reason.startsWith('"')) reason = reason.substring(1);
|
|
2746
|
-
if (reason.endsWith('"'))
|
|
2747
|
-
reason = reason.substring(0, reason.length - 1);
|
|
2748
|
-
}
|
|
2749
|
-
}
|
|
2750
|
-
parsed = {
|
|
2751
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2752
|
-
reason,
|
|
2753
|
-
};
|
|
2754
|
-
}
|
|
2755
|
-
}
|
|
2756
|
-
}
|
|
2757
|
-
}
|
|
2758
|
-
|
|
2759
|
-
// 尝试 5: 如果以上都失败,用更宽松的正则提取
|
|
2760
|
-
if (!parsed) {
|
|
2761
|
-
const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
|
|
2762
|
-
const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
|
|
2763
|
-
if (scoreMatch) {
|
|
2764
|
-
parsed = {
|
|
2765
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2766
|
-
reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
|
|
2767
|
-
};
|
|
2768
|
-
}
|
|
2769
|
-
}
|
|
2770
|
-
}
|
|
2771
|
-
|
|
2772
|
-
if (parsed && typeof parsed.score === "number") {
|
|
2773
|
-
return {
|
|
2774
|
-
uniqueId: job.unique_id,
|
|
2775
|
-
score: Math.max(0, Math.min(100, parsed.score)),
|
|
2776
|
-
reason: parsed.reason || "",
|
|
2777
|
-
};
|
|
2778
|
-
}
|
|
2779
|
-
|
|
2780
|
-
// 所有解析都失败,返回默认分
|
|
2781
|
-
console.error(
|
|
2782
|
-
`[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
|
|
2783
|
-
);
|
|
2784
|
-
return {
|
|
2785
|
-
uniqueId: job.unique_id,
|
|
2786
|
-
score: 50,
|
|
2787
|
-
reason: "LLM 响应解析失败,使用默认分",
|
|
2788
|
-
};
|
|
2789
|
-
} catch (e) {
|
|
2790
|
-
console.error(
|
|
2791
|
-
`[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
|
|
2792
|
-
);
|
|
2793
|
-
return {
|
|
2794
|
-
uniqueId: job.unique_id,
|
|
2795
|
-
score: 50,
|
|
2796
|
-
reason: `LLM 调用异常: ${e.message}`,
|
|
2797
|
-
};
|
|
2798
|
-
}
|
|
2799
|
-
}
|
|
2800
|
-
|
|
2801
|
-
/**
|
|
2802
|
-
* 批量对 jobs 进行 LLM 国家匹配度打分
|
|
2803
|
-
* @param {Object[]} jobs - raw_jobs 记录数组
|
|
2804
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2805
|
-
* @param {number} batchSize - 每批处理数量(并发),默认 10
|
|
2806
|
-
* @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
|
|
2807
|
-
*/
|
|
2808
|
-
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
|
|
2809
|
-
const results = [];
|
|
2810
|
-
for (let i = 0; i < jobs.length; i += batchSize) {
|
|
2811
|
-
const batch = jobs.slice(i, i + batchSize);
|
|
2812
|
-
const batchResults = await Promise.all(
|
|
2813
|
-
batch.map((job) => scoreJobLocation(job, targetLocations)),
|
|
2814
|
-
);
|
|
2815
|
-
results.push(...batchResults);
|
|
2816
|
-
}
|
|
2817
|
-
return results;
|
|
2818
|
-
}
|
|
2819
|
-
|
|
2820
908
|
/**
|
|
2821
909
|
* 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
|
|
2822
910
|
* @param {string[]} locations - 目标国家列表(null 表示不限制)
|
|
@@ -2828,8 +916,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2828
916
|
* @returns {{ moved: number }} 实际移动的数量
|
|
2829
917
|
*/
|
|
2830
918
|
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
|
|
2831
|
-
if (!
|
|
2832
|
-
return { moved: 0, error: "
|
|
919
|
+
if (!getDb()) {
|
|
920
|
+
return { moved: 0, error: "getDb() not ready" };
|
|
2833
921
|
}
|
|
2834
922
|
|
|
2835
923
|
const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
|
|
@@ -2860,7 +948,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2860
948
|
|
|
2861
949
|
// 统计符合条件的数量
|
|
2862
950
|
const count =
|
|
2863
|
-
|
|
951
|
+
getDb()
|
|
2864
952
|
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
2865
953
|
.get(...args)?.c || 0;
|
|
2866
954
|
|
|
@@ -2868,156 +956,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2868
956
|
return { moved: 0 };
|
|
2869
957
|
}
|
|
2870
958
|
|
|
2871
|
-
// 如果启用 LLM
|
|
959
|
+
// 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
|
|
2872
960
|
if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
|
|
2873
|
-
const
|
|
2874
|
-
const
|
|
2875
|
-
const
|
|
2876
|
-
const
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
const
|
|
2880
|
-
|
|
2881
|
-
.join(", ");
|
|
961
|
+
const llmTotal = options.llmTotal ?? 200; // 总条数
|
|
962
|
+
const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
|
|
963
|
+
const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
|
|
964
|
+
const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
|
|
965
|
+
const llmMinScore = options.llmMinScore ?? 60;
|
|
966
|
+
const llmSampleSize = options.llmSampleSize ?? 100;
|
|
967
|
+
const maxBatches = options.llmMaxBatches ?? 10;
|
|
968
|
+
|
|
2882
969
|
console.error(
|
|
2883
|
-
`[data-store] LLM 打分开始:
|
|
970
|
+
`[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
|
|
2884
971
|
);
|
|
2885
|
-
if (offsetSummary) {
|
|
2886
|
-
console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
|
|
2887
|
-
}
|
|
2888
972
|
|
|
2889
973
|
// 返回 Promise,调用方需要 await
|
|
2890
974
|
return (async () => {
|
|
2891
|
-
const allTagQualified = [];
|
|
2892
|
-
const allNonTagQualified = [];
|
|
975
|
+
const allTagQualified = [];
|
|
976
|
+
const allNonTagQualified = [];
|
|
2893
977
|
const allScores = [];
|
|
2894
978
|
|
|
2895
|
-
//
|
|
2896
|
-
|
|
2897
|
-
|
|
979
|
+
// ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
|
|
980
|
+
let tagOffset = offsetStore.get("_tag") || 0;
|
|
981
|
+
const tagGlobalCount =
|
|
982
|
+
getDb()
|
|
983
|
+
.prepare(
|
|
984
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
|
|
985
|
+
)
|
|
986
|
+
.get(...args)?.c || 0;
|
|
2898
987
|
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
988
|
+
if (tagOffset >= tagGlobalCount) {
|
|
989
|
+
tagOffset = 0;
|
|
990
|
+
offsetStore.set("_tag", 0);
|
|
991
|
+
}
|
|
2902
992
|
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
const locationCount =
|
|
2907
|
-
db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
|
|
993
|
+
console.error(
|
|
994
|
+
`[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
|
|
995
|
+
);
|
|
2908
996
|
|
|
2909
|
-
|
|
2910
|
-
|
|
2911
|
-
|
|
997
|
+
while (
|
|
998
|
+
allTagQualified.length < llmTagLimit &&
|
|
999
|
+
tagOffset < tagGlobalCount
|
|
1000
|
+
) {
|
|
1001
|
+
const batch = getDb()
|
|
1002
|
+
.prepare(
|
|
1003
|
+
`
|
|
1004
|
+
SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
|
|
1005
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
1006
|
+
LIMIT ? OFFSET ?
|
|
1007
|
+
`,
|
|
1008
|
+
)
|
|
1009
|
+
.all(
|
|
1010
|
+
...args,
|
|
1011
|
+
Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
|
|
1012
|
+
tagOffset,
|
|
2912
1013
|
);
|
|
1014
|
+
|
|
1015
|
+
if (!batch.length) break;
|
|
1016
|
+
|
|
1017
|
+
allTagQualified.push(...batch.map((s) => s.unique_id));
|
|
1018
|
+
tagOffset += batch.length;
|
|
1019
|
+
|
|
1020
|
+
console.error(
|
|
1021
|
+
`[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
|
|
1022
|
+
);
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
offsetStore.set("_tag", tagOffset);
|
|
1026
|
+
|
|
1027
|
+
// ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
|
|
1028
|
+
for (const location of normalizedLocations) {
|
|
1029
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
1030
|
+
|
|
1031
|
+
const nonTagOffsetKey = `${location}:nonTag`;
|
|
1032
|
+
let offset = offsetStore.get(nonTagOffsetKey) || 0;
|
|
1033
|
+
|
|
1034
|
+
const locationArgs = [...args, location];
|
|
1035
|
+
const nonTagCount =
|
|
1036
|
+
getDb()
|
|
1037
|
+
.prepare(
|
|
1038
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
|
|
1039
|
+
)
|
|
1040
|
+
.get(...locationArgs)?.c || 0;
|
|
1041
|
+
|
|
1042
|
+
if (nonTagCount === 0) {
|
|
1043
|
+
console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
|
|
2913
1044
|
continue;
|
|
2914
1045
|
}
|
|
2915
1046
|
|
|
2916
|
-
|
|
2917
|
-
if (offset >= locationCount) {
|
|
1047
|
+
if (offset >= nonTagCount) {
|
|
2918
1048
|
offset = 0;
|
|
2919
|
-
|
|
1049
|
+
offsetStore.set(nonTagOffsetKey, 0);
|
|
2920
1050
|
}
|
|
2921
1051
|
|
|
2922
1052
|
console.error(
|
|
2923
|
-
`[data-store] 国家 ${location}: 共 ${
|
|
1053
|
+
`[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
|
|
2924
1054
|
);
|
|
2925
1055
|
|
|
2926
1056
|
for (let batch = 0; batch < maxBatches; batch++) {
|
|
2927
|
-
|
|
2928
|
-
if (remaining <= 0) break;
|
|
1057
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
2929
1058
|
|
|
2930
|
-
const
|
|
2931
|
-
const samples = db
|
|
1059
|
+
const samples = getDb()
|
|
2932
1060
|
.prepare(
|
|
2933
1061
|
`
|
|
2934
1062
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1063
|
+
AND (sources NOT LIKE '%tag%' OR sources IS NULL)
|
|
1064
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2938
1065
|
LIMIT ? OFFSET ?
|
|
2939
1066
|
`,
|
|
2940
1067
|
)
|
|
2941
|
-
.all(...locationArgs,
|
|
1068
|
+
.all(...locationArgs, llmSampleSize, offset);
|
|
2942
1069
|
|
|
2943
|
-
if (samples.length
|
|
1070
|
+
if (!samples.length) break;
|
|
2944
1071
|
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
1072
|
+
const scores = await scoreJobsBatch(
|
|
1073
|
+
samples,
|
|
1074
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
2948
1075
|
);
|
|
2949
|
-
const
|
|
2950
|
-
|
|
2951
|
-
);
|
|
2952
|
-
|
|
2953
|
-
// tag 来源直接加入合格列表
|
|
2954
|
-
if (tagSamples.length > 0) {
|
|
2955
|
-
allTagQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2956
|
-
console.error(
|
|
2957
|
-
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2958
|
-
);
|
|
2959
|
-
}
|
|
2960
|
-
|
|
2961
|
-
// 非 tag 来源走 LLM 打分
|
|
2962
|
-
let batchQualified = [];
|
|
2963
|
-
let scores = [];
|
|
2964
|
-
if (nonTagSamples.length > 0) {
|
|
2965
|
-
scores = await scoreJobsBatch(
|
|
2966
|
-
nonTagSamples,
|
|
2967
|
-
DEFAULT_TARGET_LOCATIONS,
|
|
2968
|
-
);
|
|
2969
|
-
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2970
|
-
allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2971
|
-
}
|
|
2972
|
-
|
|
1076
|
+
const qualified = scores.filter((s) => s.score >= llmMinScore);
|
|
1077
|
+
allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
|
|
2973
1078
|
allScores.push(...scores);
|
|
2974
1079
|
|
|
2975
|
-
|
|
2976
|
-
|
|
1080
|
+
offset += samples.length;
|
|
1081
|
+
offsetStore.set(nonTagOffsetKey, offset);
|
|
1082
|
+
|
|
2977
1083
|
console.error(
|
|
2978
|
-
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length}
|
|
1084
|
+
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
|
|
2979
1085
|
);
|
|
2980
|
-
|
|
2981
|
-
// 更新偏移量记忆
|
|
2982
|
-
offset += samples.length;
|
|
2983
|
-
llmSampleOffsets.set(location, offset);
|
|
2984
|
-
|
|
2985
|
-
// 检查是否两个类型都达到阈值,都达到才停止
|
|
2986
|
-
const tagReached = allTagQualified.length >= llmMinTagReturn;
|
|
2987
|
-
const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2988
|
-
if (tagReached && nonTagReached) {
|
|
2989
|
-
console.error(
|
|
2990
|
-
`[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
|
|
2991
|
-
);
|
|
2992
|
-
break;
|
|
2993
|
-
}
|
|
2994
1086
|
}
|
|
2995
|
-
|
|
2996
|
-
// 检查是否两个类型都达到阈值,都达到才停止所有国家采样
|
|
2997
|
-
const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
|
|
2998
|
-
const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2999
|
-
if (tagReachedGlobal && nonTagReachedGlobal) break;
|
|
3000
1087
|
}
|
|
3001
1088
|
|
|
3002
|
-
//
|
|
3003
|
-
|
|
3004
|
-
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3005
|
-
const tagCount = Math.min(allTagQualified.length, tagMaxCount);
|
|
3006
|
-
const nonTagMaxCount = safeLimit - tagCount;
|
|
3007
|
-
|
|
3008
|
-
const nonTagQualifiedScores = allScores
|
|
3009
|
-
.filter((s) => s.score >= llmMinScore)
|
|
3010
|
-
.sort((a, b) => b.score - a.score);
|
|
3011
|
-
const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
|
|
3012
|
-
|
|
3013
|
-
const qualified = [
|
|
3014
|
-
...allTagQualified.slice(0, tagCount),
|
|
3015
|
-
...finalNonTagQualified,
|
|
3016
|
-
];
|
|
1089
|
+
// ===== 最终结果 =====
|
|
1090
|
+
const qualified = [...allTagQualified, ...allNonTagQualified];
|
|
3017
1091
|
|
|
3018
1092
|
if (!qualified.length) {
|
|
3019
1093
|
console.error(
|
|
3020
|
-
`[data-store] LLM
|
|
1094
|
+
`[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
|
|
3021
1095
|
);
|
|
3022
1096
|
return {
|
|
3023
1097
|
moved: 0,
|
|
@@ -3027,11 +1101,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3027
1101
|
};
|
|
3028
1102
|
}
|
|
3029
1103
|
|
|
1104
|
+
console.error(
|
|
1105
|
+
`[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
|
|
1106
|
+
);
|
|
1107
|
+
|
|
3030
1108
|
// 移动符合条件的记录
|
|
3031
1109
|
const placeholders = qualified.map(() => "?").join(", ");
|
|
3032
|
-
const moveTxn =
|
|
3033
|
-
|
|
3034
|
-
|
|
1110
|
+
const moveTxn = getDb().transaction(() => {
|
|
1111
|
+
getDb()
|
|
1112
|
+
.prepare(
|
|
1113
|
+
`
|
|
3035
1114
|
INSERT OR IGNORE INTO jobs (
|
|
3036
1115
|
unique_id, nickname, status, sources, pinned,
|
|
3037
1116
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3050,41 +1129,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3050
1129
|
FROM raw_jobs
|
|
3051
1130
|
WHERE unique_id IN (${placeholders})
|
|
3052
1131
|
`,
|
|
3053
|
-
|
|
1132
|
+
)
|
|
1133
|
+
.run(...qualified);
|
|
3054
1134
|
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
1135
|
+
getDb()
|
|
1136
|
+
.prepare(
|
|
1137
|
+
`DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
|
|
1138
|
+
)
|
|
1139
|
+
.run(...qualified);
|
|
3058
1140
|
});
|
|
3059
|
-
|
|
3060
1141
|
moveTxn();
|
|
3061
1142
|
markStatsDirty();
|
|
3062
1143
|
|
|
3063
1144
|
// 持久化偏移量到数据库
|
|
3064
|
-
|
|
1145
|
+
offsetStore.save();
|
|
3065
1146
|
|
|
3066
1147
|
// 打印最终偏移量状态
|
|
3067
|
-
const finalOffsetSummary = Array.from(
|
|
1148
|
+
const finalOffsetSummary = Array.from(offsetStore.entries())
|
|
3068
1149
|
.map(([k, v]) => `${k}:${v}`)
|
|
3069
1150
|
.join(", ");
|
|
3070
|
-
console.error(
|
|
3071
|
-
`[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
|
|
3072
|
-
);
|
|
3073
1151
|
console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
|
|
3074
|
-
|
|
1152
|
+
|
|
3075
1153
|
return {
|
|
3076
1154
|
moved: qualified.length,
|
|
3077
1155
|
scored: allScores.length,
|
|
3078
1156
|
qualified: qualified.length,
|
|
3079
|
-
scores:
|
|
1157
|
+
scores: allScores,
|
|
3080
1158
|
};
|
|
3081
1159
|
})();
|
|
3082
1160
|
}
|
|
3083
1161
|
|
|
3084
1162
|
// 常规移动:INSERT + DELETE 事务
|
|
3085
|
-
const moveTxn =
|
|
3086
|
-
|
|
3087
|
-
|
|
1163
|
+
const moveTxn = getDb().transaction(() => {
|
|
1164
|
+
getDb()
|
|
1165
|
+
.prepare(
|
|
1166
|
+
`
|
|
3088
1167
|
INSERT OR IGNORE INTO jobs (
|
|
3089
1168
|
unique_id, nickname, status, sources, pinned,
|
|
3090
1169
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3107,11 +1186,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3107
1186
|
COALESCE(video_count, 0) DESC, created_at DESC
|
|
3108
1187
|
LIMIT ?
|
|
3109
1188
|
`,
|
|
3110
|
-
|
|
1189
|
+
)
|
|
1190
|
+
.run(...args, safeLimit);
|
|
3111
1191
|
|
|
3112
1192
|
// 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
|
|
3113
|
-
|
|
3114
|
-
|
|
1193
|
+
getDb()
|
|
1194
|
+
.prepare(
|
|
1195
|
+
`
|
|
3115
1196
|
DELETE FROM raw_jobs
|
|
3116
1197
|
WHERE unique_id IN (
|
|
3117
1198
|
SELECT unique_id FROM raw_jobs
|
|
@@ -3122,7 +1203,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3122
1203
|
LIMIT ?
|
|
3123
1204
|
)
|
|
3124
1205
|
`,
|
|
3125
|
-
|
|
1206
|
+
)
|
|
1207
|
+
.run(...args, safeLimit);
|
|
3126
1208
|
});
|
|
3127
1209
|
|
|
3128
1210
|
moveTxn();
|
|
@@ -3140,9 +1222,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3140
1222
|
) {
|
|
3141
1223
|
// 记录客户端登录状态
|
|
3142
1224
|
clientLoginStatus.set(userId, !!loggedIn);
|
|
3143
|
-
if (
|
|
1225
|
+
if (getDb()) {
|
|
3144
1226
|
const now = Date.now();
|
|
3145
|
-
const ongoingRow =
|
|
1227
|
+
const ongoingRow = getDb()
|
|
3146
1228
|
.prepare(
|
|
3147
1229
|
`
|
|
3148
1230
|
SELECT *
|
|
@@ -3157,10 +1239,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3157
1239
|
)
|
|
3158
1240
|
.get(userId, now, expireMs);
|
|
3159
1241
|
if (ongoingRow) {
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
ongoingRow.unique_id
|
|
3163
|
-
);
|
|
1242
|
+
getDb()
|
|
1243
|
+
.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
|
|
1244
|
+
.run(now, ongoingRow.unique_id);
|
|
3164
1245
|
return {
|
|
3165
1246
|
uniqueId: ongoingRow.unique_id,
|
|
3166
1247
|
nickname: ongoingRow.nickname,
|
|
@@ -3242,7 +1323,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3242
1323
|
for (const filter of filters) {
|
|
3243
1324
|
where.push(filter);
|
|
3244
1325
|
}
|
|
3245
|
-
return
|
|
1326
|
+
return getDb()
|
|
3246
1327
|
.prepare(
|
|
3247
1328
|
`
|
|
3248
1329
|
SELECT *
|
|
@@ -3285,7 +1366,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3285
1366
|
);
|
|
3286
1367
|
args.push(...normalizedLocations);
|
|
3287
1368
|
}
|
|
3288
|
-
return
|
|
1369
|
+
return getDb()
|
|
3289
1370
|
.prepare(
|
|
3290
1371
|
`
|
|
3291
1372
|
SELECT *
|
|
@@ -3355,9 +1436,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3355
1436
|
|
|
3356
1437
|
function claimRow(row) {
|
|
3357
1438
|
if (!row) return null;
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
1439
|
+
getDb()
|
|
1440
|
+
.prepare(
|
|
1441
|
+
"UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
|
|
1442
|
+
)
|
|
1443
|
+
.run(now, userId, row.unique_id);
|
|
3361
1444
|
markStatsDirty();
|
|
3362
1445
|
return {
|
|
3363
1446
|
uniqueId: row.unique_id,
|
|
@@ -3367,7 +1450,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3367
1450
|
};
|
|
3368
1451
|
}
|
|
3369
1452
|
|
|
3370
|
-
const expiredRow =
|
|
1453
|
+
const expiredRow = getDb()
|
|
3371
1454
|
.prepare(
|
|
3372
1455
|
`
|
|
3373
1456
|
SELECT *
|
|
@@ -3382,9 +1465,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3382
1465
|
.get(now, expireMs);
|
|
3383
1466
|
let expiredCandidate = null;
|
|
3384
1467
|
if (expiredRow) {
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
1468
|
+
getDb()
|
|
1469
|
+
.prepare(
|
|
1470
|
+
"UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
|
|
1471
|
+
)
|
|
1472
|
+
.run(expiredRow.unique_id);
|
|
3388
1473
|
expiredCandidate = mapJobRow({
|
|
3389
1474
|
...expiredRow,
|
|
3390
1475
|
status: "pending",
|
|
@@ -3476,7 +1561,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3476
1561
|
return null;
|
|
3477
1562
|
}
|
|
3478
1563
|
|
|
3479
|
-
if (!
|
|
1564
|
+
if (!getDb()) {
|
|
3480
1565
|
const now = Date.now();
|
|
3481
1566
|
|
|
3482
1567
|
// 0. 该客户端有未过期的任务,续期返回
|
|
@@ -3614,16 +1699,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3614
1699
|
locations = null,
|
|
3615
1700
|
loggedIn = true,
|
|
3616
1701
|
) {
|
|
3617
|
-
if (
|
|
1702
|
+
if (getDb()) {
|
|
3618
1703
|
const now = Date.now();
|
|
3619
1704
|
const info = {
|
|
3620
|
-
path: "
|
|
1705
|
+
path: "getDb()",
|
|
3621
1706
|
userId,
|
|
3622
1707
|
expireMs,
|
|
3623
1708
|
loggedIn,
|
|
3624
1709
|
};
|
|
3625
1710
|
|
|
3626
|
-
const ongoingRow =
|
|
1711
|
+
const ongoingRow = getDb()
|
|
3627
1712
|
.prepare(
|
|
3628
1713
|
`
|
|
3629
1714
|
SELECT *
|
|
@@ -3723,7 +1808,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3723
1808
|
ORDER BY follower_count DESC, created_at ASC, unique_id ASC
|
|
3724
1809
|
LIMIT 1
|
|
3725
1810
|
`;
|
|
3726
|
-
const row =
|
|
1811
|
+
const row = getDb()
|
|
1812
|
+
.prepare(sql)
|
|
1813
|
+
.get(...args);
|
|
3727
1814
|
return { row, sql, args };
|
|
3728
1815
|
}
|
|
3729
1816
|
|
|
@@ -3764,7 +1851,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3764
1851
|
ORDER BY created_at ASC, unique_id ASC
|
|
3765
1852
|
LIMIT 1
|
|
3766
1853
|
`;
|
|
3767
|
-
const row =
|
|
1854
|
+
const row = getDb()
|
|
1855
|
+
.prepare(sql)
|
|
1856
|
+
.get(...args);
|
|
3768
1857
|
return { row, sql, args };
|
|
3769
1858
|
}
|
|
3770
1859
|
|
|
@@ -3777,7 +1866,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3777
1866
|
ORDER BY claimed_at ASC
|
|
3778
1867
|
LIMIT 1
|
|
3779
1868
|
`;
|
|
3780
|
-
const expiredRow =
|
|
1869
|
+
const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
|
|
3781
1870
|
info.expired = expiredRow
|
|
3782
1871
|
? {
|
|
3783
1872
|
uniqueId: expiredRow.unique_id,
|
|
@@ -4103,7 +2192,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4103
2192
|
}
|
|
4104
2193
|
|
|
4105
2194
|
function commitJob(uniqueId, result) {
|
|
4106
|
-
if (
|
|
2195
|
+
if (getDb()) {
|
|
4107
2196
|
const user = getJob(uniqueId);
|
|
4108
2197
|
if (!user) return { saved: false, error: "user not found" };
|
|
4109
2198
|
|
|
@@ -4129,7 +2218,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4129
2218
|
}
|
|
4130
2219
|
|
|
4131
2220
|
function commitNewExplore(uniqueId, result) {
|
|
4132
|
-
if (
|
|
2221
|
+
if (getDb()) {
|
|
4133
2222
|
const existing = getJob(uniqueId);
|
|
4134
2223
|
if (existing) {
|
|
4135
2224
|
updateUserFromResult(existing, result);
|
|
@@ -4179,7 +2268,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4179
2268
|
}
|
|
4180
2269
|
|
|
4181
2270
|
function resetJob(uniqueId) {
|
|
4182
|
-
if (
|
|
2271
|
+
if (getDb()) {
|
|
4183
2272
|
const user = getJob(uniqueId);
|
|
4184
2273
|
if (!user) return { saved: false, error: "user not found" };
|
|
4185
2274
|
user.status = "pending";
|
|
@@ -4210,7 +2299,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4210
2299
|
}
|
|
4211
2300
|
|
|
4212
2301
|
function togglePin(uniqueId) {
|
|
4213
|
-
if (
|
|
2302
|
+
if (getDb()) {
|
|
4214
2303
|
const user = getJob(uniqueId);
|
|
4215
2304
|
if (!user) return { saved: false, error: "user not found" };
|
|
4216
2305
|
const nextPinned = !user.pinned;
|
|
@@ -4227,13 +2316,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4227
2316
|
}
|
|
4228
2317
|
|
|
4229
2318
|
function getNextRedoJob(userId, maxAgeSeconds = 43200) {
|
|
4230
|
-
if (
|
|
2319
|
+
if (getDb()) {
|
|
4231
2320
|
const now = Date.now();
|
|
4232
2321
|
const threshold = now - maxAgeSeconds * 1000;
|
|
4233
2322
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
4234
2323
|
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
4235
2324
|
const placeholders = targetLocations.map(() => "?").join(",");
|
|
4236
|
-
const row =
|
|
2325
|
+
const row = getDb()
|
|
4237
2326
|
.prepare(
|
|
4238
2327
|
`
|
|
4239
2328
|
SELECT *
|
|
@@ -4248,9 +2337,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4248
2337
|
)
|
|
4249
2338
|
.get(...targetLocations, defaultTime, threshold, defaultTime);
|
|
4250
2339
|
if (!row) return null;
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
|
|
2340
|
+
getDb()
|
|
2341
|
+
.prepare(
|
|
2342
|
+
"UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
|
|
2343
|
+
)
|
|
2344
|
+
.run(now, now, row.unique_id);
|
|
4254
2345
|
return {
|
|
4255
2346
|
uniqueId: row.unique_id,
|
|
4256
2347
|
nickname: row.nickname,
|
|
@@ -4299,7 +2390,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4299
2390
|
}
|
|
4300
2391
|
|
|
4301
2392
|
function commitRedoJob(uniqueId, result) {
|
|
4302
|
-
if (
|
|
2393
|
+
if (getDb()) {
|
|
4303
2394
|
const user = getJob(uniqueId);
|
|
4304
2395
|
if (!user) return { saved: false, error: "user not found" };
|
|
4305
2396
|
user.refreshTime = Date.now();
|
|
@@ -4443,13 +2534,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4443
2534
|
: [];
|
|
4444
2535
|
const hasCountryFilter = targetCountries.length > 0;
|
|
4445
2536
|
|
|
4446
|
-
if (
|
|
2537
|
+
if (getDb()) {
|
|
4447
2538
|
const l = Math.max(1, parseInt(limit) || 5);
|
|
4448
2539
|
|
|
4449
2540
|
let sql = `
|
|
4450
2541
|
SELECT *
|
|
4451
2542
|
FROM jobs_base
|
|
4452
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
2543
|
+
WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
|
|
4453
2544
|
AND COALESCE(user_update_count, 0) <= 0
|
|
4454
2545
|
`;
|
|
4455
2546
|
const sqlParams = [];
|
|
@@ -4460,18 +2551,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4460
2551
|
sqlParams.push(...targetCountries);
|
|
4461
2552
|
}
|
|
4462
2553
|
|
|
4463
|
-
// 优先级:
|
|
2554
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
|
|
4464
2555
|
sql += ` ORDER BY
|
|
2556
|
+
CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
|
|
4465
2557
|
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
4466
2558
|
created_at ASC,
|
|
4467
2559
|
unique_id ASC
|
|
4468
2560
|
LIMIT ?`;
|
|
4469
2561
|
sqlParams.push(l);
|
|
4470
2562
|
|
|
4471
|
-
const rows =
|
|
2563
|
+
const rows = getDb()
|
|
2564
|
+
.prepare(sql)
|
|
2565
|
+
.all(...sqlParams);
|
|
4472
2566
|
if (rows.length === 0) return [];
|
|
4473
2567
|
const now = Date.now();
|
|
4474
|
-
const bumpStmt =
|
|
2568
|
+
const bumpStmt = getDb().prepare(
|
|
4475
2569
|
`
|
|
4476
2570
|
UPDATE jobs_base
|
|
4477
2571
|
SET user_update_count = COALESCE(user_update_count, 0) + 1,
|
|
@@ -4479,7 +2573,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4479
2573
|
WHERE unique_id = ?
|
|
4480
2574
|
`,
|
|
4481
2575
|
);
|
|
4482
|
-
const bumpTxn =
|
|
2576
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4483
2577
|
for (const item of items) {
|
|
4484
2578
|
bumpStmt.run(now, item.unique_id);
|
|
4485
2579
|
}
|
|
@@ -4497,9 +2591,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4497
2591
|
const pending = data
|
|
4498
2592
|
.filter((u) => {
|
|
4499
2593
|
const updateCount = u.userUpdateCount;
|
|
4500
|
-
|
|
4501
|
-
|
|
4502
|
-
|
|
2594
|
+
// ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
|
|
2595
|
+
const ttSellerEligible =
|
|
2596
|
+
u.ttSeller === null ||
|
|
2597
|
+
u.ttSeller === undefined ||
|
|
2598
|
+
u.ttSeller === "" ||
|
|
2599
|
+
u.ttSeller === 1;
|
|
2600
|
+
if (!ttSellerEligible) return false;
|
|
4503
2601
|
if (
|
|
4504
2602
|
updateCount === null ||
|
|
4505
2603
|
updateCount === undefined ||
|
|
@@ -4514,7 +2612,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4514
2612
|
return false;
|
|
4515
2613
|
})
|
|
4516
2614
|
.sort((a, b) => {
|
|
4517
|
-
// 优先级:
|
|
2615
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
|
|
2616
|
+
const aIsSeller = a.ttSeller === 1 ? 0 : 1;
|
|
2617
|
+
const bIsSeller = b.ttSeller === 1 ? 0 : 1;
|
|
2618
|
+
if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
|
|
4518
2619
|
const aIsTag = (a.sources || "").includes("tag");
|
|
4519
2620
|
const bIsTag = (b.sources || "").includes("tag");
|
|
4520
2621
|
if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
|
|
@@ -4531,7 +2632,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4531
2632
|
}
|
|
4532
2633
|
|
|
4533
2634
|
function updateUserInfo(uniqueId, info) {
|
|
4534
|
-
if (
|
|
2635
|
+
if (getDb()) {
|
|
4535
2636
|
return updateJobInfo(uniqueId, info, true);
|
|
4536
2637
|
}
|
|
4537
2638
|
|
|
@@ -4550,15 +2651,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4550
2651
|
}
|
|
4551
2652
|
|
|
4552
2653
|
function updateUserLocation(uniqueId, location) {
|
|
4553
|
-
if (
|
|
4554
|
-
const existing =
|
|
2654
|
+
if (getDb()) {
|
|
2655
|
+
const existing = getDb()
|
|
4555
2656
|
.prepare("SELECT * FROM jobs WHERE unique_id = ?")
|
|
4556
2657
|
.get(uniqueId);
|
|
4557
2658
|
if (!existing) return { error: "user not found" };
|
|
4558
2659
|
const now = Date.now();
|
|
4559
|
-
|
|
4560
|
-
|
|
4561
|
-
|
|
2660
|
+
getDb()
|
|
2661
|
+
.prepare(
|
|
2662
|
+
"UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
|
|
2663
|
+
)
|
|
2664
|
+
.run(location, now, now, uniqueId);
|
|
4562
2665
|
return { ok: true, location, modifiedAt: now };
|
|
4563
2666
|
}
|
|
4564
2667
|
|
|
@@ -4574,13 +2677,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4574
2677
|
|
|
4575
2678
|
// 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
|
|
4576
2679
|
function moveJobToRaw(uniqueId) {
|
|
4577
|
-
if (!
|
|
2680
|
+
if (!getDb()) return false;
|
|
4578
2681
|
const safeId = String(uniqueId).trim();
|
|
4579
2682
|
if (!safeId) return false;
|
|
4580
2683
|
|
|
4581
|
-
const moveSingleTxn =
|
|
4582
|
-
|
|
4583
|
-
|
|
2684
|
+
const moveSingleTxn = getDb().transaction(() => {
|
|
2685
|
+
getDb()
|
|
2686
|
+
.prepare(
|
|
2687
|
+
`
|
|
4584
2688
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4585
2689
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4586
2690
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4602,21 +2706,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4602
2706
|
user_create_time
|
|
4603
2707
|
FROM jobs WHERE unique_id = ?
|
|
4604
2708
|
`,
|
|
4605
|
-
|
|
2709
|
+
)
|
|
2710
|
+
.run(safeId);
|
|
4606
2711
|
|
|
4607
|
-
|
|
2712
|
+
getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
|
|
4608
2713
|
});
|
|
4609
2714
|
moveSingleTxn();
|
|
4610
2715
|
return true;
|
|
4611
2716
|
}
|
|
4612
2717
|
|
|
4613
2718
|
function batchUpdateUserInfo(updates) {
|
|
4614
|
-
if (
|
|
2719
|
+
if (getDb()) {
|
|
4615
2720
|
const results = [];
|
|
4616
2721
|
const rawMoveList = [];
|
|
4617
2722
|
const sellerMoveList = [];
|
|
4618
2723
|
|
|
4619
|
-
const txn =
|
|
2724
|
+
const txn = getDb().transaction((items) => {
|
|
4620
2725
|
items.forEach((item) => {
|
|
4621
2726
|
const uniqueId = item.uniqueId;
|
|
4622
2727
|
// 处理 { error: true, statusCode: xxx } 的情况
|
|
@@ -4668,8 +2773,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4668
2773
|
// 批量移动商家用户到 jobs
|
|
4669
2774
|
if (sellerMoveList.length > 0) {
|
|
4670
2775
|
const placeholders = sellerMoveList.map(() => "?").join(",");
|
|
4671
|
-
|
|
4672
|
-
|
|
2776
|
+
getDb()
|
|
2777
|
+
.prepare(
|
|
2778
|
+
`
|
|
4673
2779
|
INSERT OR REPLACE INTO jobs (
|
|
4674
2780
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4675
2781
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4691,18 +2797,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4691
2797
|
user_create_time
|
|
4692
2798
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4693
2799
|
`,
|
|
4694
|
-
|
|
2800
|
+
)
|
|
2801
|
+
.run(...sellerMoveList);
|
|
4695
2802
|
|
|
4696
|
-
|
|
4697
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4698
|
-
|
|
2803
|
+
getDb()
|
|
2804
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2805
|
+
.run(...sellerMoveList);
|
|
4699
2806
|
}
|
|
4700
2807
|
|
|
4701
2808
|
// 批量移动非商家用户到 raw_jobs
|
|
4702
2809
|
if (rawMoveList.length > 0) {
|
|
4703
2810
|
const placeholders = rawMoveList.map(() => "?").join(",");
|
|
4704
|
-
|
|
4705
|
-
|
|
2811
|
+
getDb()
|
|
2812
|
+
.prepare(
|
|
2813
|
+
`
|
|
4706
2814
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4707
2815
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4708
2816
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4724,11 +2832,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4724
2832
|
user_create_time
|
|
4725
2833
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4726
2834
|
`,
|
|
4727
|
-
|
|
2835
|
+
)
|
|
2836
|
+
.run(...rawMoveList);
|
|
4728
2837
|
|
|
4729
|
-
|
|
4730
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4731
|
-
|
|
2838
|
+
getDb()
|
|
2839
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2840
|
+
.run(...rawMoveList);
|
|
4732
2841
|
}
|
|
4733
2842
|
|
|
4734
2843
|
// 清理内部标记
|
|
@@ -4780,8 +2889,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4780
2889
|
return { registered: 0, skipped: 0 };
|
|
4781
2890
|
}
|
|
4782
2891
|
|
|
4783
|
-
if (
|
|
4784
|
-
const insertStmt =
|
|
2892
|
+
if (getDb()) {
|
|
2893
|
+
const insertStmt = getDb().prepare(`
|
|
4785
2894
|
INSERT OR IGNORE INTO videos (
|
|
4786
2895
|
id,
|
|
4787
2896
|
href,
|
|
@@ -4797,7 +2906,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4797
2906
|
let registered = 0;
|
|
4798
2907
|
let skipped = 0;
|
|
4799
2908
|
const now = Date.now();
|
|
4800
|
-
const txn =
|
|
2909
|
+
const txn = getDb().transaction((items) => {
|
|
4801
2910
|
for (const item of items) {
|
|
4802
2911
|
const result = insertStmt.run(
|
|
4803
2912
|
item.id,
|
|
@@ -4844,7 +2953,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4844
2953
|
}
|
|
4845
2954
|
|
|
4846
2955
|
function getVideos() {
|
|
4847
|
-
if (
|
|
2956
|
+
if (getDb()) {
|
|
4848
2957
|
return getAllVideoRows().map(mapVideoRow);
|
|
4849
2958
|
}
|
|
4850
2959
|
return videos;
|
|
@@ -4852,7 +2961,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4852
2961
|
|
|
4853
2962
|
function getVideo(videoId) {
|
|
4854
2963
|
if (!videoId) return null;
|
|
4855
|
-
if (
|
|
2964
|
+
if (getDb()) {
|
|
4856
2965
|
return mapVideoRow(getVideoRow(videoId));
|
|
4857
2966
|
}
|
|
4858
2967
|
return videos.find((video) => video.id === videoId) || null;
|
|
@@ -4862,8 +2971,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4862
2971
|
const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
|
|
4863
2972
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
4864
2973
|
|
|
4865
|
-
if (
|
|
4866
|
-
const rows =
|
|
2974
|
+
if (getDb()) {
|
|
2975
|
+
const rows = getDb()
|
|
4867
2976
|
.prepare(
|
|
4868
2977
|
`
|
|
4869
2978
|
SELECT *
|
|
@@ -4873,7 +2982,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4873
2982
|
`,
|
|
4874
2983
|
)
|
|
4875
2984
|
.all(safeLimit, safeOffset);
|
|
4876
|
-
const total =
|
|
2985
|
+
const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4877
2986
|
return {
|
|
4878
2987
|
total,
|
|
4879
2988
|
limit: safeLimit,
|
|
@@ -4891,16 +3000,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4891
3000
|
}
|
|
4892
3001
|
|
|
4893
3002
|
function getVideoCount() {
|
|
4894
|
-
if (
|
|
4895
|
-
return
|
|
3003
|
+
if (getDb()) {
|
|
3004
|
+
return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4896
3005
|
}
|
|
4897
3006
|
return videos.length;
|
|
4898
3007
|
}
|
|
4899
3008
|
|
|
4900
3009
|
function getPendingCommentTasks(limit) {
|
|
4901
|
-
if (
|
|
3010
|
+
if (getDb()) {
|
|
4902
3011
|
const l = Math.max(1, parseInt(limit) || 1);
|
|
4903
|
-
const rows =
|
|
3012
|
+
const rows = getDb()
|
|
4904
3013
|
.prepare(
|
|
4905
3014
|
`
|
|
4906
3015
|
SELECT *
|
|
@@ -4912,14 +3021,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4912
3021
|
)
|
|
4913
3022
|
.all(l);
|
|
4914
3023
|
if (rows.length === 0) return [];
|
|
4915
|
-
const bumpStmt =
|
|
3024
|
+
const bumpStmt = getDb().prepare(
|
|
4916
3025
|
`
|
|
4917
3026
|
UPDATE videos
|
|
4918
3027
|
SET user_update_count = COALESCE(user_update_count, 0) + 1
|
|
4919
3028
|
WHERE id = ?
|
|
4920
3029
|
`,
|
|
4921
3030
|
);
|
|
4922
|
-
const bumpTxn =
|
|
3031
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4923
3032
|
for (const item of items) bumpStmt.run(item.id);
|
|
4924
3033
|
});
|
|
4925
3034
|
bumpTxn(rows);
|
|
@@ -4949,17 +3058,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4949
3058
|
}
|
|
4950
3059
|
|
|
4951
3060
|
function commitCommentTask(videoId) {
|
|
4952
|
-
if (
|
|
3061
|
+
if (getDb()) {
|
|
4953
3062
|
const video = getVideoRow(videoId);
|
|
4954
3063
|
if (!video) return { ok: false, error: "video not found" };
|
|
4955
3064
|
const nextCount = (video.user_update_count || 0) + 1;
|
|
4956
|
-
|
|
4957
|
-
|
|
3065
|
+
getDb()
|
|
3066
|
+
.prepare(
|
|
3067
|
+
`
|
|
4958
3068
|
UPDATE videos
|
|
4959
3069
|
SET user_update_count = ?
|
|
4960
3070
|
WHERE id = ?
|
|
4961
3071
|
`,
|
|
4962
|
-
|
|
3072
|
+
)
|
|
3073
|
+
.run(nextCount, videoId);
|
|
4963
3074
|
return { ok: true, userUpdateCount: nextCount };
|
|
4964
3075
|
}
|
|
4965
3076
|
|
|
@@ -5024,6 +3135,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5024
3135
|
getClientLoginStatus,
|
|
5025
3136
|
trackClient,
|
|
5026
3137
|
getActiveClients,
|
|
3138
|
+
moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
|
|
5027
3139
|
registerVideos,
|
|
5028
3140
|
getVideo,
|
|
5029
3141
|
getVideos,
|
|
@@ -5051,6 +3163,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5051
3163
|
|
|
5052
3164
|
// Helper: return the LLM sampling offsets as a plain object.
function getLlmSampleOffsets() {
  const offsetPairs = offsetStore.entries();
  return Object.fromEntries(offsetPairs);
}
|
|
3168
|
+
|
|
3169
|
+
// ===== Move seller accounts from jobs / raw_jobs into jobs_base =====

/**
 * Moves rows with `tt_seller = 1 AND video_count = 0` out of `jobs` and
 * `raw_jobs` and into `jobs_base`.
 *
 * Steps, all executed inside a single transaction so that a failure in any
 * of them rolls the whole move back instead of leaving the tables half-updated:
 *   1. Copy matching rows from `jobs` and `raw_jobs` with `INSERT OR IGNORE`.
 *   2. Reset `user_update_count` to 0 on every matching row in `jobs_base`
 *      (freshly copied rows as well as rows that were already there);
 *      `tt_seller` stays 1 so later priority checks can still use it.
 *   3. Delete the matching rows from `jobs` and `raw_jobs`.
 *
 * Afterwards the number of matching `jobs_base` rows with
 * `user_update_count <= 0` is counted for reporting (best effort).
 *
 * @returns {{ok: true, fromJobs: number, fromRawJobs: number,
 *            totalInserted: number, resetCount: number,
 *            availableInBase: number}
 *          | {ok: false, error: string}}
 */
function moveSellerJobsToBase() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const COLUMNS = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "claimed_by",
    "claimed_at",
    "error",
    "pinned",
    "no_video",
    "restricted",
    "user_update_count",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "confirmed_location",
    "modified_at",
    "follower_count",
    "following_count",
    "heart_count",
    "refresh_time",
    "processed",
    "processed_at",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "sec_uid",
    "status_code",
    "latest_video_time",
    "bio_link",
  ];
  const cols = COLUMNS.join(",");
  const insertSql = `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM `;
  const condition = "WHERE tt_seller = 1 AND video_count = 0";

  // Tracks which step is running so a failure can be attributed in the error.
  let phase = "insert";

  const moveTxn = db.transaction(() => {
    // 1. jobs -> jobs_base, raw_jobs -> jobs_base
    const fromJobs =
      db.prepare(insertSql + "jobs " + condition).run().changes || 0;
    const fromRawJobs =
      db.prepare(insertSql + "raw_jobs " + condition).run().changes || 0;

    // 2. Reset user_update_count. The affected-row count lives on the object
    //    returned by run(), not on the prepared statement.
    phase = "reset";
    const resetInfo = db
      .prepare(
        `UPDATE jobs_base
         SET user_update_count = 0
         WHERE video_count = 0
           AND tt_seller = 1`,
      )
      .run();
    const resetCount = resetInfo.changes || 0;

    // 3. Remove the rows that have been moved.
    phase = "delete";
    db.prepare("DELETE FROM jobs " + condition).run();
    db.prepare("DELETE FROM raw_jobs " + condition).run();

    return { fromJobs, fromRawJobs, resetCount };
  });

  let moved;
  try {
    moved = moveTxn();
  } catch (e) {
    // The transaction has been rolled back, so nothing was moved.
    const prefix = phase === "insert" ? "" : `${phase} failed: `;
    return { ok: false, error: `${prefix}${e.message}` };
  }

  // 4. Informational only: how many jobs_base rows match tt_seller = 1,
  //    video_count = 0 and user_update_count <= 0. A failure here must not
  //    turn an already-committed move into an error, so it reports 0 instead.
  let available = 0;
  try {
    const row = db
      .prepare(
        `SELECT COUNT(*) as total FROM jobs_base
         WHERE tt_seller = 1
           AND COALESCE(user_update_count, 0) <= 0
           AND video_count = 0`,
      )
      .get();
    available = row.total;
  } catch (_) {
    // best effort: keep available = 0
  }

  return {
    ok: true,
    fromJobs: moved.fromJobs,
    fromRawJobs: moved.fromRawJobs,
    totalInserted: moved.fromJobs + moved.fromRawJobs,
    resetCount: moved.resetCount,
    availableInBase: available,
  };
}
|
|
5056
3288
|
}
|