tt-help-cli-ycl 1.3.93 → 1.3.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -91
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +5 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +635 -2404
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +34 -1
- package/src/watch/tag-service.js +142 -11
package/src/watch/data-store.js
CHANGED
|
@@ -1,1580 +1,190 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
if (path.extname(resolved).toLowerCase() !== ".db") {
|
|
19
|
-
throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
|
|
20
|
-
}
|
|
21
|
-
return resolved;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
/**
 * Close the module-level SQLite connection (if one is open) and forget the
 * database path it was opened from.
 *
 * The cached handle and path are cleared *before* `close()` is attempted, so
 * a failing close can no longer leave a stale handle behind. Any error thrown
 * by `close()` still propagates to the caller.
 */
function resetDbConnection() {
  const openHandle = db;

  // Reset the shared state first: it must end up cleared however close() ends.
  db = null;
  dbPath = null;

  if (openHandle) {
    openHandle.close();
  }
}
|
|
31
|
-
|
|
32
|
-
/**
 * Read the legacy user JSON exports and merge them into one record per user.
 *
 * Both files are expected to contain a JSON array of user objects. Entries
 * are keyed by `uniqueId` (or `unique_id`); when the same user appears more
 * than once, later entries overwrite the fields of earlier ones, and the
 * second file is applied after the first.
 *
 * A missing path, a missing file, or a file whose top-level value is not an
 * array contributes nothing. Read/parse failures are logged and skipped.
 *
 * @param {string} [userFilePath] - Path of the "result.json" export.
 * @param {string} [doneFilePath] - Path of the "result-done.json" export.
 * @returns {object[]} Merged user records, each carrying a `uniqueId` field.
 */
function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
  const merged = new Map();

  const mergeFile = (targetPath, label) => {
    if (!targetPath || !fs.existsSync(targetPath)) return;

    try {
      // Node does not strip a UTF-8 byte-order mark and JSON.parse rejects
      // it, so remove it before parsing.
      const text = fs.readFileSync(targetPath, "utf-8").replace(/^\uFEFF/, "");
      const parsed = JSON.parse(text);
      if (!Array.isArray(parsed)) return;

      for (const item of parsed) {
        const uniqueId = item?.uniqueId || item?.unique_id;
        if (!uniqueId) continue;
        merged.set(uniqueId, { ...merged.get(uniqueId), ...item, uniqueId });
      }
    } catch (e) {
      console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
    }
  };

  mergeFile(userFilePath, "result.json");
  mergeFile(doneFilePath, "result-done.json");

  return [...merged.values()];
}
|
|
60
|
-
|
|
61
|
-
/**
 * Read the legacy video JSON export.
 *
 * @param {string} [videoPath] - Path of the "result-videos.json" export.
 * @returns {Array} The parsed array, or an empty array when the path is
 *   falsy, the file does not exist, the top-level JSON value is not an array,
 *   or reading/parsing fails (the failure is logged).
 */
function loadLegacyVideosFromFile(videoPath) {
  if (!videoPath || !fs.existsSync(videoPath)) return [];

  try {
    // Node does not strip a UTF-8 byte-order mark and JSON.parse rejects it,
    // so remove it before parsing.
    const text = fs.readFileSync(videoPath, "utf-8").replace(/^\uFEFF/, "");
    const parsed = JSON.parse(text);
    return Array.isArray(parsed) ? parsed : [];
  } catch (e) {
    console.error(
      `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
    );
    return [];
  }
}
|
|
75
|
-
|
|
76
|
-
/**
 * Open (creating it if necessary) the SQLite database at `filePath` and make
 * sure every table, column and index used by this module exists.
 *
 * Steps, in order:
 *   1. resolve the path via `normalizeDbFilePath` and create its directory;
 *   2. close any connection that is already cached in `db` (previously the
 *      handle was simply overwritten, leaking the old connection);
 *   3. open the database in WAL journal mode;
 *   4. create `users`, `jobs`, `jobs_base`, `raw_jobs`, `videos` and `tags`
 *      with `CREATE TABLE IF NOT EXISTS`, and add any column that an older
 *      database file is missing with `ALTER TABLE ... ADD COLUMN`;
 *   5. create the lookup indexes with `CREATE INDEX IF NOT EXISTS`.
 *
 * Side effects: assigns the module-level `db` and `dbPath`, and logs the
 * number of rows in `users` when done.
 *
 * @param {string} filePath - Database file path, passed to `normalizeDbFilePath`.
 */
function initUserDb(filePath) {
  const resolvedPath = normalizeDbFilePath(filePath);
  fs.mkdirSync(path.dirname(resolvedPath), { recursive: true });

  // Release a previously opened connection instead of overwriting it.
  if (db) {
    db.close();
    db = null;
  }
  dbPath = resolvedPath;
  db = new Database(dbPath);
  db.pragma("journal_mode = WAL");

  // Columns that jobs, jobs_base and raw_jobs declare identically, in
  // declaration order.
  const sharedJobColumns = [
    "unique_id TEXT PRIMARY KEY",
    "nickname TEXT",
    "status TEXT DEFAULT 'pending'",
    "sources TEXT",
    "claimed_by TEXT",
    "claimed_at INTEGER",
    "error TEXT",
    "pinned INTEGER DEFAULT 0",
    "no_video INTEGER DEFAULT 0",
    "restricted INTEGER DEFAULT 0",
    "user_update_count INTEGER DEFAULT 0",
    "tt_seller INTEGER",
    "verified INTEGER",
    "video_count INTEGER DEFAULT 0",
    "comment_count INTEGER DEFAULT 0",
    "guessed_location TEXT",
    "location_created TEXT",
    "confirmed_location TEXT",
    "modified_at INTEGER",
    "follower_count INTEGER DEFAULT 0",
    "following_count INTEGER DEFAULT 0",
    "heart_count INTEGER DEFAULT 0",
    "refresh_time INTEGER",
    "processed INTEGER DEFAULT 0",
    "processed_at INTEGER",
    "created_at INTEGER",
    "updated_at INTEGER",
    "region TEXT",
    "signature TEXT",
    "sec_uid TEXT",
    "status_code INTEGER",
  ];

  // Create one of the three job-shaped tables: shared columns + its extras.
  const createJobLikeTable = (table, extraColumns = []) => {
    const columnsSql = [...sharedJobColumns, ...extraColumns].join(",\n      ");
    db.exec(`
    CREATE TABLE IF NOT EXISTS ${table} (
      ${columnsSql}
    )
  `);
  };

  // Add every listed column that the existing table does not have yet.
  const addMissingColumns = (table, columns) => {
    const existing = new Set(
      db
        .prepare(`PRAGMA table_info(${table})`)
        .all()
        .map((column) => column.name),
    );
    for (const [column, type] of Object.entries(columns)) {
      if (!existing.has(column)) {
        db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${type}`);
      }
    }
  };

  db.exec(`
    CREATE TABLE IF NOT EXISTS users (
      unique_id TEXT PRIMARY KEY,
      tt_seller TEXT,
      verified INTEGER,
      location_created TEXT,
      created_at TEXT,
      updated_at TEXT
    )
  `);

  // jobs (+ migration of columns added after the first release)
  createJobLikeTable("jobs");
  addMissingColumns("jobs", {
    status_code: "INTEGER",
    latest_video_time: "INTEGER",
    confirmed_location: "TEXT",
    modified_at: "INTEGER",
    bio_link: "TEXT",
    top_video_play_count: "INTEGER",
    top_video_href: "TEXT",
    user_create_time: "INTEGER",
  });

  // jobs_base (+ migration)
  createJobLikeTable("jobs_base", ["latest_video_time INTEGER", "bio_link TEXT"]);
  addMissingColumns("jobs_base", {
    status_code: "INTEGER",
    latest_video_time: "INTEGER",
    confirmed_location: "TEXT",
    modified_at: "INTEGER",
    bio_link: "TEXT",
    user_create_time: "INTEGER",
  });

  // raw_jobs (+ migration)
  createJobLikeTable("raw_jobs", ["latest_video_time INTEGER"]);
  addMissingColumns("raw_jobs", {
    status_code: "INTEGER",
    latest_video_time: "INTEGER",
    confirmed_location: "TEXT",
    modified_at: "INTEGER",
    bio_link: "TEXT",
    user_create_time: "INTEGER",
  });

  db.exec(`
    CREATE TABLE IF NOT EXISTS videos (
      id TEXT PRIMARY KEY,
      href TEXT,
      author_unique_id TEXT,
      location_created TEXT,
      tt_seller INTEGER DEFAULT 0,
      registered_at INTEGER,
      user_update_count INTEGER DEFAULT 0,
      play_count INTEGER,
      digg_count INTEGER,
      comment_count INTEGER,
      share_count INTEGER,
      collect_count INTEGER,
      stats_updated_at INTEGER,
      create_time INTEGER
    )
  `);

  // Indexes on jobs and videos, created in the same order as before.
  const jobAndVideoIndexes = [
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_status_video
    ON jobs(status, video_count DESC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
    ON jobs(claimed_by, status, claimed_at)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
    ON jobs(status, claimed_at)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
    ON jobs(tt_seller, verified, location_created, refresh_time)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
    ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
    ON jobs(created_at ASC, unique_id ASC)
    WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
    ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
    WHERE status = 'pending'
      AND COALESCE(pinned, 0) = 0
      AND tt_seller = 1
      AND verified = 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
    ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
    WHERE status = 'pending'
      AND COALESCE(pinned, 0) = 0
      AND (
        instr(COALESCE(sources, ''), '"following"') > 0
        OR instr(COALESCE(sources, ''), '"follower"') > 0
      )
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
    ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
    WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
    ON jobs(created_at ASC, unique_id ASC)
    WHERE (tt_seller IS NULL OR tt_seller = '')
      AND (user_update_count IS NULL OR user_update_count <= 0)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
    ON jobs(created_at ASC, unique_id ASC)
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
    ON videos(user_update_count, tt_seller DESC, registered_at ASC)
  `,
    `
    CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
    ON videos(tt_seller DESC, registered_at ASC, id)
    WHERE user_update_count IS NULL OR user_update_count <= 0
  `,
  ];
  for (const indexSql of jobAndVideoIndexes) {
    db.exec(indexSql);
  }

  // videos migration: statistics columns, then create_time.
  addMissingColumns("videos", {
    play_count: "INTEGER",
    digg_count: "INTEGER",
    comment_count: "INTEGER",
    share_count: "INTEGER",
    collect_count: "INTEGER",
    stats_updated_at: "INTEGER",
    create_time: "INTEGER",
  });

  // tags table: tag discovery and scoring
  db.exec(`
    CREATE TABLE IF NOT EXISTS tags (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      tag TEXT NOT NULL UNIQUE,
      status TEXT NOT NULL DEFAULT 'new',
      score REAL NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      scored_at TEXT,
      score_count INTEGER NOT NULL DEFAULT 0,
      countries TEXT NOT NULL DEFAULT '[]',
      matched_countries TEXT DEFAULT '[]',
      total_posts INTEGER DEFAULT 0,
      author_count INTEGER DEFAULT 0,
      matched_authors INTEGER DEFAULT 0,
      pushed_users INTEGER DEFAULT 0,
      source TEXT NOT NULL DEFAULT 'llm',
      user_prompt TEXT,
      last_error TEXT
    )
  `);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
  db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);

  const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
  console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
}
|
|
423
|
-
|
|
424
|
-
/**
 * Import the legacy JSON exports into the SQLite database.
 *
 * Resets any open connection, (re)initialises the database at `dbFilePath`,
 * then inserts every legacy user into `users` and (through `addJobToDb`)
 * into `jobs`, and every legacy video into `videos`. All inserts are
 * `INSERT OR IGNORE`, so rows that already exist are left untouched. Users
 * and videos are each written inside their own transaction.
 *
 * Legacy records may use camelCase or snake_case keys; both spellings are
 * accepted for every video field, including the seller flag (previously only
 * `ttSeller` was read, so `tt_seller` in snake_case exports was dropped).
 *
 * @param {object} options
 * @param {string} options.dbFilePath - Target database file.
 * @param {string} [options.usersFilePath] - Legacy "result.json".
 * @param {string} [options.doneFilePath] - Legacy "result-done.json".
 * @param {string} [options.videosFilePath] - Legacy "result-videos.json".
 * @returns {{dbPath: string, usersImported: number, jobsImported: number,
 *   videosImported: number, totalUsers: number, totalJobs: number,
 *   totalVideos: number}} Row-count deltas and totals after the import.
 */
export function importLegacyJsonToDb({
  dbFilePath,
  usersFilePath,
  doneFilePath,
  videosFilePath,
}) {
  resetDbConnection();
  initUserDb(dbFilePath);

  const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
  const legacyVideos = loadLegacyVideosFromFile(videosFilePath);

  const countRows = (table) =>
    db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c;

  const beforeUsers = countRows("users");
  const beforeJobs = countRows("jobs");
  const beforeVideos = countRows("videos");

  const insertUserStmt = db.prepare(`
    INSERT OR IGNORE INTO users (unique_id) VALUES (?)
  `);
  const insertVideoStmt = db.prepare(`
    INSERT OR IGNORE INTO videos (
      id,
      href,
      author_unique_id,
      location_created,
      tt_seller,
      registered_at,
      user_update_count,
      create_time
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const importUsersTxn = db.transaction((items) => {
    for (const item of items) {
      const uniqueId = item.uniqueId || item.unique_id;
      if (!uniqueId) continue;
      insertUserStmt.run(uniqueId);
      addJobToDb({ ...item, uniqueId });
    }
  });

  const importVideosTxn = db.transaction((items) => {
    for (const item of items) {
      if (!item?.id) continue;
      insertVideoStmt.run(
        item.id,
        item.href || null,
        item.authorUniqueId || item.author_unique_id || null,
        item.locationCreated || item.location_created || null,
        // Accept the snake_case spelling like every other field does.
        item.ttSeller || item.tt_seller ? 1 : 0,
        item.registeredAt || item.registered_at || Date.now(),
        item.userUpdateCount || item.user_update_count || 0,
        item.createTime || item.create_time || null,
      );
    }
  });

  importUsersTxn(legacyUsers);
  importVideosTxn(legacyVideos);

  const afterUsers = countRows("users");
  const afterJobs = countRows("jobs");
  const afterVideos = countRows("videos");

  return {
    dbPath,
    usersImported: afterUsers - beforeUsers,
    jobsImported: afterJobs - beforeJobs,
    videosImported: afterVideos - beforeVideos,
    totalUsers: afterUsers,
    totalJobs: afterJobs,
    totalVideos: afterVideos,
  };
}
|
|
499
|
-
|
|
500
|
-
/**
 * Public shutdown hook: releases the store's SQLite connection by delegating
 * to `resetDbConnection`.
 */
export function closeStoreDb() {
  resetDbConnection();
}
|
|
503
|
-
|
|
504
|
-
/**
 * Tell whether a row with the given `unique_id` exists in the `users` table.
 *
 * @param {string} uid - The user's unique id.
 * @returns {boolean} `true` when a matching row is found; `false` when none
 *   is found or no database connection is open.
 */
function hasUserInDb(uid) {
  if (!db) {
    return false;
  }
  const lookup = db.prepare("SELECT 1 FROM users WHERE unique_id = ?");
  return Boolean(lookup.get(uid));
}
|
|
509
|
-
|
|
510
|
-
/**
 * Insert a user into the `users` table unless a row with the same
 * `unique_id` already exists (`INSERT OR IGNORE`). Does nothing when no
 * database connection is open.
 *
 * `ttSeller` and `verified` are stored as tri-state flags: `null` when the
 * value is `undefined`, `null` or the empty string, otherwise `1` for truthy
 * and `0` for falsy values.
 *
 * `created_at` and `updated_at` are written from a single timestamp so a
 * freshly inserted row always carries identical values (two separate
 * `new Date()` calls could straddle a millisecond boundary).
 *
 * @param {object} user
 * @param {string} user.uniqueId
 * @param {*} [user.ttSeller]
 * @param {*} [user.verified]
 * @param {string} [user.locationCreated]
 */
function addUserToDb(user) {
  if (!db) return;

  const toTriState = (value) => {
    if (value === undefined || value === null || value === "") return null;
    return value ? 1 : 0;
  };
  const timestamp = new Date().toISOString();

  db.prepare(
    `
    INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `,
  ).run(
    user.uniqueId,
    toTriState(user.ttSeller),
    toTriState(user.verified),
    user.locationCreated || null,
    timestamp,
    timestamp,
  );
}
|
|
538
|
-
|
|
539
|
-
function addJobToDb(user) {
|
|
540
|
-
if (!db) return;
|
|
541
|
-
const now = Date.now();
|
|
542
|
-
db.prepare(
|
|
543
|
-
`
|
|
544
|
-
INSERT OR IGNORE INTO jobs (
|
|
545
|
-
unique_id,
|
|
546
|
-
nickname,
|
|
547
|
-
status,
|
|
548
|
-
sources,
|
|
549
|
-
claimed_by,
|
|
550
|
-
claimed_at,
|
|
551
|
-
error,
|
|
552
|
-
pinned,
|
|
553
|
-
no_video,
|
|
554
|
-
restricted,
|
|
555
|
-
user_update_count,
|
|
556
|
-
tt_seller,
|
|
557
|
-
verified,
|
|
558
|
-
video_count,
|
|
559
|
-
comment_count,
|
|
560
|
-
guessed_location,
|
|
561
|
-
location_created,
|
|
562
|
-
follower_count,
|
|
563
|
-
following_count,
|
|
564
|
-
heart_count,
|
|
565
|
-
refresh_time,
|
|
566
|
-
processed,
|
|
567
|
-
processed_at,
|
|
568
|
-
created_at,
|
|
569
|
-
updated_at,
|
|
570
|
-
region,
|
|
571
|
-
signature,
|
|
572
|
-
bio_link,
|
|
573
|
-
sec_uid
|
|
574
|
-
)
|
|
575
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
576
|
-
`,
|
|
577
|
-
).run(
|
|
578
|
-
user.uniqueId,
|
|
579
|
-
user.nickname || null,
|
|
580
|
-
user.status || inferStatus(user),
|
|
581
|
-
JSON.stringify(
|
|
582
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
583
|
-
),
|
|
584
|
-
user.claimedBy || null,
|
|
585
|
-
user.claimedAt || null,
|
|
586
|
-
user.error || null,
|
|
587
|
-
user.pinned ? 1 : 0,
|
|
588
|
-
user.noVideo ? 1 : 0,
|
|
589
|
-
user.restricted ? 1 : 0,
|
|
590
|
-
user.userUpdateCount || 0,
|
|
591
|
-
user.ttSeller === undefined ||
|
|
592
|
-
user.ttSeller === null ||
|
|
593
|
-
user.ttSeller === ""
|
|
594
|
-
? null
|
|
595
|
-
: user.ttSeller
|
|
596
|
-
? 1
|
|
597
|
-
: 0,
|
|
598
|
-
user.verified === undefined ||
|
|
599
|
-
user.verified === null ||
|
|
600
|
-
user.verified === ""
|
|
601
|
-
? null
|
|
602
|
-
: user.verified
|
|
603
|
-
? 1
|
|
604
|
-
: 0,
|
|
605
|
-
user.videoCount || 0,
|
|
606
|
-
user.commentCount || 0,
|
|
607
|
-
user.guessedLocation || null,
|
|
608
|
-
user.locationCreated || null,
|
|
609
|
-
user.followerCount || 0,
|
|
610
|
-
user.followingCount || 0,
|
|
611
|
-
user.heartCount || 0,
|
|
612
|
-
user.refreshTime || null,
|
|
613
|
-
user.processed ? 1 : 0,
|
|
614
|
-
user.processedAt || null,
|
|
615
|
-
user.createdAt || now,
|
|
616
|
-
user.updatedAt || now,
|
|
617
|
-
user.region || null,
|
|
618
|
-
user.signature || null,
|
|
619
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
620
|
-
user.secUid || null,
|
|
621
|
-
);
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
/**
 * Count the rows of the `users` table.
 *
 * @returns {number} The row count, or 0 when no database connection is open.
 */
function getUserDbCount() {
  if (!db) {
    return 0;
  }
  const row = db.prepare("SELECT COUNT(*) as c FROM users").get();
  return row.c;
}
|
|
628
|
-
|
|
629
|
-
/**
 * Count the rows of the `jobs` table.
 *
 * @returns {number} The row count, or 0 when no database connection is open.
 */
function getJobsCount() {
  if (!db) {
    return 0;
  }
  const row = db.prepare("SELECT COUNT(*) as c FROM jobs").get();
  return row.c;
}
|
|
633
|
-
|
|
634
|
-
/**
 * Count the `jobs` rows whose status is 'pending'.
 *
 * @returns {number} The row count, or 0 when no database connection is open.
 */
function getPendingJobsCount() {
  if (!db) {
    return 0;
  }
  const pendingQuery = db.prepare(
    "SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'",
  );
  const row = pendingQuery.get();
  return row.c;
}
|
|
640
|
-
|
|
641
|
-
/**
 * Count the `jobs` rows whose `tt_seller` is NULL or the empty string and
 * whose `user_update_count` is NULL or not greater than zero.
 *
 * @returns {number} The row count, or 0 when no database connection is open.
 */
function getPendingJobsUserUpdateCount() {
  if (!db) {
    return 0;
  }
  const queueQuery = db.prepare(
    `
    SELECT COUNT(*) as c
    FROM jobs
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
  `,
  );
  const row = queueQuery.get();
  return row.c;
}
|
|
654
|
-
|
|
655
|
-
/**
 * Count the rows of the `raw_jobs` table.
 *
 * @returns {number} The row count, or 0 when no database connection is open.
 */
function getRawJobsCount() {
  if (!db) {
    return 0;
  }
  const row = db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get();
  return row.c;
}
|
|
659
|
-
|
|
660
|
-
/**
 * Collect the numbers shown on the dashboard.
 *
 * Queries issued:
 *   - one aggregate scan over `jobs` (totals per status, per source, and the
 *     number of "target" users);
 *   - the count of `jobs_base` rows still waiting for a profile update
 *     (`tt_seller` NULL/empty and `user_update_count` NULL or <= 0);
 *   - per-country counts of finished jobs (`status = 'done'`), grouped by
 *     `location_created` with '未知' for NULL;
 *   - per-country counts of target users (only when `targetLocations` is
 *     non-empty);
 *   - the total row count of `jobs_base`.
 *
 * A "target" user is a job with `tt_seller = 1`, `verified = 0` and a
 * `location_created` contained in `targetLocations`; with an empty list no
 * row qualifies.
 *
 * `SUM()` yields NULL when `jobs` has no rows. Every aggregate is normalised
 * to 0 so the result always carries numbers (previously only part of
 * `sourceStats` was).
 *
 * @param {string[]} [targetLocations=[]] - Country codes counted as targets.
 * @returns {object|null} The statistics object, or `null` when no database
 *   connection is open.
 */
function getDashboardStatsFromDb(targetLocations = []) {
  if (!db) return null;

  const targetPlaceholders = targetLocations.map(() => "?").join(", ");
  // With no target locations the condition is forced to never match.
  const targetLocationSql = targetLocations.length
    ? `AND location_created IN (${targetPlaceholders})`
    : "AND 1 = 0";
  const orZero = (value) => value || 0;

  // All per-status / per-source statistics of jobs in a single scan.
  const aggregateRow = db
    .prepare(
      `
    SELECT
      COUNT(*) as total,
      SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
      SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
      SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
      SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
      SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
      SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${targetLocationSql} THEN 1 ELSE 0 END) as targetUsers,
      SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
      SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
      SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
      SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
      SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
      SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
      SUM(CASE
        WHEN status != 'done'
          AND instr(COALESCE(sources, ''), '"video"') = 0
          AND instr(COALESCE(sources, ''), '"comment"') = 0
          AND instr(COALESCE(sources, ''), '"guess"') = 0
          AND instr(COALESCE(sources, ''), '"following"') = 0
          AND instr(COALESCE(sources, ''), '"follower"') = 0
        THEN 1 ELSE 0 END) as seed
    FROM jobs
  `,
    )
    .get(...targetLocations);

  // userUpdateTasks is counted separately, from jobs_base.
  const userUpdateTasksRow = db
    .prepare(
      `
    SELECT COUNT(*) as userUpdateTasks
    FROM jobs_base
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
  `,
    )
    .get();

  // countryStats and targetCountryStats need GROUP BY, so they stay separate.
  const countryStats = db
    .prepare(
      `
    SELECT
      COALESCE(location_created, '未知') as country,
      COUNT(*) as count,
      SUM(CASE
        WHEN tt_seller = 1 AND verified = 0 ${targetLocationSql}
        THEN 1 ELSE 0 END) as targetCount
    FROM jobs
    WHERE status = 'done'
    GROUP BY COALESCE(location_created, '未知')
    ORDER BY count DESC
  `,
    )
    .all(...targetLocations);

  const targetCountryStats = targetLocations.length
    ? db
        .prepare(
          `
      SELECT location_created as country, COUNT(*) as count
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${targetPlaceholders})
      GROUP BY location_created
      ORDER BY count DESC
    `,
        )
        .all(...targetLocations)
    : [];

  const jobsBaseCount = db
    .prepare("SELECT COUNT(*) as total FROM jobs_base")
    .get().total;

  const pending = orZero(aggregateRow.pending);
  const done = orZero(aggregateRow.done);
  const restricted = orZero(aggregateRow.restricted);
  const error = orZero(aggregateRow.error);

  return {
    totalUsers: aggregateRow.total,
    rawJobs: getRawJobsCount(),
    dbTotalUsers: getUserDbCount(),
    jobsTotal: aggregateRow.total,
    jobsBaseTotal: jobsBaseCount,
    jobsPending: pending,
    processedUsers: done,
    pendingUsers: pending,
    processingUsers: orZero(aggregateRow.processing),
    restrictedUsers: restricted,
    errorUsers: error,
    targetUsers: orZero(aggregateRow.targetUsers),
    userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
    targetCountryStats,
    countryStats,
    sourceStats: {
      seed: orZero(aggregateRow.seed),
      video: orZero(aggregateRow.video),
      comment: orZero(aggregateRow.comment),
      guess: orZero(aggregateRow.guess),
      following: orZero(aggregateRow.following),
      follower: orZero(aggregateRow.follower),
      processed: done,
      restricted,
      error,
      noVideo: orZero(aggregateRow.noVideo),
    },
  };
}
|
|
785
|
-
|
|
786
|
-
/**
 * Count the pending jobs per guessed country.
 *
 * Rows of `jobs` with `status = 'pending'` are grouped by `guessed_location`
 * (NULL is reported as '未知') and ordered by descending count.
 *
 * @returns {Array<{country: string, count: number}>} One entry per country;
 *   an empty array when no database connection is open.
 */
function getPendingByCountryFromDb() {
  if (!db) {
    return [];
  }

  const pendingByCountry = db.prepare(
    `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs
    WHERE status = 'pending'
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `,
  );

  return pendingByCountry.all();
}
|
|
806
|
-
|
|
807
|
-
/**
 * Count, per guessed country, the `jobs_base` rows that still wait for a
 * profile update.
 *
 * A row qualifies when `tt_seller` is NULL or the empty string and
 * `user_update_count` is NULL or not greater than zero. This is the same
 * predicate the dashboard total (`userUpdateTasks`) and the
 * `idx_jobs_user_update_queue*` indexes use; the previous `tt_seller IS NULL`
 * check skipped empty-string rows, so the per-country counts could add up to
 * less than the total.
 *
 * Rows are grouped by `guessed_location` (NULL is reported as '未知') and
 * ordered by descending count.
 *
 * @returns {Array<{country: string, count: number}>} One entry per country;
 *   an empty array when no database connection is open.
 */
function getUserUpdateByCountryFromDb() {
  if (!db) return [];

  const rows = db
    .prepare(
      `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `,
    )
    .all();

  return rows;
}
|
|
828
|
-
|
|
829
|
-
/**
 * Count, per guessed country, the `jobs_base` rows that are "stuck": their
 * `user_update_count` is exactly 1 while `tt_seller` is still NULL or the
 * empty string.
 *
 * The predicate matches the one `restoreAttachStuckByCountry` applies when it
 * resets such rows; the previous `tt_seller IS NULL` check skipped
 * empty-string rows, so this listing could under-report what a restore
 * would touch.
 *
 * Rows are grouped by `guessed_location` (NULL is reported as '未知') and
 * ordered by descending count.
 *
 * @returns {Array<{country: string, count: number}>} One entry per country;
 *   an empty array when no database connection is open.
 */
function getAttachStuckByCountryFromDb() {
  if (!db) return [];

  return db
    .prepare(
      `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM jobs_base
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) = 1
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
  `,
    )
    .all();
}
|
|
847
|
-
|
|
848
|
-
/**
 * Put the "stuck" `jobs_base` rows of one country back into the profile
 * update queue.
 *
 * A row is stuck when `tt_seller` is NULL or the empty string and
 * `user_update_count` is 1. Matching rows get `user_update_count = 0`, a
 * fresh `updated_at`, and their claim (`claimed_by`, `claimed_at`) cleared.
 *
 * The number reported in `restored` is the row count returned by the UPDATE
 * itself, so it always equals what was actually changed (a separate
 * COUNT-then-UPDATE could disagree if rows changed in between).
 *
 * @param {string|null|undefined} country - Value of `guessed_location` to
 *   restore; `null`/`undefined` selects the '未知' bucket (rows without a
 *   guessed location). Surrounding whitespace is trimmed.
 * @returns {{restored: number, country: string, error?: string}} `error` is
 *   set when no database connection is open or the country is blank.
 */
function restoreAttachStuckByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const info = db
    .prepare(
      `
    UPDATE jobs_base
    SET user_update_count = 0,
        updated_at = ?,
        claimed_by = NULL,
        claimed_at = NULL
    WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) = 1
      AND COALESCE(guessed_location, '未知') = ?
  `,
    )
    .run(Date.now(), normalizedCountry);

  return { restored: info?.changes || 0, country: normalizedCountry };
}
|
|
895
|
-
|
|
896
|
-
/**
 * Reset the pending jobs of one country.
 *
 * Every `jobs` row with `status = 'pending'` and the given guessed location
 * gets `user_update_count = 0`, a fresh `updated_at`, and its claim
 * (`claimed_by` / `claimed_at`) cleared.
 *
 * @param {*} country Location label; null/undefined selects the '未知' bucket.
 *   The value is stringified and trimmed before use.
 * @returns {{reset: number, country: *, error?: string}} `reset` is the number
 *   of rows that matched before the update. `error` is set when the database
 *   is not open or the country is blank.
 */
function resetPendingByCountry(country) {
  if (!db) {
    return { reset: 0, country, error: "db not ready" };
  }

  const countryOrUnknown = country == null ? "未知" : country;
  const normalizedCountry = String(countryOrUnknown).trim();
  if (normalizedCountry === "") {
    return {
      reset: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // One predicate shared by the count and the update so both target the same rows.
  const pendingFilter = [
    "status = 'pending'",
    "COALESCE(guessed_location, '未知') = ?",
  ].join(" AND ");

  const countRow = db
    .prepare(`SELECT COUNT(*) as c FROM jobs WHERE ${pendingFilter}`)
    .get(normalizedCountry);
  const matched = countRow?.c || 0;
  if (!matched) {
    return { reset: 0, country: normalizedCountry };
  }

  const resetStmt = db.prepare(
    `
    UPDATE jobs
    SET user_update_count = 0,
        updated_at = ?,
        claimed_by = NULL,
        claimed_at = NULL
    WHERE ${pendingFilter}
    `,
  );
  resetStmt.run(Date.now(), normalizedCountry);

  return { reset: matched, country: normalizedCountry };
}
|
|
942
|
-
|
|
943
|
-
/**
 * Count all `raw_jobs` rows grouped by guessed location.
 *
 * Rows without a `guessed_location` are reported under the '未知' label.
 *
 * @returns {Array<{country: string, count: number}>} One row per location,
 *   largest count first; an empty array when no database handle is open.
 */
function getRawByCountryFromDb() {
  if (!db) return [];

  const rawByCountryStmt = db.prepare(
    `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM raw_jobs
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
    `,
  );
  const groupedRows = rawByCountryStmt.all();
  return groupedRows;
}
|
|
959
|
-
|
|
960
|
-
/**
 * Move the jobs of one country out of their source table into `raw_jobs`.
 *
 * The scope picks the source table and row filter:
 *   - "pending":    `jobs` rows with `status = 'pending'`
 *   - "userUpdate": `jobs_base` rows with `tt_seller` NULL and
 *                   `user_update_count` NULL, zero or negative
 * These are the same sources and filters that getPendingByCountryFromDb and
 * getUserUpdateByCountryFromDb count.
 *
 * The copy (INSERT OR REPLACE) and the delete run in a single transaction.
 *
 * @param {*} scope "pending" or "userUpdate"; stringified and trimmed.
 * @param {*} country Location label; null/undefined selects the '未知' bucket.
 * @returns {{moved: number, scope: *, country: *, error?: string}} `moved` is
 *   the number of rows that matched before the move. `error` is set when the
 *   database is not open, the country is blank, or the scope is unsupported.
 */
function moveJobsToRawByCountry(scope, country) {
  if (!db) {
    return { moved: 0, scope, country, error: "db not ready" };
  }

  const normalizedScope = String(scope || "").trim();
  const normalizedCountry = String(country == null ? "未知" : country).trim();
  const reject = (error) => ({
    moved: 0,
    scope: normalizedScope,
    country: normalizedCountry,
    error,
  });

  if (normalizedCountry === "") {
    return reject("country is required");
  }

  let sourceTable;
  let scopeWhere;
  switch (normalizedScope) {
    case "pending":
      sourceTable = "jobs";
      scopeWhere = `status = 'pending'`;
      break;
    case "userUpdate":
      sourceTable = "jobs_base";
      scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
      break;
    default:
      return reject("unsupported scope");
  }

  // Both scopes copy the same column set.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, follower_count,
    following_count, heart_count, refresh_time, processed,
    processed_at, created_at, updated_at, region, signature,
    sec_uid, latest_video_time, user_create_time
  `;

  const whereSql = `
    ${scopeWhere}
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countRow = db
    .prepare(`SELECT COUNT(*) as c FROM ${sourceTable} WHERE ${whereSql}`)
    .get(normalizedCountry);
  const matched = countRow?.c || 0;
  if (!matched) {
    return { moved: 0, scope: normalizedScope, country: normalizedCountry };
  }

  const copySql = `
    INSERT OR REPLACE INTO raw_jobs (
      ${columns}
    )
    SELECT
      ${columns}
    FROM ${sourceTable}
    WHERE ${whereSql}
  `;
  const deleteSql = `
    DELETE FROM ${sourceTable}
    WHERE ${whereSql}
  `;

  // Copy first, then delete, atomically.
  const moveTxn = db.transaction((targetCountry) => {
    db.prepare(copySql).run(targetCountry);
    db.prepare(deleteSql).run(targetCountry);
  });
  moveTxn(normalizedCountry);

  return { moved: matched, scope: normalizedScope, country: normalizedCountry };
}
|
|
1058
|
-
|
|
1059
|
-
/**
 * Move every `raw_jobs` row of one country back into `jobs`.
 *
 * The copy (INSERT OR REPLACE) and the delete from `raw_jobs` run in a single
 * transaction.
 *
 * The column list includes `bio_link`, `latest_video_time` and
 * `user_create_time`. moveJobsToRawByCountry writes the latter two into
 * `raw_jobs`, and restoreRawJobById copies `bio_link` between the same two
 * tables. Because the raw rows are deleted after the copy, any column left out
 * here would be lost permanently.
 *
 * @param {*} country Location label; null/undefined selects the '未知' bucket.
 *   The value is stringified and trimmed before use.
 * @returns {{restored: number, country: *, error?: string}} `restored` is the
 *   number of rows that matched before the move. `error` is set when the
 *   database is not open or the country is blank.
 */
function restoreRawJobsByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const whereSql = `COALESCE(guessed_location, '未知') = ?`;
  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(normalizedCountry)?.c || 0;

  if (!count) {
    return { restored: 0, country: normalizedCountry };
  }

  // Single source of truth for the INSERT and SELECT lists so they cannot drift apart.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, follower_count,
    following_count, heart_count, refresh_time, processed,
    processed_at, created_at, updated_at, region, signature,
    bio_link, sec_uid, latest_video_time, user_create_time
  `;

  const restoreTxn = db.transaction((targetCountry) => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM raw_jobs
      WHERE ${whereSql}
      `,
    ).run(targetCountry);

    db.prepare(
      `
      DELETE FROM raw_jobs
      WHERE ${whereSql}
      `,
    ).run(targetCountry);
  });

  restoreTxn(normalizedCountry);
  return { restored: count, country: normalizedCountry };
}
|
|
1167
|
-
|
|
1168
|
-
/**
 * Move a single `raw_jobs` row back into `jobs`, looked up by `unique_id`.
 *
 * The copy (INSERT OR REPLACE) and the delete from `raw_jobs` run in a single
 * transaction.
 *
 * The column list includes `latest_video_time` and `user_create_time`, which
 * moveJobsToRawByCountry writes into `raw_jobs`. Because the raw row is
 * deleted after the copy, any column left out here would be lost permanently.
 *
 * @param {*} uniqueId Id of the raw job; stringified and trimmed. A
 *   null/undefined/blank id is rejected instead of being looked up.
 * @returns {{restored: number, uniqueId: *, error?: string}} `restored` is 1
 *   when the row existed and was moved, 0 otherwise. `error` is set when the
 *   database is not open or the id is missing.
 */
function restoreRawJobById(uniqueId) {
  if (!db) {
    return { restored: 0, uniqueId, error: "db not ready" };
  }

  // Without `?? ""`, null/undefined would be stringified into the literal ids
  // "null"/"undefined" and slip past the emptiness check below.
  const safeId = String(uniqueId ?? "").trim();
  if (!safeId) {
    return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
  }

  const exists =
    db
      .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
      .get(safeId)?.c || 0;

  if (!exists) {
    return { restored: 0, uniqueId: safeId };
  }

  // Single source of truth for the INSERT and SELECT lists so they cannot drift apart.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at, error,
    pinned, no_video, restricted, user_update_count, tt_seller, verified,
    video_count, comment_count, guessed_location, location_created,
    follower_count, following_count, heart_count, refresh_time,
    processed, processed_at, created_at, updated_at, region, signature,
    bio_link, sec_uid, latest_video_time, user_create_time
  `;

  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM raw_jobs WHERE unique_id = ?
      `,
    ).run(safeId);

    db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
  });

  restoreTxn();
  return { restored: 1, uniqueId: safeId };
}
|
|
1213
|
-
|
|
1214
|
-
function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
|
|
1215
|
-
if (!db) {
|
|
1216
|
-
return { restored: 0, error: "db not ready" };
|
|
1217
|
-
}
|
|
1218
|
-
|
|
1219
|
-
const where = [];
|
|
1220
|
-
const args = [];
|
|
1221
|
-
|
|
1222
|
-
if (search) {
|
|
1223
|
-
where.push(
|
|
1224
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1225
|
-
);
|
|
1226
|
-
const likeVal = `%${search.toLowerCase()}%`;
|
|
1227
|
-
args.push(likeVal, likeVal);
|
|
1228
|
-
}
|
|
1229
|
-
|
|
1230
|
-
if (location) {
|
|
1231
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1232
|
-
args.push(location);
|
|
1233
|
-
}
|
|
1234
|
-
|
|
1235
|
-
if (hasVideo) {
|
|
1236
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1237
|
-
}
|
|
1238
|
-
|
|
1239
|
-
if (hasFollower) {
|
|
1240
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1241
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* 数据存储主模块 — createStore() 编排器
|
|
3
|
+
*
|
|
4
|
+
* 本文件是数据存储的入口点,负责编排各子模块:
|
|
5
|
+
* - db-schema.js: 建表、迁移、全局连接管理
|
|
6
|
+
* - db-columns.js: 共享列名常量和 SQL 生成
|
|
7
|
+
* - db-crud.js: 基础 CRUD(增删改查、行映射)
|
|
8
|
+
* - db-stats.js: 仪表盘统计、按国家分组
|
|
9
|
+
* - db-raw-jobs.js: raw_jobs 移入/恢复
|
|
10
|
+
* - db-tags.js: Tag 发现与打分
|
|
11
|
+
* - llm-scoring.js: LLM 国家匹配度打分
|
|
12
|
+
*
|
|
13
|
+
* createStore() 保留为运行时编排器,管理:
|
|
14
|
+
* - 任务认领/提交(claimNextJob/commitJob)
|
|
15
|
+
* - 客户端追踪、视频管理、备份
|
|
16
|
+
* - 内存索引、stats 缓存
|
|
17
|
+
*/
|
|
1242
18
|
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
db
|
|
1251
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
1252
|
-
.get(...args)?.c || 0;
|
|
1253
|
-
|
|
1254
|
-
if (!count) {
|
|
1255
|
-
return { restored: 0 };
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
const restoreTxn = db.transaction(() => {
|
|
1259
|
-
db.prepare(
|
|
1260
|
-
`
|
|
1261
|
-
INSERT OR REPLACE INTO jobs (
|
|
1262
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1263
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1264
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1265
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1266
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1267
|
-
)
|
|
1268
|
-
SELECT
|
|
1269
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1270
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1271
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1272
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1273
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1274
|
-
FROM raw_jobs WHERE ${whereSql}
|
|
1275
|
-
`,
|
|
1276
|
-
).run(...args);
|
|
19
|
+
import fs from "fs";
|
|
20
|
+
import path from "path";
|
|
21
|
+
import Database from "better-sqlite3";
|
|
22
|
+
import {
|
|
23
|
+
isLocationInList,
|
|
24
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
25
|
+
} from "../lib/target-locations.js";
|
|
1277
26
|
|
|
1278
|
-
|
|
1279
|
-
|
|
27
|
+
// Schema 与连接管理
|
|
28
|
+
import {
|
|
29
|
+
getDb,
|
|
30
|
+
getDbPath,
|
|
31
|
+
initDb,
|
|
32
|
+
resetDbConnection,
|
|
33
|
+
loadLegacyUsersFromFiles,
|
|
34
|
+
loadLegacyVideosFromFile,
|
|
35
|
+
} from "./db-schema.js";
|
|
36
|
+
|
|
37
|
+
// CRUD 操作
|
|
38
|
+
import {
|
|
39
|
+
snakeToCamel,
|
|
40
|
+
camelToSnake,
|
|
41
|
+
normalizeJobValue,
|
|
42
|
+
mapJobRow,
|
|
43
|
+
mapVideoRow,
|
|
44
|
+
inferStatus,
|
|
45
|
+
hasUserInDb,
|
|
46
|
+
addUserToDb,
|
|
47
|
+
addJobToDb,
|
|
48
|
+
addJobBaseToDb,
|
|
49
|
+
addJob,
|
|
50
|
+
getJobRow,
|
|
51
|
+
getJobBaseRow,
|
|
52
|
+
getJob,
|
|
53
|
+
getAllJobs,
|
|
54
|
+
getVideoRow,
|
|
55
|
+
getAllVideoRows,
|
|
56
|
+
updateJobInfo,
|
|
57
|
+
updateJobBaseInfo,
|
|
58
|
+
getUserDbCount,
|
|
59
|
+
getJobsCount,
|
|
60
|
+
getPendingJobsCount,
|
|
61
|
+
getPendingJobsUserUpdateCount,
|
|
62
|
+
getRawJobsCount,
|
|
63
|
+
} from "./db-crud.js";
|
|
64
|
+
|
|
65
|
+
// 统计查询
|
|
66
|
+
import {
|
|
67
|
+
getDashboardStatsFromDb,
|
|
68
|
+
getPendingByCountryFromDb,
|
|
69
|
+
getUserUpdateByCountryFromDb,
|
|
70
|
+
getAttachStuckByCountryFromDb,
|
|
71
|
+
getRawByCountryFromDb,
|
|
72
|
+
restoreAttachStuckByCountry,
|
|
73
|
+
resetPendingByCountry,
|
|
74
|
+
} from "./db-stats.js";
|
|
75
|
+
|
|
76
|
+
// Raw Jobs 管理
|
|
77
|
+
import {
|
|
78
|
+
moveJobsToRawByCountry,
|
|
79
|
+
restoreRawJobsByCountry,
|
|
80
|
+
restoreRawJobById,
|
|
81
|
+
restoreRawJobsByFilter,
|
|
82
|
+
getRawJobsPageFromDb,
|
|
83
|
+
} from "./db-raw-jobs.js";
|
|
84
|
+
|
|
85
|
+
// Tag CRUD
|
|
86
|
+
import {
|
|
87
|
+
insertTag,
|
|
88
|
+
getTagsByStatus,
|
|
89
|
+
getTagsByCountry,
|
|
90
|
+
getDeadTags,
|
|
91
|
+
claimTag,
|
|
92
|
+
reportTagScore,
|
|
93
|
+
getAllTags,
|
|
94
|
+
rawQuery,
|
|
95
|
+
normalizeTags,
|
|
96
|
+
clearTags,
|
|
97
|
+
} from "./db-tags.js";
|
|
98
|
+
|
|
99
|
+
// LLM 打分
|
|
100
|
+
import {
|
|
101
|
+
scoreJobLocation,
|
|
102
|
+
scoreJobsBatch,
|
|
103
|
+
createLlmOffsetStore,
|
|
104
|
+
} from "./llm-scoring.js";
|
|
1280
105
|
|
|
1281
|
-
|
|
1282
|
-
return { restored: count };
|
|
1283
|
-
}
|
|
106
|
+
// ===== 薄包装函数(保持外部 API 不变)=====
|
|
1284
107
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
108
|
+
/**
|
|
109
|
+
* 导入历史 JSON 数据到 SQLite
|
|
110
|
+
*/
|
|
111
|
+
export function importLegacyJsonToDb({
|
|
112
|
+
dbFilePath,
|
|
113
|
+
usersFilePath,
|
|
114
|
+
doneFilePath,
|
|
115
|
+
videosFilePath,
|
|
1292
116
|
}) {
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1296
|
-
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
1297
|
-
const where = [];
|
|
1298
|
-
const args = [];
|
|
1299
|
-
|
|
1300
|
-
if (search) {
|
|
1301
|
-
where.push(
|
|
1302
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1303
|
-
);
|
|
1304
|
-
const pattern = `%${String(search).toLowerCase()}%`;
|
|
1305
|
-
args.push(pattern, pattern);
|
|
1306
|
-
}
|
|
1307
|
-
if (location) {
|
|
1308
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1309
|
-
args.push(location);
|
|
1310
|
-
}
|
|
1311
|
-
if (hasVideo) {
|
|
1312
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1313
|
-
}
|
|
1314
|
-
if (hasFollower) {
|
|
1315
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1316
|
-
}
|
|
1317
|
-
|
|
1318
|
-
const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
|
|
1319
|
-
const total = db
|
|
1320
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
|
|
1321
|
-
.get(...args).c;
|
|
1322
|
-
|
|
1323
|
-
const rows = db
|
|
1324
|
-
.prepare(
|
|
1325
|
-
`
|
|
1326
|
-
SELECT *
|
|
1327
|
-
FROM raw_jobs
|
|
1328
|
-
${whereSql}
|
|
1329
|
-
ORDER BY created_at DESC, unique_id ASC
|
|
1330
|
-
LIMIT ? OFFSET ?
|
|
1331
|
-
`,
|
|
1332
|
-
)
|
|
1333
|
-
.all(...args, safeLimit, safeOffset);
|
|
1334
|
-
|
|
1335
|
-
return {
|
|
1336
|
-
total,
|
|
1337
|
-
limit: safeLimit,
|
|
1338
|
-
offset: safeOffset,
|
|
1339
|
-
users: rows.map(mapJobRow),
|
|
1340
|
-
};
|
|
1341
|
-
}
|
|
1342
|
-
|
|
1343
|
-
// ====== Tag 发现与打分 CRUD ======
|
|
1344
|
-
|
|
1345
|
-
function insertTag(tag, countries, source = "llm") {
|
|
1346
|
-
if (!db) return { inserted: false, error: "db not ready" };
|
|
1347
|
-
// 防止存入带 # 前缀的 tag
|
|
1348
|
-
const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1349
|
-
if (!normalized || normalized.length < 2) {
|
|
1350
|
-
return { inserted: false, error: "invalid tag" };
|
|
1351
|
-
}
|
|
1352
|
-
try {
|
|
1353
|
-
const result = db
|
|
1354
|
-
.prepare(
|
|
1355
|
-
`
|
|
1356
|
-
INSERT OR IGNORE INTO tags (tag, countries, source)
|
|
1357
|
-
VALUES (?, ?, ?)
|
|
1358
|
-
`,
|
|
1359
|
-
)
|
|
1360
|
-
.run(normalized, JSON.stringify(countries), source);
|
|
1361
|
-
return { inserted: result.changes > 0, tag: normalized };
|
|
1362
|
-
} catch (e) {
|
|
1363
|
-
return { inserted: false, error: e.message };
|
|
1364
|
-
}
|
|
1365
|
-
}
|
|
1366
|
-
|
|
1367
|
-
function getTagsByStatus(status, limit = 100) {
|
|
1368
|
-
if (!db) return [];
|
|
1369
|
-
const rows = db
|
|
1370
|
-
.prepare(
|
|
1371
|
-
`
|
|
1372
|
-
SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
|
|
1373
|
-
`,
|
|
1374
|
-
)
|
|
1375
|
-
.all(status, limit);
|
|
1376
|
-
return rows.map((r) => ({
|
|
1377
|
-
...r,
|
|
1378
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1379
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1380
|
-
}));
|
|
1381
|
-
}
|
|
1382
|
-
|
|
1383
|
-
function getTagsByCountry(country, minScore = 0) {
|
|
1384
|
-
if (!db) return [];
|
|
1385
|
-
const rows = db
|
|
1386
|
-
.prepare(
|
|
1387
|
-
`
|
|
1388
|
-
SELECT * FROM tags WHERE status != 'dead'
|
|
1389
|
-
ORDER BY score DESC
|
|
1390
|
-
`,
|
|
1391
|
-
)
|
|
1392
|
-
.all();
|
|
1393
|
-
// Filter in JS since countries is JSON
|
|
1394
|
-
return rows
|
|
1395
|
-
.map((r) => ({
|
|
1396
|
-
...r,
|
|
1397
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1398
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1399
|
-
}))
|
|
1400
|
-
.filter((r) => r.countries.includes(country) && r.score >= minScore);
|
|
1401
|
-
}
|
|
1402
|
-
|
|
1403
|
-
function getDeadTags(country) {
|
|
1404
|
-
if (!db) return [];
|
|
1405
|
-
const rows = db
|
|
1406
|
-
.prepare(
|
|
1407
|
-
`
|
|
1408
|
-
SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
|
|
1409
|
-
`,
|
|
1410
|
-
)
|
|
1411
|
-
.all();
|
|
1412
|
-
return rows
|
|
1413
|
-
.map((r) => ({
|
|
1414
|
-
...r,
|
|
1415
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1416
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1417
|
-
}))
|
|
1418
|
-
.filter((r) => r.countries.includes(country));
|
|
1419
|
-
}
|
|
117
|
+
resetDbConnection();
|
|
118
|
+
initDb(dbFilePath);
|
|
1420
119
|
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
// 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
|
|
1424
|
-
const result = db
|
|
1425
|
-
.prepare(
|
|
1426
|
-
"UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
|
|
1427
|
-
)
|
|
1428
|
-
.run(tag);
|
|
1429
|
-
if (result.changes === 0) {
|
|
1430
|
-
// 检查是否不存在 vs 已被别人锁定
|
|
1431
|
-
const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
|
|
1432
|
-
if (!row) return { ok: false, error: "tag not found" };
|
|
1433
|
-
return { ok: false, error: `tag status is ${row.status}, already claimed` };
|
|
1434
|
-
}
|
|
1435
|
-
return { ok: true, tag };
|
|
1436
|
-
}
|
|
120
|
+
const db = getDb();
|
|
121
|
+
const dbPath = getDbPath();
|
|
1437
122
|
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
const {
|
|
1441
|
-
score,
|
|
1442
|
-
status,
|
|
1443
|
-
totalPosts,
|
|
1444
|
-
authorCount,
|
|
1445
|
-
matchedAuthors,
|
|
1446
|
-
matchedCountries,
|
|
1447
|
-
pushedUsers,
|
|
1448
|
-
error,
|
|
1449
|
-
} = fields;
|
|
1450
|
-
const matchedCountriesJson = matchedCountries
|
|
1451
|
-
? JSON.stringify(matchedCountries)
|
|
1452
|
-
: null;
|
|
1453
|
-
const now = new Date().toISOString();
|
|
1454
|
-
|
|
1455
|
-
try {
|
|
1456
|
-
const result = db
|
|
1457
|
-
.prepare(
|
|
1458
|
-
`
|
|
1459
|
-
UPDATE tags SET
|
|
1460
|
-
score = COALESCE(?, score),
|
|
1461
|
-
status = COALESCE(?, status),
|
|
1462
|
-
total_posts = COALESCE(?, total_posts),
|
|
1463
|
-
author_count = COALESCE(?, author_count),
|
|
1464
|
-
matched_authors = COALESCE(?, matched_authors),
|
|
1465
|
-
matched_countries = COALESCE(?, matched_countries),
|
|
1466
|
-
pushed_users = COALESCE(?, pushed_users),
|
|
1467
|
-
last_error = COALESCE(?, last_error),
|
|
1468
|
-
scored_at = ?,
|
|
1469
|
-
score_count = score_count + 1
|
|
1470
|
-
WHERE tag = ?
|
|
1471
|
-
`,
|
|
1472
|
-
)
|
|
1473
|
-
.run(
|
|
1474
|
-
score ?? null,
|
|
1475
|
-
status ?? null,
|
|
1476
|
-
totalPosts ?? null,
|
|
1477
|
-
authorCount ?? null,
|
|
1478
|
-
matchedAuthors ?? null,
|
|
1479
|
-
matchedCountriesJson,
|
|
1480
|
-
pushedUsers ?? null,
|
|
1481
|
-
error ?? null,
|
|
1482
|
-
now,
|
|
1483
|
-
tag,
|
|
1484
|
-
);
|
|
1485
|
-
return { ok: result.changes > 0, tag };
|
|
1486
|
-
} catch (e) {
|
|
1487
|
-
return { ok: false, error: e.message };
|
|
1488
|
-
}
|
|
1489
|
-
}
|
|
123
|
+
const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
|
|
124
|
+
const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
|
|
1490
125
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
SELECT *
|
|
1497
|
-
|
|
1498
|
-
)
|
|
1499
|
-
.all(limit);
|
|
1500
|
-
return rows.map((r) => ({
|
|
1501
|
-
...r,
|
|
1502
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1503
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1504
|
-
}));
|
|
1505
|
-
}
|
|
126
|
+
const beforeUsers = getDb()
|
|
127
|
+
.prepare("SELECT COUNT(*) as c FROM users")
|
|
128
|
+
.get().c;
|
|
129
|
+
const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
130
|
+
const beforeVideos = getDb()
|
|
131
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
132
|
+
.get().c;
|
|
1506
133
|
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
} catch (e) {
|
|
1514
|
-
return { error: e.message };
|
|
1515
|
-
}
|
|
1516
|
-
}
|
|
134
|
+
const insertUserStmt = getDb().prepare(
|
|
135
|
+
`INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
|
|
136
|
+
);
|
|
137
|
+
const insertVideoStmt = getDb().prepare(
|
|
138
|
+
`INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
139
|
+
);
|
|
1517
140
|
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
const fixed = [];
|
|
1525
|
-
const merged = [];
|
|
1526
|
-
const skipped = [];
|
|
1527
|
-
|
|
1528
|
-
for (const row of dirtyRows) {
|
|
1529
|
-
const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1530
|
-
if (!cleanTag || cleanTag.length < 2) {
|
|
1531
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1532
|
-
skipped.push({
|
|
1533
|
-
dirty: row.tag,
|
|
1534
|
-
reason: "empty after normalize, deleted",
|
|
1535
|
-
});
|
|
1536
|
-
continue;
|
|
141
|
+
const importUsersTxn = getDb().transaction((items) => {
|
|
142
|
+
for (const item of items) {
|
|
143
|
+
const uniqueId = item.uniqueId || item.unique_id;
|
|
144
|
+
if (!uniqueId) continue;
|
|
145
|
+
insertUserStmt.run(uniqueId);
|
|
146
|
+
addJobToDb({ ...item, uniqueId });
|
|
1537
147
|
}
|
|
148
|
+
});
|
|
1538
149
|
|
|
1539
|
-
|
|
1540
|
-
const
|
|
1541
|
-
|
|
1542
|
-
.
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
JSON.stringify(mergedCountries),
|
|
1552
|
-
cleanTag,
|
|
150
|
+
const importVideosTxn = getDb().transaction((items) => {
|
|
151
|
+
for (const item of items) {
|
|
152
|
+
if (!item?.id) continue;
|
|
153
|
+
insertVideoStmt.run(
|
|
154
|
+
item.id,
|
|
155
|
+
item.href || null,
|
|
156
|
+
item.authorUniqueId || item.author_unique_id || null,
|
|
157
|
+
item.locationCreated || item.location_created || null,
|
|
158
|
+
item.ttSeller ? 1 : 0,
|
|
159
|
+
item.registeredAt || item.registered_at || Date.now(),
|
|
160
|
+
item.userUpdateCount || item.user_update_count || 0,
|
|
161
|
+
item.createTime || item.create_time || null,
|
|
1553
162
|
);
|
|
1554
|
-
// 删除脏数据
|
|
1555
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1556
|
-
merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1557
|
-
} else {
|
|
1558
|
-
// 直接重命名
|
|
1559
|
-
db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
|
|
1560
|
-
fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1561
163
|
}
|
|
1562
|
-
}
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
importUsersTxn(legacyUsers);
|
|
167
|
+
importVideosTxn(legacyVideos);
|
|
168
|
+
|
|
169
|
+
const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
170
|
+
const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
171
|
+
const afterVideos = getDb()
|
|
172
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
173
|
+
.get().c;
|
|
1563
174
|
|
|
1564
175
|
return {
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
176
|
+
dbPath: getDbPath(),
|
|
177
|
+
usersImported: afterUsers - beforeUsers,
|
|
178
|
+
jobsImported: afterJobs - beforeJobs,
|
|
179
|
+
videosImported: afterVideos - beforeVideos,
|
|
180
|
+
totalUsers: afterUsers,
|
|
181
|
+
totalJobs: afterJobs,
|
|
182
|
+
totalVideos: afterVideos,
|
|
1570
183
|
};
|
|
1571
184
|
}
|
|
1572
185
|
|
|
1573
|
-
function
|
|
1574
|
-
|
|
1575
|
-
const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
|
|
1576
|
-
db.exec("DELETE FROM tags");
|
|
1577
|
-
return { ok: true, deleted: count };
|
|
186
|
+
export function closeStoreDb() {
|
|
187
|
+
resetDbConnection();
|
|
1578
188
|
}
|
|
1579
189
|
|
|
1580
190
|
function getUsersPageFromDb({
|
|
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
|
|
|
1587
197
|
offset,
|
|
1588
198
|
targetLocations = [],
|
|
1589
199
|
}) {
|
|
1590
|
-
if (!
|
|
200
|
+
if (!getDb()) return null;
|
|
1591
201
|
|
|
1592
202
|
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1593
203
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
|
|
|
1636
246
|
if (cachedCount && Date.now() - cachedCount.time < 5000) {
|
|
1637
247
|
total = cachedCount.c;
|
|
1638
248
|
} else {
|
|
1639
|
-
total =
|
|
249
|
+
total = getDb()
|
|
1640
250
|
.prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
|
|
1641
251
|
.get(...args).c;
|
|
1642
252
|
getUsersPageFromDb._countCache.set(cacheKey, {
|
|
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
|
|
|
1646
256
|
}
|
|
1647
257
|
|
|
1648
258
|
// 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
|
|
1649
|
-
const rows =
|
|
259
|
+
const rows = getDb()
|
|
1650
260
|
.prepare(
|
|
1651
261
|
`
|
|
1652
262
|
SELECT
|
|
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
|
|
|
1688
298
|
}
|
|
1689
299
|
|
|
1690
300
|
function getTargetUsersFromDb(targetLocations = []) {
|
|
1691
|
-
if (!
|
|
301
|
+
if (!getDb()) return null;
|
|
1692
302
|
if (!targetLocations.length) {
|
|
1693
303
|
return { total: 0, users: [] };
|
|
1694
304
|
}
|
|
1695
305
|
|
|
1696
306
|
const placeholders = targetLocations.map(() => "?").join(", ");
|
|
1697
|
-
const rows =
|
|
307
|
+
const rows = getDb()
|
|
1698
308
|
.prepare(
|
|
1699
309
|
`
|
|
1700
310
|
SELECT
|
|
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1703
313
|
location_created, latest_video_time, refresh_time,
|
|
1704
314
|
guessed_location, pinned, processed_at, video_count,
|
|
1705
315
|
no_video, claimed_by, claimed_at, created_at, updated_at
|
|
316
|
+
FROM jobs
|
|
317
|
+
WHERE tt_seller = 1
|
|
1706
318
|
AND verified = 0
|
|
1707
319
|
AND location_created IN (${placeholders})
|
|
1708
320
|
ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
|
|
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1718
330
|
}
|
|
1719
331
|
|
|
1720
332
|
function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
1721
|
-
if (!
|
|
333
|
+
if (!getDb()) return null;
|
|
1722
334
|
if (!targetLocations.length) {
|
|
1723
335
|
return { countries: [] };
|
|
1724
336
|
}
|
|
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1735
347
|
|
|
1736
348
|
// 摘要模式:只返回各国统计数,不返回用户数据
|
|
1737
349
|
if (summaryOnly) {
|
|
1738
|
-
const statsRows =
|
|
350
|
+
const statsRows = getDb()
|
|
1739
351
|
.prepare(
|
|
1740
352
|
`
|
|
1741
353
|
SELECT location_created as country, COUNT(*) as count
|
|
@@ -1803,397 +415,79 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1803
415
|
/SELECT[^FROM]*FROM/,
|
|
1804
416
|
"SELECT COUNT(*) as cnt FROM",
|
|
1805
417
|
);
|
|
1806
|
-
const total =
|
|
418
|
+
const total =
|
|
419
|
+
getDb()
|
|
420
|
+
.prepare(countSql)
|
|
421
|
+
.get(...params)?.cnt || 0;
|
|
1807
422
|
|
|
1808
423
|
sql += ` LIMIT ? OFFSET ?`;
|
|
1809
424
|
const safeLimit = Math.min(Math.floor(limit), 10000);
|
|
1810
425
|
const safeOffset = Math.max(Math.floor(offset), 0);
|
|
1811
426
|
|
|
1812
|
-
const rows =
|
|
1813
|
-
.prepare(sql)
|
|
1814
|
-
.all(...params, safeLimit, safeOffset)
|
|
1815
|
-
.map(mapJobRow);
|
|
1816
|
-
|
|
1817
|
-
return {
|
|
1818
|
-
total,
|
|
1819
|
-
limit: safeLimit,
|
|
1820
|
-
offset: safeOffset,
|
|
1821
|
-
users: rows,
|
|
1822
|
-
};
|
|
1823
|
-
}
|
|
1824
|
-
|
|
1825
|
-
const rows = db
|
|
1826
|
-
.prepare(
|
|
1827
|
-
`
|
|
1828
|
-
SELECT
|
|
1829
|
-
unique_id,
|
|
1830
|
-
nickname,
|
|
1831
|
-
follower_count,
|
|
1832
|
-
video_count,
|
|
1833
|
-
tt_seller,
|
|
1834
|
-
verified,
|
|
1835
|
-
location_created,
|
|
1836
|
-
confirmed_location,
|
|
1837
|
-
modified_at,
|
|
1838
|
-
latest_video_time,
|
|
1839
|
-
refresh_time,
|
|
1840
|
-
status,
|
|
1841
|
-
sources
|
|
1842
|
-
FROM jobs
|
|
1843
|
-
WHERE tt_seller = 1
|
|
1844
|
-
AND verified = 0
|
|
1845
|
-
AND location_created IN (${placeholders})
|
|
1846
|
-
ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
|
|
1847
|
-
`,
|
|
1848
|
-
)
|
|
1849
|
-
.all(...targetLocations)
|
|
1850
|
-
.map(mapJobRow);
|
|
1851
|
-
|
|
1852
|
-
const countryMap = new Map();
|
|
1853
|
-
for (const row of rows) {
|
|
1854
|
-
const country = row.locationCreated || "未知";
|
|
1855
|
-
if (!countryMap.has(country)) {
|
|
1856
|
-
countryMap.set(country, []);
|
|
1857
|
-
}
|
|
1858
|
-
countryMap.get(country).push(row);
|
|
1859
|
-
}
|
|
1860
|
-
|
|
1861
|
-
const countries = [];
|
|
1862
|
-
for (const [country, users] of countryMap) {
|
|
1863
|
-
countries.push({
|
|
1864
|
-
country,
|
|
1865
|
-
count: users.length,
|
|
1866
|
-
users,
|
|
1867
|
-
});
|
|
1868
|
-
}
|
|
1869
|
-
|
|
1870
|
-
return {
|
|
1871
|
-
total: rows.length,
|
|
1872
|
-
countries,
|
|
1873
|
-
};
|
|
1874
|
-
}
|
|
1875
|
-
|
|
1876
|
-
function snakeToCamel(key) {
|
|
1877
|
-
return key.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
|
|
1878
|
-
}
|
|
1879
|
-
|
|
1880
|
-
function camelToSnake(key) {
|
|
1881
|
-
return key.replace(/[A-Z]/g, (ch) => `_${ch.toLowerCase()}`);
|
|
1882
|
-
}
|
|
1883
|
-
|
|
1884
|
-
const jobBooleanColumns = new Set([
|
|
1885
|
-
"pinned",
|
|
1886
|
-
"no_video",
|
|
1887
|
-
"restricted",
|
|
1888
|
-
"processed",
|
|
1889
|
-
"tt_seller",
|
|
1890
|
-
"verified",
|
|
1891
|
-
"error",
|
|
1892
|
-
]);
|
|
1893
|
-
|
|
1894
|
-
const videoBooleanColumns = new Set(["tt_seller"]);
|
|
1895
|
-
|
|
1896
|
-
const writableJobColumns = new Set([
|
|
1897
|
-
"nickname",
|
|
1898
|
-
"status",
|
|
1899
|
-
"sources",
|
|
1900
|
-
"claimed_by",
|
|
1901
|
-
"claimed_at",
|
|
1902
|
-
"error",
|
|
1903
|
-
"pinned",
|
|
1904
|
-
"no_video",
|
|
1905
|
-
"restricted",
|
|
1906
|
-
"user_update_count",
|
|
1907
|
-
"tt_seller",
|
|
1908
|
-
"verified",
|
|
1909
|
-
"video_count",
|
|
1910
|
-
"comment_count",
|
|
1911
|
-
"guessed_location",
|
|
1912
|
-
"location_created",
|
|
1913
|
-
"confirmed_location",
|
|
1914
|
-
"modified_at",
|
|
1915
|
-
"follower_count",
|
|
1916
|
-
"following_count",
|
|
1917
|
-
"heart_count",
|
|
1918
|
-
"refresh_time",
|
|
1919
|
-
"processed",
|
|
1920
|
-
"processed_at",
|
|
1921
|
-
"updated_at",
|
|
1922
|
-
"region",
|
|
1923
|
-
"signature",
|
|
1924
|
-
"bio_link",
|
|
1925
|
-
"sec_uid",
|
|
1926
|
-
"status_code",
|
|
1927
|
-
"latest_video_time",
|
|
1928
|
-
"top_video_play_count",
|
|
1929
|
-
"top_video_href",
|
|
1930
|
-
"user_create_time",
|
|
1931
|
-
]);
|
|
1932
|
-
|
|
1933
|
-
function normalizeJobValue(column, value) {
|
|
1934
|
-
if (value === undefined || value === null) return null;
|
|
1935
|
-
if (column === "sources") {
|
|
1936
|
-
if (!Array.isArray(value)) return JSON.stringify([]);
|
|
1937
|
-
return JSON.stringify([...new Set(value)]);
|
|
1938
|
-
}
|
|
1939
|
-
if (jobBooleanColumns.has(column)) {
|
|
1940
|
-
return value ? 1 : 0;
|
|
1941
|
-
}
|
|
1942
|
-
// 防御:如果值是对象或数组,转为 JSON 字符串
|
|
1943
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1944
|
-
return value;
|
|
1945
|
-
}
|
|
1946
|
-
|
|
1947
|
-
function mapJobRow(row) {
|
|
1948
|
-
if (!row) return undefined;
|
|
1949
|
-
const mapped = {};
|
|
1950
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1951
|
-
const camelKey = snakeToCamel(key);
|
|
1952
|
-
if (key === "sources") {
|
|
1953
|
-
try {
|
|
1954
|
-
mapped[camelKey] = value ? JSON.parse(value) : [];
|
|
1955
|
-
} catch {
|
|
1956
|
-
mapped[camelKey] = [];
|
|
1957
|
-
}
|
|
1958
|
-
continue;
|
|
1959
|
-
}
|
|
1960
|
-
if (jobBooleanColumns.has(key)) {
|
|
1961
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1962
|
-
continue;
|
|
1963
|
-
}
|
|
1964
|
-
mapped[camelKey] = value;
|
|
1965
|
-
}
|
|
1966
|
-
return mapped;
|
|
1967
|
-
}
|
|
1968
|
-
|
|
1969
|
-
function getJobRow(uniqueId) {
|
|
1970
|
-
if (!db) return null;
|
|
1971
|
-
return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
|
|
1972
|
-
}
|
|
1973
|
-
|
|
1974
|
-
function getJobBaseRow(uniqueId) {
|
|
1975
|
-
if (!db) return null;
|
|
1976
|
-
return db
|
|
1977
|
-
.prepare("SELECT * FROM jobs_base WHERE unique_id = ?")
|
|
1978
|
-
.get(uniqueId);
|
|
1979
|
-
}
|
|
1980
|
-
|
|
1981
|
-
function getJob(uniqueId) {
|
|
1982
|
-
return mapJobRow(getJobRow(uniqueId));
|
|
1983
|
-
}
|
|
1984
|
-
|
|
1985
|
-
function getAllJobs() {
|
|
1986
|
-
if (!db) return [];
|
|
1987
|
-
return db.prepare("SELECT * FROM jobs").all().map(mapJobRow);
|
|
1988
|
-
}
|
|
1989
|
-
|
|
1990
|
-
function mapVideoRow(row) {
|
|
1991
|
-
if (!row) return undefined;
|
|
1992
|
-
const mapped = {};
|
|
1993
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1994
|
-
const camelKey = snakeToCamel(key);
|
|
1995
|
-
if (videoBooleanColumns.has(key)) {
|
|
1996
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1997
|
-
continue;
|
|
1998
|
-
}
|
|
1999
|
-
mapped[camelKey] = value;
|
|
2000
|
-
}
|
|
2001
|
-
return mapped;
|
|
2002
|
-
}
|
|
2003
|
-
|
|
2004
|
-
function getVideoRow(videoId) {
|
|
2005
|
-
if (!db) return null;
|
|
2006
|
-
return db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId);
|
|
2007
|
-
}
|
|
2008
|
-
|
|
2009
|
-
function getAllVideoRows() {
|
|
2010
|
-
if (!db) return [];
|
|
2011
|
-
return db.prepare("SELECT * FROM videos").all();
|
|
2012
|
-
}
|
|
2013
|
-
|
|
2014
|
-
function updateJobInfo(uniqueId, info, incrementCount = true) {
|
|
2015
|
-
if (!db) return { error: "db not initialized" };
|
|
2016
|
-
const existing = getJobRow(uniqueId);
|
|
2017
|
-
if (!existing) return { error: "user not found" };
|
|
2018
|
-
|
|
2019
|
-
const nextValues = {};
|
|
2020
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2021
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2022
|
-
if (value === undefined || value === "") continue;
|
|
2023
|
-
let column = camelToSnake(key);
|
|
2024
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2025
|
-
if (column === "bio") column = "signature";
|
|
2026
|
-
if (column === "create_time") column = "user_create_time";
|
|
2027
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2028
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
nextValues.updated_at = Date.now();
|
|
2032
|
-
if (incrementCount) {
|
|
2033
|
-
nextValues.user_update_count = (existing.user_update_count || 0) + 1;
|
|
2034
|
-
}
|
|
2035
|
-
|
|
2036
|
-
const columns = Object.keys(nextValues);
|
|
2037
|
-
if (columns.length > 0) {
|
|
2038
|
-
const sql = `UPDATE jobs SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
|
|
2039
|
-
db.prepare(sql).run(
|
|
2040
|
-
...columns.map((column) => nextValues[column]),
|
|
2041
|
-
uniqueId,
|
|
2042
|
-
);
|
|
2043
|
-
}
|
|
2044
|
-
|
|
2045
|
-
return {
|
|
2046
|
-
ok: true,
|
|
2047
|
-
userUpdateCount:
|
|
2048
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
2049
|
-
};
|
|
2050
|
-
}
|
|
2051
|
-
|
|
2052
|
-
function inferStatus(u) {
|
|
2053
|
-
if (u.restricted) return "restricted";
|
|
2054
|
-
if (u.error) return "error";
|
|
2055
|
-
if (u.processed) return "done";
|
|
2056
|
-
return "pending";
|
|
2057
|
-
}
|
|
427
|
+
const rows = getDb()
|
|
428
|
+
.prepare(sql)
|
|
429
|
+
.all(...params, safeLimit, safeOffset)
|
|
430
|
+
.map(mapJobRow);
|
|
2058
431
|
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2066
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2067
|
-
if (value === undefined || value === "") continue;
|
|
2068
|
-
let column = camelToSnake(key);
|
|
2069
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2070
|
-
if (column === "bio") column = "signature";
|
|
2071
|
-
if (column === "create_time") column = "user_create_time";
|
|
2072
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2073
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
432
|
+
return {
|
|
433
|
+
total,
|
|
434
|
+
limit: safeLimit,
|
|
435
|
+
offset: safeOffset,
|
|
436
|
+
users: rows,
|
|
437
|
+
};
|
|
2074
438
|
}
|
|
2075
439
|
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
440
|
+
const rows = getDb()
|
|
441
|
+
.prepare(
|
|
442
|
+
`
|
|
443
|
+
SELECT
|
|
444
|
+
unique_id,
|
|
445
|
+
nickname,
|
|
446
|
+
follower_count,
|
|
447
|
+
video_count,
|
|
448
|
+
tt_seller,
|
|
449
|
+
verified,
|
|
450
|
+
location_created,
|
|
451
|
+
confirmed_location,
|
|
452
|
+
modified_at,
|
|
453
|
+
latest_video_time,
|
|
454
|
+
refresh_time,
|
|
455
|
+
status,
|
|
456
|
+
sources
|
|
457
|
+
FROM jobs
|
|
458
|
+
WHERE tt_seller = 1
|
|
459
|
+
AND verified = 0
|
|
460
|
+
AND location_created IN (${placeholders})
|
|
461
|
+
ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
|
|
462
|
+
`,
|
|
463
|
+
)
|
|
464
|
+
.all(...targetLocations)
|
|
465
|
+
.map(mapJobRow);
|
|
466
|
+
|
|
467
|
+
const countryMap = new Map();
|
|
468
|
+
for (const row of rows) {
|
|
469
|
+
const country = row.locationCreated || "未知";
|
|
470
|
+
if (!countryMap.has(country)) {
|
|
471
|
+
countryMap.set(country, []);
|
|
472
|
+
}
|
|
473
|
+
countryMap.get(country).push(row);
|
|
2079
474
|
}
|
|
2080
475
|
|
|
2081
|
-
const
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
);
|
|
476
|
+
const countries = [];
|
|
477
|
+
for (const [country, users] of countryMap) {
|
|
478
|
+
countries.push({
|
|
479
|
+
country,
|
|
480
|
+
count: users.length,
|
|
481
|
+
users,
|
|
482
|
+
});
|
|
2088
483
|
}
|
|
2089
484
|
|
|
2090
485
|
return {
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
486
|
+
total: rows.length,
|
|
487
|
+
countries,
|
|
2094
488
|
};
|
|
2095
489
|
}
|
|
2096
490
|
|
|
2097
|
-
function addJobBaseToDb(user) {
|
|
2098
|
-
if (!db) return;
|
|
2099
|
-
const now = Date.now();
|
|
2100
|
-
db.prepare(
|
|
2101
|
-
`
|
|
2102
|
-
INSERT OR IGNORE INTO jobs_base (
|
|
2103
|
-
unique_id,
|
|
2104
|
-
nickname,
|
|
2105
|
-
status,
|
|
2106
|
-
sources,
|
|
2107
|
-
claimed_by,
|
|
2108
|
-
claimed_at,
|
|
2109
|
-
error,
|
|
2110
|
-
pinned,
|
|
2111
|
-
no_video,
|
|
2112
|
-
restricted,
|
|
2113
|
-
user_update_count,
|
|
2114
|
-
tt_seller,
|
|
2115
|
-
verified,
|
|
2116
|
-
video_count,
|
|
2117
|
-
comment_count,
|
|
2118
|
-
guessed_location,
|
|
2119
|
-
location_created,
|
|
2120
|
-
follower_count,
|
|
2121
|
-
following_count,
|
|
2122
|
-
heart_count,
|
|
2123
|
-
refresh_time,
|
|
2124
|
-
processed,
|
|
2125
|
-
processed_at,
|
|
2126
|
-
created_at,
|
|
2127
|
-
updated_at,
|
|
2128
|
-
region,
|
|
2129
|
-
signature,
|
|
2130
|
-
bio_link,
|
|
2131
|
-
sec_uid
|
|
2132
|
-
)
|
|
2133
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2134
|
-
`,
|
|
2135
|
-
).run(
|
|
2136
|
-
user.uniqueId,
|
|
2137
|
-
user.nickname || null,
|
|
2138
|
-
user.status || inferStatus(user),
|
|
2139
|
-
JSON.stringify(
|
|
2140
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
2141
|
-
),
|
|
2142
|
-
user.claimedBy || null,
|
|
2143
|
-
user.claimedAt || null,
|
|
2144
|
-
user.error || null,
|
|
2145
|
-
user.pinned ? 1 : 0,
|
|
2146
|
-
user.noVideo ? 1 : 0,
|
|
2147
|
-
user.restricted ? 1 : 0,
|
|
2148
|
-
user.userUpdateCount || 0,
|
|
2149
|
-
user.ttSeller === undefined ||
|
|
2150
|
-
user.ttSeller === null ||
|
|
2151
|
-
user.ttSeller === ""
|
|
2152
|
-
? null
|
|
2153
|
-
: user.ttSeller
|
|
2154
|
-
? 1
|
|
2155
|
-
: 0,
|
|
2156
|
-
user.verified === undefined ||
|
|
2157
|
-
user.verified === null ||
|
|
2158
|
-
user.verified === ""
|
|
2159
|
-
? null
|
|
2160
|
-
: user.verified
|
|
2161
|
-
? 1
|
|
2162
|
-
: 0,
|
|
2163
|
-
user.videoCount || 0,
|
|
2164
|
-
user.commentCount || 0,
|
|
2165
|
-
user.guessedLocation || null,
|
|
2166
|
-
user.locationCreated || null,
|
|
2167
|
-
user.followerCount || 0,
|
|
2168
|
-
user.followingCount || 0,
|
|
2169
|
-
user.heartCount || 0,
|
|
2170
|
-
user.refreshTime || null,
|
|
2171
|
-
user.processed ? 1 : 0,
|
|
2172
|
-
user.processedAt || null,
|
|
2173
|
-
user.createdAt || now,
|
|
2174
|
-
user.updatedAt || now,
|
|
2175
|
-
user.region || null,
|
|
2176
|
-
user.signature || null,
|
|
2177
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
2178
|
-
user.secUid || null,
|
|
2179
|
-
);
|
|
2180
|
-
}
|
|
2181
|
-
|
|
2182
|
-
function addJob(user) {
|
|
2183
|
-
if (!db) {
|
|
2184
|
-
addUserToDb(user);
|
|
2185
|
-
return;
|
|
2186
|
-
}
|
|
2187
|
-
if (!user.status) user.status = inferStatus(user);
|
|
2188
|
-
if (!user.createdAt) user.createdAt = Date.now();
|
|
2189
|
-
if (!user.updatedAt) user.updatedAt = user.createdAt;
|
|
2190
|
-
const writeTxn = db.transaction((job) => {
|
|
2191
|
-
addUserToDb(job);
|
|
2192
|
-
addJobToDb(job);
|
|
2193
|
-
});
|
|
2194
|
-
writeTxn(user);
|
|
2195
|
-
}
|
|
2196
|
-
|
|
2197
491
|
export function createStore(filePath, options = {}) {
|
|
2198
492
|
if (!filePath) {
|
|
2199
493
|
throw new Error("createStore requires an explicit .db path");
|
|
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
|
|
|
2219
513
|
let refillLock = null; // Promise | null
|
|
2220
514
|
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
2221
515
|
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
2222
|
-
|
|
516
|
+
const offsetStore = createLlmOffsetStore();
|
|
2223
517
|
if (filePath) {
|
|
2224
518
|
// 初始化 SQLite 用户表(用于判重)
|
|
2225
|
-
|
|
519
|
+
initDb(filePath);
|
|
2226
520
|
// 从数据库恢复偏移量
|
|
2227
|
-
|
|
2228
|
-
}
|
|
2229
|
-
|
|
2230
|
-
/**
|
|
2231
|
-
* 从数据库加载 LLM 采样偏移量
|
|
2232
|
-
*/
|
|
2233
|
-
function loadLlmSampleOffsets() {
|
|
2234
|
-
try {
|
|
2235
|
-
const row = db
|
|
2236
|
-
.prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
|
|
2237
|
-
.get();
|
|
2238
|
-
if (row && row.offsets) {
|
|
2239
|
-
const parsed = JSON.parse(row.offsets);
|
|
2240
|
-
if (parsed && typeof parsed === "object") {
|
|
2241
|
-
Object.entries(parsed).forEach(([k, v]) => {
|
|
2242
|
-
llmSampleOffsets.set(k, v);
|
|
2243
|
-
});
|
|
2244
|
-
console.error(
|
|
2245
|
-
`[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
|
|
2246
|
-
llmSampleOffsets.entries(),
|
|
2247
|
-
)
|
|
2248
|
-
.map(([k, v]) => `${k}:${v}`)
|
|
2249
|
-
.join(", ")}`,
|
|
2250
|
-
);
|
|
2251
|
-
}
|
|
2252
|
-
}
|
|
2253
|
-
} catch (e) {
|
|
2254
|
-
// 表不存在或解析失败,使用空偏移量
|
|
2255
|
-
console.error(
|
|
2256
|
-
`[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
|
|
2257
|
-
);
|
|
2258
|
-
}
|
|
2259
|
-
}
|
|
2260
|
-
|
|
2261
|
-
/**
|
|
2262
|
-
* 将 LLM 采样偏移量持久化到数据库
|
|
2263
|
-
*/
|
|
2264
|
-
function saveLlmSampleOffsets() {
|
|
2265
|
-
try {
|
|
2266
|
-
const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
|
|
2267
|
-
// 表不存在则创建
|
|
2268
|
-
db.prepare(
|
|
2269
|
-
`CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
|
|
2270
|
-
).run();
|
|
2271
|
-
// 插入或更新
|
|
2272
|
-
db.prepare(
|
|
2273
|
-
`INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
|
|
2274
|
-
).run(offsetsJson);
|
|
2275
|
-
} catch (e) {
|
|
2276
|
-
console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
|
|
2277
|
-
}
|
|
521
|
+
offsetStore.load();
|
|
2278
522
|
}
|
|
2279
523
|
|
|
2280
524
|
// stats 缓存
|
|
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2287
531
|
}
|
|
2288
532
|
|
|
2289
533
|
function computeStatsInternal() {
|
|
2290
|
-
if (
|
|
534
|
+
if (getDb()) {
|
|
2291
535
|
const total = getJobsCount();
|
|
2292
536
|
const statusCounts = {
|
|
2293
537
|
pending: 0,
|
|
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2296
540
|
error: 0,
|
|
2297
541
|
restricted: 0,
|
|
2298
542
|
};
|
|
2299
|
-
const rows =
|
|
543
|
+
const rows = getDb()
|
|
2300
544
|
.prepare(
|
|
2301
545
|
`
|
|
2302
546
|
SELECT status, COUNT(*) as count
|
|
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2372
616
|
}
|
|
2373
617
|
|
|
2374
618
|
function rebuildStatusGroups() {
|
|
2375
|
-
if (
|
|
619
|
+
if (getDb()) {
|
|
2376
620
|
statusGroups = {
|
|
2377
621
|
pending: [],
|
|
2378
622
|
processing: [],
|
|
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
|
|
|
2436
680
|
|
|
2437
681
|
function flushSave() {
|
|
2438
682
|
// 数据库模式:先保存 LLM 偏移量,再备份数据库
|
|
2439
|
-
if (
|
|
683
|
+
if (getDb() && getDbPath()) {
|
|
2440
684
|
try {
|
|
2441
|
-
|
|
685
|
+
offsetStore.save();
|
|
2442
686
|
} catch (e) {
|
|
2443
687
|
console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
|
|
2444
688
|
}
|
|
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2452
696
|
* @returns {string|null} 备份文件路径,失败返回 null
|
|
2453
697
|
*/
|
|
2454
698
|
function backupDatabase(maxBackups = 3) {
|
|
2455
|
-
if (!
|
|
699
|
+
if (!getDb() || !getDbPath()) {
|
|
2456
700
|
console.error("[data-store] 数据库未初始化,跳过备份");
|
|
2457
701
|
return null;
|
|
2458
702
|
}
|
|
@@ -2464,17 +708,16 @@ export function createStore(filePath, options = {}) {
|
|
|
2464
708
|
.toISOString()
|
|
2465
709
|
.replace(/[-:T.]/g, "")
|
|
2466
710
|
.slice(0, 15); // YYYYMMDDHHmmss
|
|
2467
|
-
const baseName = path.basename(
|
|
711
|
+
const baseName = path.basename(getDbPath(), ".db");
|
|
2468
712
|
const backupName = `${baseName}-${timestamp}.db`;
|
|
2469
|
-
const backupDir = path.dirname(
|
|
713
|
+
const backupDir = path.dirname(getDbPath());
|
|
2470
714
|
const backupPath = path.join(backupDir, backupName);
|
|
2471
715
|
|
|
2472
716
|
console.error(`[data-store] 正在备份数据库: ${backupName}`);
|
|
2473
717
|
|
|
2474
|
-
//
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
backupDb.close();
|
|
718
|
+
// 先 WAL checkpoint 确保所有数据落盘,再同步复制文件
|
|
719
|
+
getDb().exec("PRAGMA wal_checkpoint(TRUNCATE)");
|
|
720
|
+
fs.copyFileSync(getDbPath(), backupPath);
|
|
2478
721
|
|
|
2479
722
|
// 验证备份文件大小
|
|
2480
723
|
const stat = fs.statSync(backupPath);
|
|
@@ -2523,7 +766,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2523
766
|
|
|
2524
767
|
function stopBackup() {
|
|
2525
768
|
// 退出时执行备份
|
|
2526
|
-
if (
|
|
769
|
+
if (getDb() && getDbPath()) {
|
|
2527
770
|
backupDatabase();
|
|
2528
771
|
}
|
|
2529
772
|
}
|
|
@@ -2531,7 +774,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2531
774
|
function getUser(uid) {
|
|
2532
775
|
const idx = uidIndex.get(uid);
|
|
2533
776
|
if (idx !== undefined) return data[idx];
|
|
2534
|
-
if (
|
|
777
|
+
if (getDb()) return getJob(uid);
|
|
2535
778
|
return undefined;
|
|
2536
779
|
}
|
|
2537
780
|
|
|
@@ -2549,12 +792,25 @@ export function createStore(filePath, options = {}) {
|
|
|
2549
792
|
|
|
2550
793
|
function addUser(user, append) {
|
|
2551
794
|
const memoryIdx = uidIndex.get(user.uniqueId);
|
|
2552
|
-
if (
|
|
795
|
+
if (getDb() && memoryIdx === undefined) {
|
|
2553
796
|
// 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
|
|
2554
797
|
if (hasUserInDb(user.uniqueId)) {
|
|
2555
798
|
return;
|
|
2556
799
|
}
|
|
2557
|
-
|
|
800
|
+
const now = Date.now();
|
|
801
|
+
const writeTxn = getDb().transaction((job) => {
|
|
802
|
+
addUserToDb({
|
|
803
|
+
...job,
|
|
804
|
+
createdAt: job.createdAt || now,
|
|
805
|
+
updatedAt: job.updatedAt || now,
|
|
806
|
+
});
|
|
807
|
+
addJobBaseToDb({
|
|
808
|
+
...job,
|
|
809
|
+
createdAt: job.createdAt || now,
|
|
810
|
+
updatedAt: job.updatedAt || now,
|
|
811
|
+
});
|
|
812
|
+
});
|
|
813
|
+
writeTxn(user);
|
|
2558
814
|
return;
|
|
2559
815
|
}
|
|
2560
816
|
|
|
@@ -2616,7 +872,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2616
872
|
createdAt: now,
|
|
2617
873
|
updatedAt: now,
|
|
2618
874
|
};
|
|
2619
|
-
const writeTxn =
|
|
875
|
+
const writeTxn = getDb().transaction((job) => {
|
|
2620
876
|
addUserToDb(job);
|
|
2621
877
|
addJobBaseToDb(job);
|
|
2622
878
|
});
|
|
@@ -2628,195 +884,26 @@ export function createStore(filePath, options = {}) {
|
|
|
2628
884
|
}
|
|
2629
885
|
|
|
2630
886
|
function getPendingUsers() {
|
|
2631
|
-
if (
|
|
887
|
+
if (getDb()) {
|
|
2632
888
|
return getAllJobs().filter((u) => u.status === "pending");
|
|
2633
889
|
}
|
|
2634
890
|
return data.filter((u) => u.status === "pending");
|
|
2635
891
|
}
|
|
2636
892
|
|
|
2637
893
|
function getProcessedUsers() {
|
|
2638
|
-
if (
|
|
894
|
+
if (getDb()) {
|
|
2639
895
|
return getAllJobs().filter((u) => u.status === "done");
|
|
2640
896
|
}
|
|
2641
897
|
return data.filter((u) => u.status === "done");
|
|
2642
898
|
}
|
|
2643
899
|
|
|
2644
900
|
function getAllUsers() {
|
|
2645
|
-
if (
|
|
901
|
+
if (getDb()) {
|
|
2646
902
|
return getAllJobs();
|
|
2647
903
|
}
|
|
2648
904
|
return data;
|
|
2649
905
|
}
|
|
2650
906
|
|
|
2651
|
-
/**
|
|
2652
|
-
* 使用 LLM 对单个 job 的国家匹配度打分(0-100)
|
|
2653
|
-
* @param {Object} job - raw_jobs 中的一条记录
|
|
2654
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2655
|
-
* @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
|
|
2656
|
-
*/
|
|
2657
|
-
async function scoreJobLocation(job, targetLocations) {
|
|
2658
|
-
const { fetch: undiciFetch } = await import("undici");
|
|
2659
|
-
|
|
2660
|
-
const prompt = `
|
|
2661
|
-
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
|
|
2662
|
-
|
|
2663
|
-
目标国家列表: ${targetLocations.join(", ")}
|
|
2664
|
-
|
|
2665
|
-
重要:
|
|
2666
|
-
- 用户只要来自上述**任意一个**国家就算匹配。
|
|
2667
|
-
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
|
|
2668
|
-
- 请综合用户名、昵称、签名、位置等信息做判断。
|
|
2669
|
-
|
|
2670
|
-
用户信息:
|
|
2671
|
-
- 用户名: ${job.unique_id || "未知"}
|
|
2672
|
-
- 昵称: ${job.nickname || "未知"}
|
|
2673
|
-
- 签名: ${job.signature || "未知"}
|
|
2674
|
-
- 地区: ${job.region || "未知"}
|
|
2675
|
-
- 猜测国家(参考): ${job.guessed_location || "未知"}
|
|
2676
|
-
- 位置信息: ${job.location_created || "未知"}
|
|
2677
|
-
- 主页链接: ${job.bio_link || "未知"}
|
|
2678
|
-
|
|
2679
|
-
返回 JSON(仅返回 JSON,无其他内容):
|
|
2680
|
-
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
|
|
2681
|
-
|
|
2682
|
-
Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
|
|
2683
|
-
`;
|
|
2684
|
-
|
|
2685
|
-
try {
|
|
2686
|
-
const apiKey = process.env.APIKEY || "";
|
|
2687
|
-
const response = await undiciFetch(
|
|
2688
|
-
"http://82.156.52.214:18000/v1/chat/completions",
|
|
2689
|
-
{
|
|
2690
|
-
method: "POST",
|
|
2691
|
-
headers: {
|
|
2692
|
-
"Content-Type": "application/json",
|
|
2693
|
-
Authorization: `Bearer ${apiKey}`,
|
|
2694
|
-
},
|
|
2695
|
-
body: JSON.stringify({
|
|
2696
|
-
model: "zc-fast",
|
|
2697
|
-
messages: [{ role: "user", content: prompt }],
|
|
2698
|
-
max_tokens: 512,
|
|
2699
|
-
temperature: 0.1,
|
|
2700
|
-
}),
|
|
2701
|
-
},
|
|
2702
|
-
);
|
|
2703
|
-
|
|
2704
|
-
const result = await response.json();
|
|
2705
|
-
const content = result.choices?.[0]?.message?.content || "";
|
|
2706
|
-
|
|
2707
|
-
// 解析 JSON 响应(多层容错)
|
|
2708
|
-
let parsed = null;
|
|
2709
|
-
|
|
2710
|
-
// 尝试 1: 直接解析
|
|
2711
|
-
try {
|
|
2712
|
-
parsed = JSON.parse(content);
|
|
2713
|
-
} catch {
|
|
2714
|
-
// 尝试 2: 提取 {} 包裹的内容
|
|
2715
|
-
const match = content.match(/\{[\s\S]*\}/);
|
|
2716
|
-
if (match) {
|
|
2717
|
-
try {
|
|
2718
|
-
parsed = JSON.parse(match[0]);
|
|
2719
|
-
} catch {
|
|
2720
|
-
// 尝试 3: 清理常见问题后解析
|
|
2721
|
-
const cleaned = match[0]
|
|
2722
|
-
.replace(/"/g, '"') // 弯引号 → 直引号
|
|
2723
|
-
.replace(/\s+/g, " ") // 多余空白
|
|
2724
|
-
.trim();
|
|
2725
|
-
try {
|
|
2726
|
-
parsed = JSON.parse(cleaned);
|
|
2727
|
-
} catch {
|
|
2728
|
-
// 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
|
|
2729
|
-
const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
|
|
2730
|
-
if (scoreMatch) {
|
|
2731
|
-
let reason = "解析降级";
|
|
2732
|
-
// 找 "reason": 的位置,取到最后一个 } 前的内容
|
|
2733
|
-
const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
|
|
2734
|
-
if (reasonKeyPos !== -1) {
|
|
2735
|
-
const afterKey = content.substring(reasonKeyPos);
|
|
2736
|
-
const colonPos = afterKey.indexOf(":");
|
|
2737
|
-
const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
|
|
2738
|
-
const rawValue = afterKey.substring(valueStart);
|
|
2739
|
-
// 取到原始 content 最后一个 } 前
|
|
2740
|
-
const lastBrace = content.lastIndexOf("}");
|
|
2741
|
-
const reasonEnd = lastBrace - reasonKeyPos - valueStart;
|
|
2742
|
-
if (reasonEnd > 0) {
|
|
2743
|
-
reason = rawValue.substring(0, reasonEnd).trim();
|
|
2744
|
-
// 去掉首尾的引号
|
|
2745
|
-
if (reason.startsWith('"')) reason = reason.substring(1);
|
|
2746
|
-
if (reason.endsWith('"'))
|
|
2747
|
-
reason = reason.substring(0, reason.length - 1);
|
|
2748
|
-
}
|
|
2749
|
-
}
|
|
2750
|
-
parsed = {
|
|
2751
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2752
|
-
reason,
|
|
2753
|
-
};
|
|
2754
|
-
}
|
|
2755
|
-
}
|
|
2756
|
-
}
|
|
2757
|
-
}
|
|
2758
|
-
|
|
2759
|
-
// 尝试 5: 如果以上都失败,用更宽松的正则提取
|
|
2760
|
-
if (!parsed) {
|
|
2761
|
-
const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
|
|
2762
|
-
const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
|
|
2763
|
-
if (scoreMatch) {
|
|
2764
|
-
parsed = {
|
|
2765
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2766
|
-
reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
|
|
2767
|
-
};
|
|
2768
|
-
}
|
|
2769
|
-
}
|
|
2770
|
-
}
|
|
2771
|
-
|
|
2772
|
-
if (parsed && typeof parsed.score === "number") {
|
|
2773
|
-
return {
|
|
2774
|
-
uniqueId: job.unique_id,
|
|
2775
|
-
score: Math.max(0, Math.min(100, parsed.score)),
|
|
2776
|
-
reason: parsed.reason || "",
|
|
2777
|
-
};
|
|
2778
|
-
}
|
|
2779
|
-
|
|
2780
|
-
// 所有解析都失败,返回默认分
|
|
2781
|
-
console.error(
|
|
2782
|
-
`[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
|
|
2783
|
-
);
|
|
2784
|
-
return {
|
|
2785
|
-
uniqueId: job.unique_id,
|
|
2786
|
-
score: 50,
|
|
2787
|
-
reason: "LLM 响应解析失败,使用默认分",
|
|
2788
|
-
};
|
|
2789
|
-
} catch (e) {
|
|
2790
|
-
console.error(
|
|
2791
|
-
`[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
|
|
2792
|
-
);
|
|
2793
|
-
return {
|
|
2794
|
-
uniqueId: job.unique_id,
|
|
2795
|
-
score: 50,
|
|
2796
|
-
reason: `LLM 调用异常: ${e.message}`,
|
|
2797
|
-
};
|
|
2798
|
-
}
|
|
2799
|
-
}
|
|
2800
|
-
|
|
2801
|
-
/**
|
|
2802
|
-
* 批量对 jobs 进行 LLM 国家匹配度打分
|
|
2803
|
-
* @param {Object[]} jobs - raw_jobs 记录数组
|
|
2804
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2805
|
-
* @param {number} batchSize - 每批处理数量(并发),默认 10
|
|
2806
|
-
* @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
|
|
2807
|
-
*/
|
|
2808
|
-
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
|
|
2809
|
-
const results = [];
|
|
2810
|
-
for (let i = 0; i < jobs.length; i += batchSize) {
|
|
2811
|
-
const batch = jobs.slice(i, i + batchSize);
|
|
2812
|
-
const batchResults = await Promise.all(
|
|
2813
|
-
batch.map((job) => scoreJobLocation(job, targetLocations)),
|
|
2814
|
-
);
|
|
2815
|
-
results.push(...batchResults);
|
|
2816
|
-
}
|
|
2817
|
-
return results;
|
|
2818
|
-
}
|
|
2819
|
-
|
|
2820
907
|
/**
|
|
2821
908
|
* 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
|
|
2822
909
|
* @param {string[]} locations - 目标国家列表(null 表示不限制)
|
|
@@ -2828,8 +915,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2828
915
|
* @returns {{ moved: number }} 实际移动的数量
|
|
2829
916
|
*/
|
|
2830
917
|
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
|
|
2831
|
-
if (!
|
|
2832
|
-
return { moved: 0, error: "
|
|
918
|
+
if (!getDb()) {
|
|
919
|
+
return { moved: 0, error: "getDb() not ready" };
|
|
2833
920
|
}
|
|
2834
921
|
|
|
2835
922
|
const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
|
|
@@ -2860,7 +947,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2860
947
|
|
|
2861
948
|
// 统计符合条件的数量
|
|
2862
949
|
const count =
|
|
2863
|
-
|
|
950
|
+
getDb()
|
|
2864
951
|
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
2865
952
|
.get(...args)?.c || 0;
|
|
2866
953
|
|
|
@@ -2868,156 +955,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2868
955
|
return { moved: 0 };
|
|
2869
956
|
}
|
|
2870
957
|
|
|
2871
|
-
// 如果启用 LLM
|
|
958
|
+
// 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
|
|
2872
959
|
if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
|
|
2873
|
-
const
|
|
2874
|
-
const
|
|
2875
|
-
const
|
|
2876
|
-
const
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
const
|
|
2880
|
-
|
|
2881
|
-
.join(", ");
|
|
960
|
+
const llmTotal = options.llmTotal ?? 200; // 总条数
|
|
961
|
+
const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
|
|
962
|
+
const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
|
|
963
|
+
const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
|
|
964
|
+
const llmMinScore = options.llmMinScore ?? 60;
|
|
965
|
+
const llmSampleSize = options.llmSampleSize ?? 100;
|
|
966
|
+
const maxBatches = options.llmMaxBatches ?? 10;
|
|
967
|
+
|
|
2882
968
|
console.error(
|
|
2883
|
-
`[data-store] LLM 打分开始:
|
|
969
|
+
`[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
|
|
2884
970
|
);
|
|
2885
|
-
if (offsetSummary) {
|
|
2886
|
-
console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
|
|
2887
|
-
}
|
|
2888
971
|
|
|
2889
972
|
// 返回 Promise,调用方需要 await
|
|
2890
973
|
return (async () => {
|
|
2891
|
-
const allTagQualified = [];
|
|
2892
|
-
const allNonTagQualified = [];
|
|
974
|
+
const allTagQualified = [];
|
|
975
|
+
const allNonTagQualified = [];
|
|
2893
976
|
const allScores = [];
|
|
2894
977
|
|
|
2895
|
-
//
|
|
2896
|
-
|
|
2897
|
-
|
|
978
|
+
// ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
|
|
979
|
+
let tagOffset = offsetStore.get("_tag") || 0;
|
|
980
|
+
const tagGlobalCount =
|
|
981
|
+
getDb()
|
|
982
|
+
.prepare(
|
|
983
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
|
|
984
|
+
)
|
|
985
|
+
.get(...args)?.c || 0;
|
|
2898
986
|
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
987
|
+
if (tagOffset >= tagGlobalCount) {
|
|
988
|
+
tagOffset = 0;
|
|
989
|
+
offsetStore.set("_tag", 0);
|
|
990
|
+
}
|
|
2902
991
|
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
const locationCount =
|
|
2907
|
-
db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
|
|
992
|
+
console.error(
|
|
993
|
+
`[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
|
|
994
|
+
);
|
|
2908
995
|
|
|
2909
|
-
|
|
2910
|
-
|
|
2911
|
-
|
|
996
|
+
while (
|
|
997
|
+
allTagQualified.length < llmTagLimit &&
|
|
998
|
+
tagOffset < tagGlobalCount
|
|
999
|
+
) {
|
|
1000
|
+
const batch = getDb()
|
|
1001
|
+
.prepare(
|
|
1002
|
+
`
|
|
1003
|
+
SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
|
|
1004
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
1005
|
+
LIMIT ? OFFSET ?
|
|
1006
|
+
`,
|
|
1007
|
+
)
|
|
1008
|
+
.all(
|
|
1009
|
+
...args,
|
|
1010
|
+
Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
|
|
1011
|
+
tagOffset,
|
|
2912
1012
|
);
|
|
1013
|
+
|
|
1014
|
+
if (!batch.length) break;
|
|
1015
|
+
|
|
1016
|
+
allTagQualified.push(...batch.map((s) => s.unique_id));
|
|
1017
|
+
tagOffset += batch.length;
|
|
1018
|
+
|
|
1019
|
+
console.error(
|
|
1020
|
+
`[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
|
|
1021
|
+
);
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
offsetStore.set("_tag", tagOffset);
|
|
1025
|
+
|
|
1026
|
+
// ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
|
|
1027
|
+
for (const location of normalizedLocations) {
|
|
1028
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
1029
|
+
|
|
1030
|
+
const nonTagOffsetKey = `${location}:nonTag`;
|
|
1031
|
+
let offset = offsetStore.get(nonTagOffsetKey) || 0;
|
|
1032
|
+
|
|
1033
|
+
const locationArgs = [...args, location];
|
|
1034
|
+
const nonTagCount =
|
|
1035
|
+
getDb()
|
|
1036
|
+
.prepare(
|
|
1037
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
|
|
1038
|
+
)
|
|
1039
|
+
.get(...locationArgs)?.c || 0;
|
|
1040
|
+
|
|
1041
|
+
if (nonTagCount === 0) {
|
|
1042
|
+
console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
|
|
2913
1043
|
continue;
|
|
2914
1044
|
}
|
|
2915
1045
|
|
|
2916
|
-
|
|
2917
|
-
if (offset >= locationCount) {
|
|
1046
|
+
if (offset >= nonTagCount) {
|
|
2918
1047
|
offset = 0;
|
|
2919
|
-
|
|
1048
|
+
offsetStore.set(nonTagOffsetKey, 0);
|
|
2920
1049
|
}
|
|
2921
1050
|
|
|
2922
1051
|
console.error(
|
|
2923
|
-
`[data-store] 国家 ${location}: 共 ${
|
|
1052
|
+
`[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
|
|
2924
1053
|
);
|
|
2925
1054
|
|
|
2926
1055
|
for (let batch = 0; batch < maxBatches; batch++) {
|
|
2927
|
-
|
|
2928
|
-
if (remaining <= 0) break;
|
|
1056
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
2929
1057
|
|
|
2930
|
-
const
|
|
2931
|
-
const samples = db
|
|
1058
|
+
const samples = getDb()
|
|
2932
1059
|
.prepare(
|
|
2933
1060
|
`
|
|
2934
1061
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1062
|
+
AND (sources NOT LIKE '%tag%' OR sources IS NULL)
|
|
1063
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2938
1064
|
LIMIT ? OFFSET ?
|
|
2939
1065
|
`,
|
|
2940
1066
|
)
|
|
2941
|
-
.all(...locationArgs,
|
|
1067
|
+
.all(...locationArgs, llmSampleSize, offset);
|
|
2942
1068
|
|
|
2943
|
-
if (samples.length
|
|
1069
|
+
if (!samples.length) break;
|
|
2944
1070
|
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
1071
|
+
const scores = await scoreJobsBatch(
|
|
1072
|
+
samples,
|
|
1073
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
2948
1074
|
);
|
|
2949
|
-
const
|
|
2950
|
-
|
|
2951
|
-
);
|
|
2952
|
-
|
|
2953
|
-
// tag 来源直接加入合格列表
|
|
2954
|
-
if (tagSamples.length > 0) {
|
|
2955
|
-
allTagQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2956
|
-
console.error(
|
|
2957
|
-
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2958
|
-
);
|
|
2959
|
-
}
|
|
2960
|
-
|
|
2961
|
-
// 非 tag 来源走 LLM 打分
|
|
2962
|
-
let batchQualified = [];
|
|
2963
|
-
let scores = [];
|
|
2964
|
-
if (nonTagSamples.length > 0) {
|
|
2965
|
-
scores = await scoreJobsBatch(
|
|
2966
|
-
nonTagSamples,
|
|
2967
|
-
DEFAULT_TARGET_LOCATIONS,
|
|
2968
|
-
);
|
|
2969
|
-
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2970
|
-
allNonTagQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2971
|
-
}
|
|
2972
|
-
|
|
1075
|
+
const qualified = scores.filter((s) => s.score >= llmMinScore);
|
|
1076
|
+
allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
|
|
2973
1077
|
allScores.push(...scores);
|
|
2974
1078
|
|
|
2975
|
-
|
|
2976
|
-
|
|
1079
|
+
offset += samples.length;
|
|
1080
|
+
offsetStore.set(nonTagOffsetKey, offset);
|
|
1081
|
+
|
|
2977
1082
|
console.error(
|
|
2978
|
-
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length}
|
|
1083
|
+
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
|
|
2979
1084
|
);
|
|
2980
|
-
|
|
2981
|
-
// 更新偏移量记忆
|
|
2982
|
-
offset += samples.length;
|
|
2983
|
-
llmSampleOffsets.set(location, offset);
|
|
2984
|
-
|
|
2985
|
-
// 检查是否两个类型都达到阈值,都达到才停止
|
|
2986
|
-
const tagReached = allTagQualified.length >= llmMinTagReturn;
|
|
2987
|
-
const nonTagReached = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2988
|
-
if (tagReached && nonTagReached) {
|
|
2989
|
-
console.error(
|
|
2990
|
-
`[data-store] 两类任务均已达标 (tag: ${allTagQualified.length}/${llmMinTagReturn}, 非 tag: ${allNonTagQualified.length}/${llmMinNonTagReturn}),停止采样`,
|
|
2991
|
-
);
|
|
2992
|
-
break;
|
|
2993
|
-
}
|
|
2994
1085
|
}
|
|
2995
|
-
|
|
2996
|
-
// 检查是否两个类型都达到阈值,都达到才停止所有国家采样
|
|
2997
|
-
const tagReachedGlobal = allTagQualified.length >= llmMinTagReturn;
|
|
2998
|
-
const nonTagReachedGlobal = allNonTagQualified.length >= llmMinNonTagReturn;
|
|
2999
|
-
if (tagReachedGlobal && nonTagReachedGlobal) break;
|
|
3000
1086
|
}
|
|
3001
1087
|
|
|
3002
|
-
//
|
|
3003
|
-
|
|
3004
|
-
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3005
|
-
const tagCount = Math.min(allTagQualified.length, tagMaxCount);
|
|
3006
|
-
const nonTagMaxCount = safeLimit - tagCount;
|
|
3007
|
-
|
|
3008
|
-
const nonTagQualifiedScores = allScores
|
|
3009
|
-
.filter((s) => s.score >= llmMinScore)
|
|
3010
|
-
.sort((a, b) => b.score - a.score);
|
|
3011
|
-
const finalNonTagQualified = nonTagQualifiedScores.slice(0, nonTagMaxCount).map((s) => s.uniqueId);
|
|
3012
|
-
|
|
3013
|
-
const qualified = [
|
|
3014
|
-
...allTagQualified.slice(0, tagCount),
|
|
3015
|
-
...finalNonTagQualified,
|
|
3016
|
-
];
|
|
1088
|
+
// ===== 最终结果 =====
|
|
1089
|
+
const qualified = [...allTagQualified, ...allNonTagQualified];
|
|
3017
1090
|
|
|
3018
1091
|
if (!qualified.length) {
|
|
3019
1092
|
console.error(
|
|
3020
|
-
`[data-store] LLM
|
|
1093
|
+
`[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
|
|
3021
1094
|
);
|
|
3022
1095
|
return {
|
|
3023
1096
|
moved: 0,
|
|
@@ -3027,11 +1100,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3027
1100
|
};
|
|
3028
1101
|
}
|
|
3029
1102
|
|
|
1103
|
+
console.error(
|
|
1104
|
+
`[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
|
|
1105
|
+
);
|
|
1106
|
+
|
|
3030
1107
|
// 移动符合条件的记录
|
|
3031
1108
|
const placeholders = qualified.map(() => "?").join(", ");
|
|
3032
|
-
const moveTxn =
|
|
3033
|
-
|
|
3034
|
-
|
|
1109
|
+
const moveTxn = getDb().transaction(() => {
|
|
1110
|
+
getDb()
|
|
1111
|
+
.prepare(
|
|
1112
|
+
`
|
|
3035
1113
|
INSERT OR IGNORE INTO jobs (
|
|
3036
1114
|
unique_id, nickname, status, sources, pinned,
|
|
3037
1115
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3050,41 +1128,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3050
1128
|
FROM raw_jobs
|
|
3051
1129
|
WHERE unique_id IN (${placeholders})
|
|
3052
1130
|
`,
|
|
3053
|
-
|
|
1131
|
+
)
|
|
1132
|
+
.run(...qualified);
|
|
3054
1133
|
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
1134
|
+
getDb()
|
|
1135
|
+
.prepare(
|
|
1136
|
+
`DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
|
|
1137
|
+
)
|
|
1138
|
+
.run(...qualified);
|
|
3058
1139
|
});
|
|
3059
|
-
|
|
3060
1140
|
moveTxn();
|
|
3061
1141
|
markStatsDirty();
|
|
3062
1142
|
|
|
3063
1143
|
// 持久化偏移量到数据库
|
|
3064
|
-
|
|
1144
|
+
offsetStore.save();
|
|
3065
1145
|
|
|
3066
1146
|
// 打印最终偏移量状态
|
|
3067
|
-
const finalOffsetSummary = Array.from(
|
|
1147
|
+
const finalOffsetSummary = Array.from(offsetStore.entries())
|
|
3068
1148
|
.map(([k, v]) => `${k}:${v}`)
|
|
3069
1149
|
.join(", ");
|
|
3070
|
-
console.error(
|
|
3071
|
-
`[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
|
|
3072
|
-
);
|
|
3073
1150
|
console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
|
|
3074
|
-
|
|
1151
|
+
|
|
3075
1152
|
return {
|
|
3076
1153
|
moved: qualified.length,
|
|
3077
1154
|
scored: allScores.length,
|
|
3078
1155
|
qualified: qualified.length,
|
|
3079
|
-
scores:
|
|
1156
|
+
scores: allScores,
|
|
3080
1157
|
};
|
|
3081
1158
|
})();
|
|
3082
1159
|
}
|
|
3083
1160
|
|
|
3084
1161
|
// 常规移动:INSERT + DELETE 事务
|
|
3085
|
-
const moveTxn =
|
|
3086
|
-
|
|
3087
|
-
|
|
1162
|
+
const moveTxn = getDb().transaction(() => {
|
|
1163
|
+
getDb()
|
|
1164
|
+
.prepare(
|
|
1165
|
+
`
|
|
3088
1166
|
INSERT OR IGNORE INTO jobs (
|
|
3089
1167
|
unique_id, nickname, status, sources, pinned,
|
|
3090
1168
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3107,11 +1185,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3107
1185
|
COALESCE(video_count, 0) DESC, created_at DESC
|
|
3108
1186
|
LIMIT ?
|
|
3109
1187
|
`,
|
|
3110
|
-
|
|
1188
|
+
)
|
|
1189
|
+
.run(...args, safeLimit);
|
|
3111
1190
|
|
|
3112
1191
|
// 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
|
|
3113
|
-
|
|
3114
|
-
|
|
1192
|
+
getDb()
|
|
1193
|
+
.prepare(
|
|
1194
|
+
`
|
|
3115
1195
|
DELETE FROM raw_jobs
|
|
3116
1196
|
WHERE unique_id IN (
|
|
3117
1197
|
SELECT unique_id FROM raw_jobs
|
|
@@ -3122,7 +1202,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3122
1202
|
LIMIT ?
|
|
3123
1203
|
)
|
|
3124
1204
|
`,
|
|
3125
|
-
|
|
1205
|
+
)
|
|
1206
|
+
.run(...args, safeLimit);
|
|
3126
1207
|
});
|
|
3127
1208
|
|
|
3128
1209
|
moveTxn();
|
|
@@ -3140,9 +1221,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3140
1221
|
) {
|
|
3141
1222
|
// 记录客户端登录状态
|
|
3142
1223
|
clientLoginStatus.set(userId, !!loggedIn);
|
|
3143
|
-
if (
|
|
1224
|
+
if (getDb()) {
|
|
3144
1225
|
const now = Date.now();
|
|
3145
|
-
const ongoingRow =
|
|
1226
|
+
const ongoingRow = getDb()
|
|
3146
1227
|
.prepare(
|
|
3147
1228
|
`
|
|
3148
1229
|
SELECT *
|
|
@@ -3157,10 +1238,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3157
1238
|
)
|
|
3158
1239
|
.get(userId, now, expireMs);
|
|
3159
1240
|
if (ongoingRow) {
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
ongoingRow.unique_id
|
|
3163
|
-
);
|
|
1241
|
+
getDb()
|
|
1242
|
+
.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
|
|
1243
|
+
.run(now, ongoingRow.unique_id);
|
|
3164
1244
|
return {
|
|
3165
1245
|
uniqueId: ongoingRow.unique_id,
|
|
3166
1246
|
nickname: ongoingRow.nickname,
|
|
@@ -3242,7 +1322,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3242
1322
|
for (const filter of filters) {
|
|
3243
1323
|
where.push(filter);
|
|
3244
1324
|
}
|
|
3245
|
-
return
|
|
1325
|
+
return getDb()
|
|
3246
1326
|
.prepare(
|
|
3247
1327
|
`
|
|
3248
1328
|
SELECT *
|
|
@@ -3285,7 +1365,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3285
1365
|
);
|
|
3286
1366
|
args.push(...normalizedLocations);
|
|
3287
1367
|
}
|
|
3288
|
-
return
|
|
1368
|
+
return getDb()
|
|
3289
1369
|
.prepare(
|
|
3290
1370
|
`
|
|
3291
1371
|
SELECT *
|
|
@@ -3355,9 +1435,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3355
1435
|
|
|
3356
1436
|
function claimRow(row) {
|
|
3357
1437
|
if (!row) return null;
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
1438
|
+
getDb()
|
|
1439
|
+
.prepare(
|
|
1440
|
+
"UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
|
|
1441
|
+
)
|
|
1442
|
+
.run(now, userId, row.unique_id);
|
|
3361
1443
|
markStatsDirty();
|
|
3362
1444
|
return {
|
|
3363
1445
|
uniqueId: row.unique_id,
|
|
@@ -3367,7 +1449,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3367
1449
|
};
|
|
3368
1450
|
}
|
|
3369
1451
|
|
|
3370
|
-
const expiredRow =
|
|
1452
|
+
const expiredRow = getDb()
|
|
3371
1453
|
.prepare(
|
|
3372
1454
|
`
|
|
3373
1455
|
SELECT *
|
|
@@ -3382,9 +1464,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3382
1464
|
.get(now, expireMs);
|
|
3383
1465
|
let expiredCandidate = null;
|
|
3384
1466
|
if (expiredRow) {
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
1467
|
+
getDb()
|
|
1468
|
+
.prepare(
|
|
1469
|
+
"UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
|
|
1470
|
+
)
|
|
1471
|
+
.run(expiredRow.unique_id);
|
|
3388
1472
|
expiredCandidate = mapJobRow({
|
|
3389
1473
|
...expiredRow,
|
|
3390
1474
|
status: "pending",
|
|
@@ -3476,7 +1560,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3476
1560
|
return null;
|
|
3477
1561
|
}
|
|
3478
1562
|
|
|
3479
|
-
if (!
|
|
1563
|
+
if (!getDb()) {
|
|
3480
1564
|
const now = Date.now();
|
|
3481
1565
|
|
|
3482
1566
|
// 0. 该客户端有未过期的任务,续期返回
|
|
@@ -3614,16 +1698,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3614
1698
|
locations = null,
|
|
3615
1699
|
loggedIn = true,
|
|
3616
1700
|
) {
|
|
3617
|
-
if (
|
|
1701
|
+
if (getDb()) {
|
|
3618
1702
|
const now = Date.now();
|
|
3619
1703
|
const info = {
|
|
3620
|
-
path: "
|
|
1704
|
+
path: "getDb()",
|
|
3621
1705
|
userId,
|
|
3622
1706
|
expireMs,
|
|
3623
1707
|
loggedIn,
|
|
3624
1708
|
};
|
|
3625
1709
|
|
|
3626
|
-
const ongoingRow =
|
|
1710
|
+
const ongoingRow = getDb()
|
|
3627
1711
|
.prepare(
|
|
3628
1712
|
`
|
|
3629
1713
|
SELECT *
|
|
@@ -3723,7 +1807,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3723
1807
|
ORDER BY follower_count DESC, created_at ASC, unique_id ASC
|
|
3724
1808
|
LIMIT 1
|
|
3725
1809
|
`;
|
|
3726
|
-
const row =
|
|
1810
|
+
const row = getDb()
|
|
1811
|
+
.prepare(sql)
|
|
1812
|
+
.get(...args);
|
|
3727
1813
|
return { row, sql, args };
|
|
3728
1814
|
}
|
|
3729
1815
|
|
|
@@ -3764,7 +1850,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3764
1850
|
ORDER BY created_at ASC, unique_id ASC
|
|
3765
1851
|
LIMIT 1
|
|
3766
1852
|
`;
|
|
3767
|
-
const row =
|
|
1853
|
+
const row = getDb()
|
|
1854
|
+
.prepare(sql)
|
|
1855
|
+
.get(...args);
|
|
3768
1856
|
return { row, sql, args };
|
|
3769
1857
|
}
|
|
3770
1858
|
|
|
@@ -3777,7 +1865,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3777
1865
|
ORDER BY claimed_at ASC
|
|
3778
1866
|
LIMIT 1
|
|
3779
1867
|
`;
|
|
3780
|
-
const expiredRow =
|
|
1868
|
+
const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
|
|
3781
1869
|
info.expired = expiredRow
|
|
3782
1870
|
? {
|
|
3783
1871
|
uniqueId: expiredRow.unique_id,
|
|
@@ -4103,7 +2191,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4103
2191
|
}
|
|
4104
2192
|
|
|
4105
2193
|
function commitJob(uniqueId, result) {
|
|
4106
|
-
if (
|
|
2194
|
+
if (getDb()) {
|
|
4107
2195
|
const user = getJob(uniqueId);
|
|
4108
2196
|
if (!user) return { saved: false, error: "user not found" };
|
|
4109
2197
|
|
|
@@ -4129,7 +2217,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4129
2217
|
}
|
|
4130
2218
|
|
|
4131
2219
|
function commitNewExplore(uniqueId, result) {
|
|
4132
|
-
if (
|
|
2220
|
+
if (getDb()) {
|
|
4133
2221
|
const existing = getJob(uniqueId);
|
|
4134
2222
|
if (existing) {
|
|
4135
2223
|
updateUserFromResult(existing, result);
|
|
@@ -4179,7 +2267,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4179
2267
|
}
|
|
4180
2268
|
|
|
4181
2269
|
function resetJob(uniqueId) {
|
|
4182
|
-
if (
|
|
2270
|
+
if (getDb()) {
|
|
4183
2271
|
const user = getJob(uniqueId);
|
|
4184
2272
|
if (!user) return { saved: false, error: "user not found" };
|
|
4185
2273
|
user.status = "pending";
|
|
@@ -4210,7 +2298,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4210
2298
|
}
|
|
4211
2299
|
|
|
4212
2300
|
function togglePin(uniqueId) {
|
|
4213
|
-
if (
|
|
2301
|
+
if (getDb()) {
|
|
4214
2302
|
const user = getJob(uniqueId);
|
|
4215
2303
|
if (!user) return { saved: false, error: "user not found" };
|
|
4216
2304
|
const nextPinned = !user.pinned;
|
|
@@ -4227,13 +2315,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4227
2315
|
}
|
|
4228
2316
|
|
|
4229
2317
|
function getNextRedoJob(userId, maxAgeSeconds = 43200) {
|
|
4230
|
-
if (
|
|
2318
|
+
if (getDb()) {
|
|
4231
2319
|
const now = Date.now();
|
|
4232
2320
|
const threshold = now - maxAgeSeconds * 1000;
|
|
4233
2321
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
4234
2322
|
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
4235
2323
|
const placeholders = targetLocations.map(() => "?").join(",");
|
|
4236
|
-
const row =
|
|
2324
|
+
const row = getDb()
|
|
4237
2325
|
.prepare(
|
|
4238
2326
|
`
|
|
4239
2327
|
SELECT *
|
|
@@ -4248,9 +2336,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4248
2336
|
)
|
|
4249
2337
|
.get(...targetLocations, defaultTime, threshold, defaultTime);
|
|
4250
2338
|
if (!row) return null;
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
|
|
2339
|
+
getDb()
|
|
2340
|
+
.prepare(
|
|
2341
|
+
"UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
|
|
2342
|
+
)
|
|
2343
|
+
.run(now, now, row.unique_id);
|
|
4254
2344
|
return {
|
|
4255
2345
|
uniqueId: row.unique_id,
|
|
4256
2346
|
nickname: row.nickname,
|
|
@@ -4299,7 +2389,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4299
2389
|
}
|
|
4300
2390
|
|
|
4301
2391
|
function commitRedoJob(uniqueId, result) {
|
|
4302
|
-
if (
|
|
2392
|
+
if (getDb()) {
|
|
4303
2393
|
const user = getJob(uniqueId);
|
|
4304
2394
|
if (!user) return { saved: false, error: "user not found" };
|
|
4305
2395
|
user.refreshTime = Date.now();
|
|
@@ -4443,13 +2533,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4443
2533
|
: [];
|
|
4444
2534
|
const hasCountryFilter = targetCountries.length > 0;
|
|
4445
2535
|
|
|
4446
|
-
if (
|
|
2536
|
+
if (getDb()) {
|
|
4447
2537
|
const l = Math.max(1, parseInt(limit) || 5);
|
|
4448
2538
|
|
|
4449
2539
|
let sql = `
|
|
4450
2540
|
SELECT *
|
|
4451
2541
|
FROM jobs_base
|
|
4452
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
2542
|
+
WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
|
|
4453
2543
|
AND COALESCE(user_update_count, 0) <= 0
|
|
4454
2544
|
`;
|
|
4455
2545
|
const sqlParams = [];
|
|
@@ -4460,18 +2550,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4460
2550
|
sqlParams.push(...targetCountries);
|
|
4461
2551
|
}
|
|
4462
2552
|
|
|
4463
|
-
// 优先级:
|
|
2553
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余(最新任务优先)
|
|
4464
2554
|
sql += ` ORDER BY
|
|
2555
|
+
CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
|
|
4465
2556
|
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
4466
|
-
created_at
|
|
4467
|
-
unique_id
|
|
2557
|
+
created_at DESC,
|
|
2558
|
+
unique_id DESC
|
|
4468
2559
|
LIMIT ?`;
|
|
4469
2560
|
sqlParams.push(l);
|
|
4470
2561
|
|
|
4471
|
-
const rows =
|
|
2562
|
+
const rows = getDb()
|
|
2563
|
+
.prepare(sql)
|
|
2564
|
+
.all(...sqlParams);
|
|
4472
2565
|
if (rows.length === 0) return [];
|
|
4473
2566
|
const now = Date.now();
|
|
4474
|
-
const bumpStmt =
|
|
2567
|
+
const bumpStmt = getDb().prepare(
|
|
4475
2568
|
`
|
|
4476
2569
|
UPDATE jobs_base
|
|
4477
2570
|
SET user_update_count = COALESCE(user_update_count, 0) + 1,
|
|
@@ -4479,7 +2572,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4479
2572
|
WHERE unique_id = ?
|
|
4480
2573
|
`,
|
|
4481
2574
|
);
|
|
4482
|
-
const bumpTxn =
|
|
2575
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4483
2576
|
for (const item of items) {
|
|
4484
2577
|
bumpStmt.run(now, item.unique_id);
|
|
4485
2578
|
}
|
|
@@ -4497,9 +2590,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4497
2590
|
const pending = data
|
|
4498
2591
|
.filter((u) => {
|
|
4499
2592
|
const updateCount = u.userUpdateCount;
|
|
4500
|
-
|
|
4501
|
-
|
|
4502
|
-
|
|
2593
|
+
// ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
|
|
2594
|
+
const ttSellerEligible =
|
|
2595
|
+
u.ttSeller === null ||
|
|
2596
|
+
u.ttSeller === undefined ||
|
|
2597
|
+
u.ttSeller === "" ||
|
|
2598
|
+
u.ttSeller === 1;
|
|
2599
|
+
if (!ttSellerEligible) return false;
|
|
4503
2600
|
if (
|
|
4504
2601
|
updateCount === null ||
|
|
4505
2602
|
updateCount === undefined ||
|
|
@@ -4514,7 +2611,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4514
2611
|
return false;
|
|
4515
2612
|
})
|
|
4516
2613
|
.sort((a, b) => {
|
|
4517
|
-
// 优先级:
|
|
2614
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
|
|
2615
|
+
const aIsSeller = a.ttSeller === 1 ? 0 : 1;
|
|
2616
|
+
const bIsSeller = b.ttSeller === 1 ? 0 : 1;
|
|
2617
|
+
if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
|
|
4518
2618
|
const aIsTag = (a.sources || "").includes("tag");
|
|
4519
2619
|
const bIsTag = (b.sources || "").includes("tag");
|
|
4520
2620
|
if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
|
|
@@ -4531,7 +2631,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4531
2631
|
}
|
|
4532
2632
|
|
|
4533
2633
|
function updateUserInfo(uniqueId, info) {
|
|
4534
|
-
if (
|
|
2634
|
+
if (getDb()) {
|
|
4535
2635
|
return updateJobInfo(uniqueId, info, true);
|
|
4536
2636
|
}
|
|
4537
2637
|
|
|
@@ -4550,15 +2650,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4550
2650
|
}
|
|
4551
2651
|
|
|
4552
2652
|
function updateUserLocation(uniqueId, location) {
|
|
4553
|
-
if (
|
|
4554
|
-
const existing =
|
|
2653
|
+
if (getDb()) {
|
|
2654
|
+
const existing = getDb()
|
|
4555
2655
|
.prepare("SELECT * FROM jobs WHERE unique_id = ?")
|
|
4556
2656
|
.get(uniqueId);
|
|
4557
2657
|
if (!existing) return { error: "user not found" };
|
|
4558
2658
|
const now = Date.now();
|
|
4559
|
-
|
|
4560
|
-
|
|
4561
|
-
|
|
2659
|
+
getDb()
|
|
2660
|
+
.prepare(
|
|
2661
|
+
"UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
|
|
2662
|
+
)
|
|
2663
|
+
.run(location, now, now, uniqueId);
|
|
4562
2664
|
return { ok: true, location, modifiedAt: now };
|
|
4563
2665
|
}
|
|
4564
2666
|
|
|
@@ -4574,13 +2676,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4574
2676
|
|
|
4575
2677
|
// 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
|
|
4576
2678
|
function moveJobToRaw(uniqueId) {
|
|
4577
|
-
if (!
|
|
2679
|
+
if (!getDb()) return false;
|
|
4578
2680
|
const safeId = String(uniqueId).trim();
|
|
4579
2681
|
if (!safeId) return false;
|
|
4580
2682
|
|
|
4581
|
-
const moveSingleTxn =
|
|
4582
|
-
|
|
4583
|
-
|
|
2683
|
+
const moveSingleTxn = getDb().transaction(() => {
|
|
2684
|
+
getDb()
|
|
2685
|
+
.prepare(
|
|
2686
|
+
`
|
|
4584
2687
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4585
2688
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4586
2689
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4602,21 +2705,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4602
2705
|
user_create_time
|
|
4603
2706
|
FROM jobs WHERE unique_id = ?
|
|
4604
2707
|
`,
|
|
4605
|
-
|
|
2708
|
+
)
|
|
2709
|
+
.run(safeId);
|
|
4606
2710
|
|
|
4607
|
-
|
|
2711
|
+
getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
|
|
4608
2712
|
});
|
|
4609
2713
|
moveSingleTxn();
|
|
4610
2714
|
return true;
|
|
4611
2715
|
}
|
|
4612
2716
|
|
|
4613
2717
|
function batchUpdateUserInfo(updates) {
|
|
4614
|
-
if (
|
|
2718
|
+
if (getDb()) {
|
|
4615
2719
|
const results = [];
|
|
4616
2720
|
const rawMoveList = [];
|
|
4617
2721
|
const sellerMoveList = [];
|
|
4618
2722
|
|
|
4619
|
-
const txn =
|
|
2723
|
+
const txn = getDb().transaction((items) => {
|
|
4620
2724
|
items.forEach((item) => {
|
|
4621
2725
|
const uniqueId = item.uniqueId;
|
|
4622
2726
|
// 处理 { error: true, statusCode: xxx } 的情况
|
|
@@ -4668,8 +2772,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4668
2772
|
// 批量移动商家用户到 jobs
|
|
4669
2773
|
if (sellerMoveList.length > 0) {
|
|
4670
2774
|
const placeholders = sellerMoveList.map(() => "?").join(",");
|
|
4671
|
-
|
|
4672
|
-
|
|
2775
|
+
getDb()
|
|
2776
|
+
.prepare(
|
|
2777
|
+
`
|
|
4673
2778
|
INSERT OR REPLACE INTO jobs (
|
|
4674
2779
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4675
2780
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4691,18 +2796,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4691
2796
|
user_create_time
|
|
4692
2797
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4693
2798
|
`,
|
|
4694
|
-
|
|
2799
|
+
)
|
|
2800
|
+
.run(...sellerMoveList);
|
|
4695
2801
|
|
|
4696
|
-
|
|
4697
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4698
|
-
|
|
2802
|
+
getDb()
|
|
2803
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2804
|
+
.run(...sellerMoveList);
|
|
4699
2805
|
}
|
|
4700
2806
|
|
|
4701
2807
|
// 批量移动非商家用户到 raw_jobs
|
|
4702
2808
|
if (rawMoveList.length > 0) {
|
|
4703
2809
|
const placeholders = rawMoveList.map(() => "?").join(",");
|
|
4704
|
-
|
|
4705
|
-
|
|
2810
|
+
getDb()
|
|
2811
|
+
.prepare(
|
|
2812
|
+
`
|
|
4706
2813
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4707
2814
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4708
2815
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4724,11 +2831,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4724
2831
|
user_create_time
|
|
4725
2832
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4726
2833
|
`,
|
|
4727
|
-
|
|
2834
|
+
)
|
|
2835
|
+
.run(...rawMoveList);
|
|
4728
2836
|
|
|
4729
|
-
|
|
4730
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4731
|
-
|
|
2837
|
+
getDb()
|
|
2838
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2839
|
+
.run(...rawMoveList);
|
|
4732
2840
|
}
|
|
4733
2841
|
|
|
4734
2842
|
// 清理内部标记
|
|
@@ -4780,8 +2888,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4780
2888
|
return { registered: 0, skipped: 0 };
|
|
4781
2889
|
}
|
|
4782
2890
|
|
|
4783
|
-
if (
|
|
4784
|
-
const insertStmt =
|
|
2891
|
+
if (getDb()) {
|
|
2892
|
+
const insertStmt = getDb().prepare(`
|
|
4785
2893
|
INSERT OR IGNORE INTO videos (
|
|
4786
2894
|
id,
|
|
4787
2895
|
href,
|
|
@@ -4797,7 +2905,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4797
2905
|
let registered = 0;
|
|
4798
2906
|
let skipped = 0;
|
|
4799
2907
|
const now = Date.now();
|
|
4800
|
-
const txn =
|
|
2908
|
+
const txn = getDb().transaction((items) => {
|
|
4801
2909
|
for (const item of items) {
|
|
4802
2910
|
const result = insertStmt.run(
|
|
4803
2911
|
item.id,
|
|
@@ -4844,7 +2952,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4844
2952
|
}
|
|
4845
2953
|
|
|
4846
2954
|
function getVideos() {
|
|
4847
|
-
if (
|
|
2955
|
+
if (getDb()) {
|
|
4848
2956
|
return getAllVideoRows().map(mapVideoRow);
|
|
4849
2957
|
}
|
|
4850
2958
|
return videos;
|
|
@@ -4852,7 +2960,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4852
2960
|
|
|
4853
2961
|
function getVideo(videoId) {
|
|
4854
2962
|
if (!videoId) return null;
|
|
4855
|
-
if (
|
|
2963
|
+
if (getDb()) {
|
|
4856
2964
|
return mapVideoRow(getVideoRow(videoId));
|
|
4857
2965
|
}
|
|
4858
2966
|
return videos.find((video) => video.id === videoId) || null;
|
|
@@ -4862,8 +2970,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4862
2970
|
const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
|
|
4863
2971
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
4864
2972
|
|
|
4865
|
-
if (
|
|
4866
|
-
const rows =
|
|
2973
|
+
if (getDb()) {
|
|
2974
|
+
const rows = getDb()
|
|
4867
2975
|
.prepare(
|
|
4868
2976
|
`
|
|
4869
2977
|
SELECT *
|
|
@@ -4873,7 +2981,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4873
2981
|
`,
|
|
4874
2982
|
)
|
|
4875
2983
|
.all(safeLimit, safeOffset);
|
|
4876
|
-
const total =
|
|
2984
|
+
const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4877
2985
|
return {
|
|
4878
2986
|
total,
|
|
4879
2987
|
limit: safeLimit,
|
|
@@ -4891,16 +2999,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4891
2999
|
}
|
|
4892
3000
|
|
|
4893
3001
|
function getVideoCount() {
|
|
4894
|
-
if (
|
|
4895
|
-
return
|
|
3002
|
+
if (getDb()) {
|
|
3003
|
+
return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4896
3004
|
}
|
|
4897
3005
|
return videos.length;
|
|
4898
3006
|
}
|
|
4899
3007
|
|
|
4900
3008
|
function getPendingCommentTasks(limit) {
|
|
4901
|
-
if (
|
|
3009
|
+
if (getDb()) {
|
|
4902
3010
|
const l = Math.max(1, parseInt(limit) || 1);
|
|
4903
|
-
const rows =
|
|
3011
|
+
const rows = getDb()
|
|
4904
3012
|
.prepare(
|
|
4905
3013
|
`
|
|
4906
3014
|
SELECT *
|
|
@@ -4912,14 +3020,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4912
3020
|
)
|
|
4913
3021
|
.all(l);
|
|
4914
3022
|
if (rows.length === 0) return [];
|
|
4915
|
-
const bumpStmt =
|
|
3023
|
+
const bumpStmt = getDb().prepare(
|
|
4916
3024
|
`
|
|
4917
3025
|
UPDATE videos
|
|
4918
3026
|
SET user_update_count = COALESCE(user_update_count, 0) + 1
|
|
4919
3027
|
WHERE id = ?
|
|
4920
3028
|
`,
|
|
4921
3029
|
);
|
|
4922
|
-
const bumpTxn =
|
|
3030
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4923
3031
|
for (const item of items) bumpStmt.run(item.id);
|
|
4924
3032
|
});
|
|
4925
3033
|
bumpTxn(rows);
|
|
@@ -4949,17 +3057,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4949
3057
|
}
|
|
4950
3058
|
|
|
4951
3059
|
function commitCommentTask(videoId) {
|
|
4952
|
-
if (
|
|
3060
|
+
if (getDb()) {
|
|
4953
3061
|
const video = getVideoRow(videoId);
|
|
4954
3062
|
if (!video) return { ok: false, error: "video not found" };
|
|
4955
3063
|
const nextCount = (video.user_update_count || 0) + 1;
|
|
4956
|
-
|
|
4957
|
-
|
|
3064
|
+
getDb()
|
|
3065
|
+
.prepare(
|
|
3066
|
+
`
|
|
4958
3067
|
UPDATE videos
|
|
4959
3068
|
SET user_update_count = ?
|
|
4960
3069
|
WHERE id = ?
|
|
4961
3070
|
`,
|
|
4962
|
-
|
|
3071
|
+
)
|
|
3072
|
+
.run(nextCount, videoId);
|
|
4963
3073
|
return { ok: true, userUpdateCount: nextCount };
|
|
4964
3074
|
}
|
|
4965
3075
|
|
|
@@ -5024,6 +3134,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5024
3134
|
getClientLoginStatus,
|
|
5025
3135
|
trackClient,
|
|
5026
3136
|
getActiveClients,
|
|
3137
|
+
moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
|
|
5027
3138
|
registerVideos,
|
|
5028
3139
|
getVideo,
|
|
5029
3140
|
getVideos,
|
|
@@ -5051,6 +3162,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5051
3162
|
|
|
5052
3163
|
// Helper: returns the LLM sampling offsets as a plain object built from
// the entries of offsetStore.
function getLlmSampleOffsets() {
  const offsetPairs = offsetStore.entries();
  return Object.fromEntries(offsetPairs);
}
|
|
3167
|
+
|
|
3168
|
+
// ===== 将 jobs/raw_jobs 中商家用户移动到 jobs_base =====
|
|
3169
|
+
|
|
3170
|
+
/**
 * Moves seller accounts without videos from `jobs` and `raw_jobs` into
 * `jobs_base`.
 *
 * Steps, executed in order (not wrapped in a transaction):
 *   1. Copy every row matching `tt_seller = 1 AND video_count = 0` from
 *      `jobs`, then from `raw_jobs`, into `jobs_base` via INSERT OR IGNORE.
 *   2. Set `user_update_count = 0` on every `jobs_base` row matching the same
 *      condition. This covers both freshly copied rows and rows that were
 *      already sitting in `jobs_base`. `tt_seller` is left at 1 so later
 *      priority decisions can still use it.
 *   3. Delete the matching rows from `jobs` and `raw_jobs`.
 *   4. Count the `jobs_base` rows with `tt_seller = 1`, `video_count = 0` and
 *      `user_update_count <= 0` (per the original author: the rows that the
 *      "attach" flow can claim). This count is best-effort.
 *
 * NOTE(review): rows skipped by INSERT OR IGNORE because of a constraint
 * conflict in `jobs_base` are still deleted from the source tables in step 3,
 * since the DELETE uses only the seller/no-video condition.
 *
 * @returns {{ok: true, fromJobs: number, fromRawJobs: number,
 *   totalInserted: number, resetCount: number, availableInBase: number}
 *   | {ok: false, error: string}} Result summary, or a failure description.
 *   Failures are reported through the return value; SQL errors in steps 1-3
 *   are caught and not rethrown.
 */
function moveSellerJobsToBase() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  // Columns copied verbatim from the source table into jobs_base.
  const COLUMNS = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "claimed_by",
    "claimed_at",
    "error",
    "pinned",
    "no_video",
    "restricted",
    "user_update_count",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "confirmed_location",
    "modified_at",
    "follower_count",
    "following_count",
    "heart_count",
    "refresh_time",
    "processed",
    "processed_at",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "sec_uid",
    "status_code",
    "latest_video_time",
    "bio_link",
  ];
  const cols = COLUMNS.join(",");
  const insertSql = `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM `;
  const condition = "WHERE tt_seller = 1 AND video_count = 0";

  let fromJobs = 0;
  let fromRawJobs = 0;

  try {
    // 1. jobs -> jobs_base
    const result1 = db.prepare(insertSql + "jobs " + condition).run();
    fromJobs = result1.changes || 0;

    // 2. raw_jobs -> jobs_base
    const result2 = db.prepare(insertSql + "raw_jobs " + condition).run();
    fromRawJobs = result2.changes || 0;
  } catch (e) {
    return { ok: false, error: e.message };
  }

  // 3. Reset user_update_count to 0 while keeping tt_seller = 1 for later
  //    priority decisions. Applies to the newly moved rows as well as rows
  //    already lingering in jobs_base.
  let resetCount = 0;
  try {
    // The affected-row count lives on the object returned by run(), not on
    // the prepared statement itself, so the result must be captured here.
    const resetResult = db
      .prepare(
        `UPDATE jobs_base
         SET user_update_count = 0
         WHERE video_count = 0
           AND tt_seller = 1`,
      )
      .run();
    resetCount = resetResult.changes || 0;
  } catch (e) {
    return {
      ok: false,
      error: `reset failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
    };
  }

  // 4. Remove the moved rows from jobs and raw_jobs.
  try {
    db.prepare("DELETE FROM jobs " + condition).run();
    db.prepare("DELETE FROM raw_jobs " + condition).run();
  } catch (e) {
    return {
      ok: false,
      error: `delete failed: ${e.message}, inserted: jobs=${fromJobs}, raw_jobs=${fromRawJobs}`,
    };
  }

  // 5. Verification: count jobs_base rows with tt_seller = 1 and
  //    user_update_count <= 0 (described by the author as claimable by attach).
  let available = 0;
  try {
    const row = db
      .prepare(
        `SELECT COUNT(*) as total FROM jobs_base
         WHERE tt_seller = 1
           AND COALESCE(user_update_count, 0) <= 0
           AND video_count = 0`,
      )
      .get();
    available = row.total;
  } catch (_) {
    // Deliberately best-effort: the move itself already succeeded, so a
    // failing statistics query only leaves availableInBase at 0.
  }

  return {
    ok: true,
    fromJobs,
    fromRawJobs,
    totalInserted: fromJobs + fromRawJobs,
    resetCount,
    availableInBase: available,
  };
}
|
|
5056
3287
|
}
|