tt-help-cli-ycl 1.3.92 → 1.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -94
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +4 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +537 -2298
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +24 -0
- package/src/watch/tag-service.js +142 -11
|
@@ -0,0 +1,5019 @@
|
|
|
1
|
+
import fs from "fs";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import Database from "better-sqlite3";
|
|
4
|
+
import {
|
|
5
|
+
isLocationInList,
|
|
6
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
7
|
+
} from "../lib/target-locations.js";
|
|
8
|
+
|
|
9
|
+
// SQLite users table (used for duplicate detection)
|
|
10
|
+
let db = null;
|
|
11
|
+
let dbPath = null;
|
|
12
|
+
|
|
13
|
+
/**
 * Validates a database file path and converts it to an absolute path.
 *
 * @param {string} filePath - Path to the SQLite file; its extension must be
 *   `.db` (compared case-insensitively).
 * @returns {string} The absolute path produced by `path.resolve(filePath)`.
 * @throws {Error} When `filePath` is falsy or does not end in `.db`.
 */
function normalizeDbFilePath(filePath) {
  if (!filePath) throw new Error("db path is required");

  const absolutePath = path.resolve(filePath);
  const extension = path.extname(absolutePath).toLowerCase();
  if (extension === ".db") return absolutePath;

  throw new Error(`仅支持 .db 路径,当前为: ${filePath}`);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Closes the module-level database handle, if one is open, and clears both
 * the cached handle and the cached database path.
 */
function resetDbConnection() {
  const openHandle = db;
  if (openHandle) {
    openHandle.close();
    db = null;
  }
  dbPath = null;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Reads the legacy user JSON exports and merges them into one list keyed by
 * unique id.
 *
 * Each file is expected to contain a JSON array. Entries without a
 * `uniqueId`/`unique_id` are skipped. When the same id appears more than once
 * the later entry's fields override the earlier ones, and the done-file is
 * read after the user-file. A missing file, a non-array payload, or a
 * read/parse failure (which is logged) contributes nothing.
 *
 * @param {string} [userFilePath] - Path of the pending-users export.
 * @param {string} [doneFilePath] - Path of the processed-users export.
 * @returns {object[]} Merged user records, each carrying a `uniqueId` field.
 */
function loadLegacyUsersFromFiles(userFilePath, doneFilePath) {
  const usersById = new Map();
  const inputs = [
    [userFilePath, "result.json"],
    [doneFilePath, "result-done.json"],
  ];

  for (const [sourcePath, label] of inputs) {
    if (!sourcePath || !fs.existsSync(sourcePath)) continue;

    try {
      const records = JSON.parse(fs.readFileSync(sourcePath, "utf-8"));
      if (!Array.isArray(records)) continue;

      for (const record of records) {
        const uniqueId = record?.uniqueId || record?.unique_id;
        if (uniqueId) {
          const previous = usersById.get(uniqueId);
          usersById.set(uniqueId, { ...previous, ...record, uniqueId });
        }
      }
    } catch (e) {
      console.error(`[data-store] SQLite 导入 ${label} 失败: ${e.message}`);
    }
  }

  return Array.from(usersById.values());
}
|
|
60
|
+
|
|
61
|
+
/**
 * Reads the legacy videos JSON export.
 *
 * @param {string} [videoPath] - Path of the videos export file.
 * @returns {Array} The parsed array, or an empty array when the path is
 *   falsy, the file does not exist, the payload is not an array, or reading /
 *   parsing fails (the failure is logged).
 */
function loadLegacyVideosFromFile(videoPath) {
  const fileAvailable = Boolean(videoPath) && fs.existsSync(videoPath);
  if (!fileAvailable) return [];

  let contents;
  try {
    contents = JSON.parse(fs.readFileSync(videoPath, "utf-8"));
  } catch (e) {
    console.error(
      `[data-store] SQLite 导入 result-videos.json 失败: ${e.message}`,
    );
    return [];
  }

  return Array.isArray(contents) ? contents : [];
}
|
|
75
|
+
|
|
76
|
+
/**
 * Opens (creating it if necessary) the SQLite database at `filePath`, stores
 * the handle and resolved path in module state, and makes sure every table,
 * column and index this module relies on exists.
 *
 * Existing databases are upgraded in place: columns introduced after a table
 * was first created are appended with `ALTER TABLE ... ADD COLUMN`, in a fixed
 * order, so the resulting column order is the same as before this refactor.
 *
 * If schema setup fails, the freshly opened handle is closed and module state
 * is cleared before the error is rethrown, so callers never observe a
 * half-initialised connection.
 *
 * @param {string} filePath - Path of the `.db` file to open.
 * @throws {Error} When the path is invalid or any schema statement fails.
 */
function initUserDb(filePath) {
  dbPath = normalizeDbFilePath(filePath);
  fs.mkdirSync(path.dirname(dbPath), { recursive: true });
  db = new Database(dbPath);

  // Columns shared by jobs, jobs_base and raw_jobs, in creation order.
  const jobCoreColumns = [
    "unique_id TEXT PRIMARY KEY",
    "nickname TEXT",
    "status TEXT DEFAULT 'pending'",
    "sources TEXT",
    "claimed_by TEXT",
    "claimed_at INTEGER",
    "error TEXT",
    "pinned INTEGER DEFAULT 0",
    "no_video INTEGER DEFAULT 0",
    "restricted INTEGER DEFAULT 0",
    "user_update_count INTEGER DEFAULT 0",
    "tt_seller INTEGER",
    "verified INTEGER",
    "video_count INTEGER DEFAULT 0",
    "comment_count INTEGER DEFAULT 0",
    "guessed_location TEXT",
    "location_created TEXT",
    "confirmed_location TEXT",
    "modified_at INTEGER",
    "follower_count INTEGER DEFAULT 0",
    "following_count INTEGER DEFAULT 0",
    "heart_count INTEGER DEFAULT 0",
    "refresh_time INTEGER",
    "processed INTEGER DEFAULT 0",
    "processed_at INTEGER",
    "created_at INTEGER",
    "updated_at INTEGER",
    "region TEXT",
    "signature TEXT",
    "sec_uid TEXT",
    "status_code INTEGER",
  ];

  // Creates one of the job-shaped tables: the shared columns plus any extras.
  const createJobTable = (table, extraColumns = []) => {
    const columnsSql = [...jobCoreColumns, ...extraColumns].join(",\n        ");
    db.exec(`
      CREATE TABLE IF NOT EXISTS ${table} (
        ${columnsSql}
      )
    `);
  };

  // Appends every column of `columns` (name -> type) that `table` lacks.
  // Table and column names are hard-coded below, never caller-supplied.
  const addMissingColumns = (table, columns) => {
    const existing = new Set(
      db
        .prepare(`PRAGMA table_info(${table})`)
        .all()
        .map((column) => column.name),
    );
    for (const [name, type] of Object.entries(columns)) {
      if (!existing.has(name)) {
        db.exec(`ALTER TABLE ${table} ADD COLUMN ${name} ${type}`);
      }
    }
  };

  // Migration set applied to both jobs_base and raw_jobs.
  const derivedJobTableColumns = {
    status_code: "INTEGER",
    latest_video_time: "INTEGER",
    confirmed_location: "TEXT",
    modified_at: "INTEGER",
    bio_link: "TEXT",
    user_create_time: "INTEGER",
  };

  try {
    db.pragma("journal_mode = WAL");

    db.exec(`
      CREATE TABLE IF NOT EXISTS users (
        unique_id TEXT PRIMARY KEY,
        tt_seller TEXT,
        verified INTEGER,
        location_created TEXT,
        created_at TEXT,
        updated_at TEXT
      )
    `);

    createJobTable("jobs");
    addMissingColumns("jobs", {
      status_code: "INTEGER",
      latest_video_time: "INTEGER",
      confirmed_location: "TEXT",
      modified_at: "INTEGER",
      bio_link: "TEXT",
      top_video_play_count: "INTEGER",
      top_video_href: "TEXT",
      user_create_time: "INTEGER",
    });

    createJobTable("jobs_base", ["latest_video_time INTEGER", "bio_link TEXT"]);
    addMissingColumns("jobs_base", derivedJobTableColumns);

    createJobTable("raw_jobs", ["latest_video_time INTEGER"]);
    addMissingColumns("raw_jobs", derivedJobTableColumns);

    db.exec(`
      CREATE TABLE IF NOT EXISTS videos (
        id TEXT PRIMARY KEY,
        href TEXT,
        author_unique_id TEXT,
        location_created TEXT,
        tt_seller INTEGER DEFAULT 0,
        registered_at INTEGER,
        user_update_count INTEGER DEFAULT 0,
        play_count INTEGER,
        digg_count INTEGER,
        comment_count INTEGER,
        share_count INTEGER,
        collect_count INTEGER,
        stats_updated_at INTEGER,
        create_time INTEGER
      )
    `);

    // Expression shared by the three "claim pending" partial indexes.
    const claimOrderColumns =
      "UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC";

    const indexStatements = [
      `CREATE INDEX IF NOT EXISTS idx_jobs_status_video
        ON jobs(status, video_count DESC)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
        ON jobs(claimed_by, status, claimed_at)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
        ON jobs(status, claimed_at)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
        ON jobs(tt_seller, verified, location_created, refresh_time)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
        ON jobs(status, pinned DESC, guessed_location, follower_count DESC)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
        ON jobs(created_at ASC, unique_id ASC)
        WHERE status = 'pending' AND COALESCE(pinned, 0) = 1`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
        ON jobs(${claimOrderColumns})
        WHERE status = 'pending'
          AND COALESCE(pinned, 0) = 0
          AND tt_seller = 1
          AND verified = 0`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
        ON jobs(${claimOrderColumns})
        WHERE status = 'pending'
          AND COALESCE(pinned, 0) = 0
          AND (
            instr(COALESCE(sources, ''), '"following"') > 0
            OR instr(COALESCE(sources, ''), '"follower"') > 0
          )`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
        ON jobs(${claimOrderColumns})
        WHERE status = 'pending' AND COALESCE(pinned, 0) = 0`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
        ON jobs(created_at ASC, unique_id ASC)
        WHERE (tt_seller IS NULL OR tt_seller = '')
          AND (user_update_count IS NULL OR user_update_count <= 0)`,
      `CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
        ON jobs(created_at ASC, unique_id ASC)
        WHERE COALESCE(tt_seller, '') = ''
          AND COALESCE(user_update_count, 0) <= 0`,
      `CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
        ON videos(user_update_count, tt_seller DESC, registered_at ASC)`,
      `CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
        ON videos(tt_seller DESC, registered_at ASC, id)
        WHERE user_update_count IS NULL OR user_update_count <= 0`,
    ];
    for (const statement of indexStatements) {
      db.exec(statement);
    }

    addMissingColumns("videos", {
      play_count: "INTEGER",
      digg_count: "INTEGER",
      comment_count: "INTEGER",
      share_count: "INTEGER",
      collect_count: "INTEGER",
      stats_updated_at: "INTEGER",
      create_time: "INTEGER",
    });

    // tags table: tag discovery and scoring.
    db.exec(`
      CREATE TABLE IF NOT EXISTS tags (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        tag TEXT NOT NULL UNIQUE,
        status TEXT NOT NULL DEFAULT 'new',
        score REAL NOT NULL DEFAULT 0,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        scored_at TEXT,
        score_count INTEGER NOT NULL DEFAULT 0,
        countries TEXT NOT NULL DEFAULT '[]',
        matched_countries TEXT DEFAULT '[]',
        total_posts INTEGER DEFAULT 0,
        author_count INTEGER DEFAULT 0,
        matched_authors INTEGER DEFAULT 0,
        pushed_users INTEGER DEFAULT 0,
        source TEXT NOT NULL DEFAULT 'llm',
        user_prompt TEXT,
        last_error TEXT
      )
    `);
    db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
    db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);

    const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
    console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
  } catch (err) {
    // Do not leave a partially initialised handle behind in module state.
    resetDbConnection();
    throw err;
  }
}
|
|
423
|
+
|
|
424
|
+
/**
 * Imports the legacy JSON exports into the SQLite store.
 *
 * Any open connection is closed first and the database at `dbFilePath` is
 * (re)initialised. Users are inserted into `users` and queued in `jobs`;
 * videos are inserted into `videos`. All inserts use `INSERT OR IGNORE`, so
 * rows that already exist are left untouched. Users and videos are imported
 * in two separate transactions.
 *
 * @param {object} options
 * @param {string} options.dbFilePath - Target `.db` file.
 * @param {string} [options.usersFilePath] - Legacy pending-users export.
 * @param {string} [options.doneFilePath] - Legacy processed-users export.
 * @param {string} [options.videosFilePath] - Legacy videos export.
 * @returns {{dbPath: string, usersImported: number, jobsImported: number,
 *   videosImported: number, totalUsers: number, totalJobs: number,
 *   totalVideos: number}} Row-count deltas and totals after the import.
 */
export function importLegacyJsonToDb({
  dbFilePath,
  usersFilePath,
  doneFilePath,
  videosFilePath,
}) {
  resetDbConnection();
  initUserDb(dbFilePath);

  const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
  const legacyVideos = loadLegacyVideosFromFile(videosFilePath);

  // Table names are fixed literals below, never caller-supplied.
  const countRows = (table) =>
    db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c;
  const snapshotCounts = () => ({
    users: countRows("users"),
    jobs: countRows("jobs"),
    videos: countRows("videos"),
  });

  const before = snapshotCounts();

  const insertUserStmt = db.prepare(`
    INSERT OR IGNORE INTO users (unique_id) VALUES (?)
  `);
  const insertVideoStmt = db.prepare(`
    INSERT OR IGNORE INTO videos (
      id,
      href,
      author_unique_id,
      location_created,
      tt_seller,
      registered_at,
      user_update_count,
      create_time
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const importUsersTxn = db.transaction((items) => {
    for (const item of items) {
      const uniqueId = item.uniqueId || item.unique_id;
      if (!uniqueId) continue;
      insertUserStmt.run(uniqueId);
      addJobToDb({ ...item, uniqueId });
    }
  });

  const importVideosTxn = db.transaction((items) => {
    for (const item of items) {
      if (!item?.id) continue;
      insertVideoStmt.run(
        item.id,
        item.href || null,
        item.authorUniqueId || item.author_unique_id || null,
        item.locationCreated || item.location_created || null,
        // Accept the snake_case spelling too, like every other field here.
        item.ttSeller || item.tt_seller ? 1 : 0,
        item.registeredAt || item.registered_at || Date.now(),
        item.userUpdateCount || item.user_update_count || 0,
        item.createTime || item.create_time || null,
      );
    }
  });

  importUsersTxn(legacyUsers);
  importVideosTxn(legacyVideos);

  const after = snapshotCounts();

  return {
    dbPath,
    usersImported: after.users - before.users,
    jobsImported: after.jobs - before.jobs,
    videosImported: after.videos - before.videos,
    totalUsers: after.users,
    totalJobs: after.jobs,
    totalVideos: after.videos,
  };
}
|
|
499
|
+
|
|
500
|
+
/**
 * Public entry point for shutting the store down: closes the SQLite
 * connection (if open) and clears the cached handle and path.
 */
export function closeStoreDb() {
  resetDbConnection();
}
|
|
503
|
+
|
|
504
|
+
/**
 * Tells whether a user id is already recorded in the `users` table.
 *
 * @param {string} uid - Unique id to look up.
 * @returns {boolean} `true` when a matching row exists; `false` when it does
 *   not or when no database is open.
 */
function hasUserInDb(uid) {
  if (!db) return false;

  const lookup = db.prepare("SELECT 1 FROM users WHERE unique_id = ?");
  return Boolean(lookup.get(uid));
}
|
|
509
|
+
|
|
510
|
+
/**
 * Records a user in the `users` table. An existing row with the same
 * `unique_id` is left untouched (`INSERT OR IGNORE`). Does nothing when no
 * database is open.
 *
 * `created_at` and `updated_at` are written from a single timestamp so a new
 * row always has identical values in both columns.
 *
 * @param {object} user
 * @param {string} user.uniqueId - Primary key of the user.
 * @param {*} [user.ttSeller] - Seller flag; undefined / null / "" is stored
 *   as NULL, any other value as 1 (truthy) or 0 (falsy).
 * @param {*} [user.verified] - Verified flag; same mapping as `ttSeller`.
 * @param {string} [user.locationCreated] - Stored as-is, or NULL when falsy.
 */
function addUserToDb(user) {
  if (!db) return;

  // undefined / null / "" mean "unknown" and map to NULL rather than 0.
  const toTriState = (value) => {
    if (value === undefined || value === null || value === "") return null;
    return value ? 1 : 0;
  };

  const timestamp = new Date().toISOString();

  db.prepare(
    `
    INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `,
  ).run(
    user.uniqueId,
    toTriState(user.ttSeller),
    toTriState(user.verified),
    user.locationCreated || null,
    timestamp,
    timestamp,
  );
}
|
|
538
|
+
|
|
539
|
+
/**
 * Queues a user in the `jobs` table. An existing row with the same
 * `unique_id` is left untouched (`INSERT OR IGNORE`). Does nothing when no
 * database is open.
 *
 * Column names, placeholders and bound values are all derived from one list
 * so they cannot drift out of step.
 *
 * @param {object} user - Camel-cased job record. `uniqueId` is the primary
 *   key; when `status` is falsy it is computed with `inferStatus(user)`.
 *   `bioLink` may be a string or an object carrying `link` / `url`.
 */
function addJobToDb(user) {
  if (!db) return;
  const now = Date.now();

  // undefined / null / "" mean "unknown" and map to NULL rather than 0.
  const toTriState = (value) => {
    if (value === undefined || value === null || value === "") return null;
    return value ? 1 : 0;
  };

  // An object bioLink whose link/url are empty used to fall through and be
  // bound as-is, which the driver rejects; only bind its link/url or NULL.
  const rawBioLink = user.bioLink;
  const bioLink =
    rawBioLink !== null && typeof rawBioLink === "object"
      ? rawBioLink.link || rawBioLink.url || null
      : rawBioLink || null;

  const row = [
    ["unique_id", user.uniqueId],
    ["nickname", user.nickname || null],
    ["status", user.status || inferStatus(user)],
    [
      "sources",
      JSON.stringify(
        Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
      ),
    ],
    ["claimed_by", user.claimedBy || null],
    ["claimed_at", user.claimedAt || null],
    ["error", user.error || null],
    ["pinned", user.pinned ? 1 : 0],
    ["no_video", user.noVideo ? 1 : 0],
    ["restricted", user.restricted ? 1 : 0],
    ["user_update_count", user.userUpdateCount || 0],
    ["tt_seller", toTriState(user.ttSeller)],
    ["verified", toTriState(user.verified)],
    ["video_count", user.videoCount || 0],
    ["comment_count", user.commentCount || 0],
    ["guessed_location", user.guessedLocation || null],
    ["location_created", user.locationCreated || null],
    ["follower_count", user.followerCount || 0],
    ["following_count", user.followingCount || 0],
    ["heart_count", user.heartCount || 0],
    ["refresh_time", user.refreshTime || null],
    ["processed", user.processed ? 1 : 0],
    ["processed_at", user.processedAt || null],
    ["created_at", user.createdAt || now],
    ["updated_at", user.updatedAt || now],
    ["region", user.region || null],
    ["signature", user.signature || null],
    ["bio_link", bioLink],
    ["sec_uid", user.secUid || null],
  ];

  const columnList = row.map(([column]) => column).join(",\n      ");
  const placeholders = row.map(() => "?").join(", ");

  db.prepare(
    `
    INSERT OR IGNORE INTO jobs (
      ${columnList}
    )
    VALUES (${placeholders})
  `,
  ).run(...row.map(([, value]) => value));
}
|
|
623
|
+
|
|
624
|
+
/**
 * @returns {number} Number of rows in `users`, or 0 when no database is open.
 */
function getUserDbCount() {
  if (!db) return 0;

  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM users").get();
  return total;
}
|
|
628
|
+
|
|
629
|
+
/**
 * @returns {number} Number of rows in `jobs`, or 0 when no database is open.
 */
function getJobsCount() {
  if (!db) return 0;

  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM jobs").get();
  return total;
}
|
|
633
|
+
|
|
634
|
+
/**
 * @returns {number} Number of `jobs` rows whose status is 'pending', or 0
 *   when no database is open.
 */
function getPendingJobsCount() {
  if (!db) return 0;

  const pendingCountStmt = db.prepare(
    "SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'",
  );
  const { c: pending } = pendingCountStmt.get();
  return pending;
}
|
|
640
|
+
|
|
641
|
+
/**
 * Counts `jobs` rows that have no seller flag yet (NULL or '') and whose
 * user-update counter is NULL or not positive.
 *
 * @returns {number} The matching row count, or 0 when no database is open.
 */
function getPendingJobsUserUpdateCount() {
  if (!db) return 0;

  const awaitingUpdateStmt = db.prepare(
    `
      SELECT COUNT(*) as c
      FROM jobs
      WHERE COALESCE(tt_seller, '') = ''
        AND COALESCE(user_update_count, 0) <= 0
    `,
  );
  const { c: awaiting } = awaitingUpdateStmt.get();
  return awaiting;
}
|
|
654
|
+
|
|
655
|
+
/**
 * @returns {number} Number of rows in `raw_jobs`, or 0 when no database is
 *   open.
 */
function getRawJobsCount() {
  if (!db) return 0;

  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get();
  return total;
}
|
|
659
|
+
|
|
660
|
+
/**
 * Collects the figures shown on the dashboard.
 *
 * A "target user" is a `jobs` row with `tt_seller = 1`, `verified = 0` and a
 * `location_created` contained in `targetLocations`; with no target locations
 * nothing counts as a target.
 *
 * Every numeric field is a number: `SUM()` yields NULL on an empty `jobs`
 * table, so all aggregate results are coalesced to 0.
 *
 * @param {string[]} [targetLocations=[]] - Location codes that define target
 *   users. A non-array value is treated as an empty list.
 * @returns {object|null} The statistics object, or `null` when no database is
 *   open.
 */
function getDashboardStatsFromDb(targetLocations = []) {
  if (!db) return null;

  const targets = Array.isArray(targetLocations) ? targetLocations : [];
  // Bound as parameters; only the number of placeholders is interpolated.
  const targetFilter = targets.length
    ? `AND location_created IN (${targets.map(() => "?").join(", ")})`
    : "AND 1 = 0";
  const zeroIfNull = (value) => value ?? 0;

  // Single scan of jobs for all scalar aggregates.
  const aggregateRow = db
    .prepare(
      `
      SELECT
        COUNT(*) as total,
        SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
        SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
        SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
        SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
        SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
        SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${targetFilter} THEN 1 ELSE 0 END) as targetUsers,
        SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
        SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
        SUM(CASE
          WHEN status != 'done'
            AND instr(COALESCE(sources, ''), '"video"') = 0
            AND instr(COALESCE(sources, ''), '"comment"') = 0
            AND instr(COALESCE(sources, ''), '"guess"') = 0
            AND instr(COALESCE(sources, ''), '"following"') = 0
            AND instr(COALESCE(sources, ''), '"follower"') = 0
          THEN 1 ELSE 0 END) as seed
      FROM jobs
    `,
    )
    .get(...targets);

  // userUpdateTasks is counted from jobs_base, not jobs.
  const userUpdateTasksRow = db
    .prepare(
      `
      SELECT COUNT(*) as userUpdateTasks
      FROM jobs_base
      WHERE COALESCE(tt_seller, '') = ''
        AND COALESCE(user_update_count, 0) <= 0
    `,
    )
    .get();

  // The per-country breakdowns need GROUP BY, so they stay separate queries.
  const countryStats = db
    .prepare(
      `
      SELECT
        COALESCE(location_created, '未知') as country,
        COUNT(*) as count,
        SUM(CASE
          WHEN tt_seller = 1 AND verified = 0 ${targetFilter}
          THEN 1 ELSE 0 END) as targetCount
      FROM jobs
      WHERE status = 'done'
      GROUP BY COALESCE(location_created, '未知')
      ORDER BY count DESC
    `,
    )
    .all(...targets);

  const targetCountryStats = targets.length
    ? db
        .prepare(
          `
          SELECT location_created as country, COUNT(*) as count
          FROM jobs
          WHERE tt_seller = 1
            AND verified = 0
            ${targetFilter}
          GROUP BY location_created
          ORDER BY count DESC
        `,
        )
        .all(...targets)
    : [];

  const jobsBaseCount = db
    .prepare("SELECT COUNT(*) as total FROM jobs_base")
    .get().total;

  const pending = zeroIfNull(aggregateRow.pending);
  const done = zeroIfNull(aggregateRow.done);
  const restricted = zeroIfNull(aggregateRow.restricted);
  const error = zeroIfNull(aggregateRow.error);

  return {
    totalUsers: aggregateRow.total,
    rawJobs: getRawJobsCount(),
    dbTotalUsers: getUserDbCount(),
    jobsTotal: aggregateRow.total,
    jobsBaseTotal: jobsBaseCount,
    jobsPending: pending,
    processedUsers: done,
    pendingUsers: pending,
    processingUsers: zeroIfNull(aggregateRow.processing),
    restrictedUsers: restricted,
    errorUsers: error,
    targetUsers: zeroIfNull(aggregateRow.targetUsers),
    userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
    targetCountryStats,
    countryStats,
    sourceStats: {
      seed: zeroIfNull(aggregateRow.seed),
      video: zeroIfNull(aggregateRow.video),
      comment: zeroIfNull(aggregateRow.comment),
      guess: zeroIfNull(aggregateRow.guess),
      following: zeroIfNull(aggregateRow.following),
      follower: zeroIfNull(aggregateRow.follower),
      processed: done,
      restricted,
      error,
      noVideo: zeroIfNull(aggregateRow.noVideo),
    },
  };
}
|
|
785
|
+
|
|
786
|
+
/**
 * Counts pending `jobs` rows per guessed location, largest group first.
 * Rows without a guessed location are grouped under '未知'.
 *
 * @returns {{country: string, count: number}[]} One entry per group, or an
 *   empty array when no database is open.
 */
function getPendingByCountryFromDb() {
  if (!db) return [];

  const pendingByCountryStmt = db.prepare(
    `
      SELECT
        COALESCE(guessed_location, '未知') as country,
        COUNT(*) as count
      FROM jobs
      WHERE status = 'pending'
      GROUP BY COALESCE(guessed_location, '未知')
      ORDER BY count DESC
    `,
  );
  return pendingByCountryStmt.all();
}
|
|
806
|
+
|
|
807
|
+
/**
 * Counts, per guessed location, the `jobs_base` rows that still lack a seller
 * flag (`tt_seller IS NULL`) and whose user-update counter is NULL or not
 * positive. Largest group first; rows without a guessed location are grouped
 * under '未知'.
 *
 * @returns {{country: string, count: number}[]} One entry per group, or an
 *   empty array when no database is open.
 */
function getUserUpdateByCountryFromDb() {
  if (!db) return [];

  const awaitingByCountryStmt = db.prepare(
    `
      SELECT
        COALESCE(guessed_location, '未知') as country,
        COUNT(*) as count
      FROM jobs_base
      WHERE tt_seller IS NULL
        AND COALESCE(user_update_count, 0) <= 0
      GROUP BY COALESCE(guessed_location, '未知')
      ORDER BY count DESC
    `,
  );
  return awaitingByCountryStmt.all();
}
|
|
828
|
+
|
|
829
|
+
/**
 * Counts, per guessed location, the `jobs_base` rows that lack a seller flag
 * (`tt_seller IS NULL`) while their user-update counter equals 1. Largest
 * group first; rows without a guessed location are grouped under '未知'.
 *
 * @returns {{country: string, count: number}[]} One entry per group, or an
 *   empty array when no database is open.
 */
function getAttachStuckByCountryFromDb() {
  if (!db) return [];

  const stuckByCountryStmt = db.prepare(
    `
      SELECT
        COALESCE(guessed_location, '未知') as country,
        COUNT(*) as count
      FROM jobs_base
      WHERE tt_seller IS NULL
        AND COALESCE(user_update_count, 0) = 1
      GROUP BY COALESCE(guessed_location, '未知')
      ORDER BY count DESC
    `,
  );
  const groups = stuckByCountryStmt.all();
  return groups;
}
|
|
847
|
+
|
|
848
|
+
/**
 * Reset `user_update_count` to 0 (and clear the claim columns) for every
 * `jobs_base` row of one country that has an empty/NULL `tt_seller` and a
 * `user_update_count` of 1.
 *
 * NOTE(review): the predicate here is `COALESCE(tt_seller, '') = ''`, whereas
 * getAttachStuckByCountryFromDb counts with `tt_seller IS NULL`; the two can
 * disagree for rows holding an empty string — confirm which is intended.
 *
 * @param {*} country Location label; null/undefined is treated as '未知'.
 * @returns {{restored: number, country: *, error?: string}} `restored` is the
 *   number of rows counted immediately before the update ran.
 */
function restoreAttachStuckByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (normalizedCountry === "") {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // Shared predicate so the count and the update address the same rows.
  const predicate = `
    COALESCE(tt_seller, '') = ''
    AND COALESCE(user_update_count, 0) = 1
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countRow = db
    .prepare(
      `
      SELECT COUNT(*) as c
      FROM jobs_base
      WHERE ${predicate}
      `,
    )
    .get(normalizedCountry);
  const matched = countRow?.c || 0;

  if (!matched) {
    return { restored: 0, country: normalizedCountry };
  }

  const update = db.prepare(
    `
    UPDATE jobs_base
    SET user_update_count = 0,
        updated_at = ?,
        claimed_by = NULL,
        claimed_at = NULL
    WHERE ${predicate}
    `,
  );
  update.run(Date.now(), normalizedCountry);

  return { restored: matched, country: normalizedCountry };
}
|
|
895
|
+
|
|
896
|
+
/**
 * For every pending row in `jobs` belonging to one country, set
 * `user_update_count` to 0, refresh `updated_at` and clear the claim columns.
 *
 * @param {*} country Location label; null/undefined is treated as '未知'.
 * @returns {{reset: number, country: *, error?: string}} `reset` is the number
 *   of rows counted immediately before the update ran.
 */
function resetPendingByCountry(country) {
  if (!db) {
    return { reset: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (normalizedCountry === "") {
    return {
      reset: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  // Shared predicate so the count and the update address the same rows.
  const predicate = `
    status = 'pending'
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countRow = db
    .prepare(
      `
      SELECT COUNT(*) as c
      FROM jobs
      WHERE ${predicate}
      `,
    )
    .get(normalizedCountry);
  const matched = countRow?.c || 0;

  if (!matched) {
    return { reset: 0, country: normalizedCountry };
  }

  const update = db.prepare(
    `
    UPDATE jobs
    SET user_update_count = 0,
        updated_at = ?,
        claimed_by = NULL,
        claimed_at = NULL
    WHERE ${predicate}
    `,
  );
  update.run(Date.now(), normalizedCountry);

  return { reset: matched, country: normalizedCountry };
}
|
|
942
|
+
|
|
943
|
+
/**
 * Count all `raw_jobs` rows grouped by guessed location.
 *
 * NULL locations are grouped under '未知'.
 *
 * @returns {Array<{country: string, count: number}>} One entry per location,
 *   largest count first; an empty array when the database handle is not set.
 */
function getRawByCountryFromDb() {
  if (!db) {
    return [];
  }

  const statement = db.prepare(
    `
    SELECT
      COALESCE(guessed_location, '未知') as country,
      COUNT(*) as count
    FROM raw_jobs
    GROUP BY COALESCE(guessed_location, '未知')
    ORDER BY count DESC
    `,
  );
  return statement.all();
}
|
|
959
|
+
|
|
960
|
+
/**
 * Move every job of one country out of its source table into `raw_jobs`.
 *
 * Supported scopes:
 *   - "pending":    rows of `jobs` with status 'pending'
 *                   (same source as getPendingByCountryFromDb)
 *   - "userUpdate": rows of `jobs_base` with NULL `tt_seller` and
 *                   `user_update_count` <= 0
 *                   (same source as getUserUpdateByCountryFromDb)
 *
 * The copy (INSERT OR REPLACE) and the delete run inside one transaction.
 *
 * NOTE(review): `bio_link` is not in the copied column list although the
 * restore-by-id/filter paths read it back from `raw_jobs` — confirm whether it
 * should be carried over.
 *
 * @param {*} scope "pending" or "userUpdate" (trimmed before comparison).
 * @param {*} country Location label; null/undefined is treated as '未知'.
 * @returns {{moved: number, scope: *, country: *, error?: string}} `moved` is
 *   the number of rows counted immediately before the transaction ran.
 */
function moveJobsToRawByCountry(scope, country) {
  if (!db) {
    return { moved: 0, scope, country, error: "db not ready" };
  }

  const normalizedScope = String(scope || "").trim();
  const normalizedCountry = String(country == null ? "未知" : country).trim();

  // Builds the result object with the normalized identifiers filled in.
  const outcome = (moved, error) => {
    const base = {
      moved,
      scope: normalizedScope,
      country: normalizedCountry,
    };
    return error === undefined ? base : { ...base, error };
  };

  if (normalizedCountry === "") {
    return outcome(0, "country is required");
  }

  const sources = new Map([
    ["pending", { table: "jobs", filter: `status = 'pending'` }],
    [
      "userUpdate",
      {
        table: "jobs_base",
        filter: `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`,
      },
    ],
  ]);
  const source = sources.get(normalizedScope);
  if (!source) {
    return outcome(0, "unsupported scope");
  }

  // Both scopes copy the same set of columns.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, follower_count,
    following_count, heart_count, refresh_time, processed,
    processed_at, created_at, updated_at, region, signature,
    sec_uid, latest_video_time, user_create_time
  `;
  const predicate = `
    ${source.filter}
    AND COALESCE(guessed_location, '未知') = ?
  `;

  const countRow = db
    .prepare(
      `
      SELECT COUNT(*) as c
      FROM ${source.table}
      WHERE ${predicate}
      `,
    )
    .get(normalizedCountry);
  const matched = countRow?.c || 0;
  if (!matched) {
    return outcome(0);
  }

  const runMove = db.transaction((targetCountry) => {
    const copy = db.prepare(
      `
      INSERT OR REPLACE INTO raw_jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM ${source.table}
      WHERE ${predicate}
      `,
    );
    copy.run(targetCountry);

    const remove = db.prepare(
      `
      DELETE FROM ${source.table}
      WHERE ${predicate}
      `,
    );
    remove.run(targetCountry);
  });
  runMove(normalizedCountry);

  return outcome(matched);
}
|
|
1058
|
+
|
|
1059
|
+
/**
 * Move every `raw_jobs` row of one country back into `jobs`.
 *
 * The copy uses INSERT OR REPLACE, which rewrites the whole target row, so any
 * column missing from the list is silently reset. The list therefore includes
 * `bio_link`, `latest_video_time` and `user_create_time` in addition to the
 * base columns: moveJobsToRawByCountry writes the latter two into `raw_jobs`,
 * and restoreRawJobById copies `bio_link` between the same two tables.
 *
 * The copy and the delete run inside a single transaction.
 *
 * @param {*} country Location label; null/undefined is treated as '未知'.
 * @returns {{restored: number, country: *, error?: string}} `restored` is the
 *   number of rows counted immediately before the transaction ran.
 */
function restoreRawJobsByCountry(country) {
  if (!db) {
    return { restored: 0, country, error: "db not ready" };
  }

  const normalizedCountry = String(country == null ? "未知" : country).trim();
  if (!normalizedCountry) {
    return {
      restored: 0,
      country: normalizedCountry,
      error: "country is required",
    };
  }

  const whereSql = `COALESCE(guessed_location, '未知') = ?`;
  const count =
    db
      .prepare(
        `
        SELECT COUNT(*) as c
        FROM raw_jobs
        WHERE ${whereSql}
        `,
      )
      .get(normalizedCountry)?.c || 0;

  if (!count) {
    return { restored: 0, country: normalizedCountry };
  }

  // One list for both the INSERT target and the SELECT source so they cannot
  // drift apart.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at, error,
    pinned, no_video, restricted, user_update_count, tt_seller, verified,
    video_count, comment_count, guessed_location, location_created,
    follower_count, following_count, heart_count, refresh_time,
    processed, processed_at, created_at, updated_at, region, signature,
    bio_link, sec_uid, latest_video_time, user_create_time
  `;

  const restoreTxn = db.transaction((targetCountry) => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM raw_jobs
      WHERE ${whereSql}
      `,
    ).run(targetCountry);

    db.prepare(
      `
      DELETE FROM raw_jobs
      WHERE ${whereSql}
      `,
    ).run(targetCountry);
  });

  restoreTxn(normalizedCountry);
  return { restored: count, country: normalizedCountry };
}
|
|
1167
|
+
|
|
1168
|
+
/**
 * Move a single `raw_jobs` row back into `jobs`, identified by `unique_id`.
 *
 * A null/undefined id is treated as missing rather than being stringified to
 * "null"/"undefined" and looked up. The copy uses INSERT OR REPLACE, so the
 * column list also carries `latest_video_time` and `user_create_time` (written
 * into `raw_jobs` by moveJobsToRawByCountry) to avoid resetting them.
 *
 * The copy and the delete run inside a single transaction.
 *
 * @param {*} uniqueId Identifier of the row to restore.
 * @returns {{restored: number, uniqueId: *, error?: string}} `restored` is 1
 *   when a matching row existed, otherwise 0.
 */
function restoreRawJobById(uniqueId) {
  if (!db) {
    return { restored: 0, uniqueId, error: "db not ready" };
  }

  // `String(undefined)` would yield the non-empty text "undefined".
  const safeId = String(uniqueId ?? "").trim();
  if (!safeId) {
    return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
  }

  const exists =
    db
      .prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
      .get(safeId)?.c || 0;

  if (!exists) {
    return { restored: 0, uniqueId: safeId };
  }

  // One list for both the INSERT target and the SELECT source.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at, error,
    pinned, no_video, restricted, user_update_count, tt_seller, verified,
    video_count, comment_count, guessed_location, location_created,
    follower_count, following_count, heart_count, refresh_time,
    processed, processed_at, created_at, updated_at, region, signature,
    bio_link, sec_uid, latest_video_time, user_create_time
  `;

  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM raw_jobs WHERE unique_id = ?
      `,
    ).run(safeId);

    db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
  });

  restoreTxn();
  return { restored: 1, uniqueId: safeId };
}
|
|
1213
|
+
|
|
1214
|
+
/**
 * Move every `raw_jobs` row matching the given filters back into `jobs`.
 *
 * At least one filter must be supplied; with none the call is rejected so the
 * whole table is never restored by accident. The copy uses INSERT OR REPLACE,
 * so the column list also carries `latest_video_time` and `user_create_time`
 * (written into `raw_jobs` by moveJobsToRawByCountry) to avoid resetting them.
 *
 * The copy and the delete run inside a single transaction.
 *
 * @param {object} [filters]
 * @param {*} [filters.search] Case-insensitive substring matched against
 *   `unique_id` and `nickname`; coerced with String() like
 *   getRawJobsPageFromDb does.
 * @param {*} [filters.location] Exact `guessed_location` ('未知' matches NULL).
 * @param {*} [filters.hasVideo] Truthy to require `video_count` > 0.
 * @param {*} [filters.hasFollower] Truthy to require `follower_count` > 0.
 * @returns {{restored: number, error?: string}} `restored` is the number of
 *   rows counted immediately before the transaction ran.
 */
function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower } = {}) {
  if (!db) {
    return { restored: 0, error: "db not ready" };
  }

  const where = [];
  const args = [];

  if (search) {
    where.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    // String() keeps a numeric search term from throwing on toLowerCase().
    const likeVal = `%${String(search).toLowerCase()}%`;
    args.push(likeVal, likeVal);
  }

  if (location) {
    where.push("COALESCE(guessed_location, '未知') = ?");
    args.push(location);
  }

  if (hasVideo) {
    where.push("COALESCE(video_count, 0) > 0");
  }

  if (hasFollower) {
    where.push("COALESCE(follower_count, 0) > 0");
  }

  if (where.length === 0) {
    return { restored: 0, error: "at least one filter is required" };
  }

  const whereSql = where.join(" AND ");

  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;

  if (!count) {
    return { restored: 0 };
  }

  // One list for both the INSERT target and the SELECT source.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at, error,
    pinned, no_video, restricted, user_update_count, tt_seller, verified,
    video_count, comment_count, guessed_location, location_created,
    follower_count, following_count, heart_count, refresh_time,
    processed, processed_at, created_at, updated_at, region, signature,
    bio_link, sec_uid, latest_video_time, user_create_time
  `;

  const restoreTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO jobs (
        ${columns}
      )
      SELECT
        ${columns}
      FROM raw_jobs WHERE ${whereSql}
      `,
    ).run(...args);

    db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
  });

  restoreTxn();
  return { restored: count };
}
|
|
1284
|
+
|
|
1285
|
+
/**
 * Fetch one page of `raw_jobs` rows, optionally filtered.
 *
 * @param {object} query
 * @param {*} [query.search] Case-insensitive substring matched against
 *   `unique_id` and `nickname`.
 * @param {*} [query.location] Exact `guessed_location` ('未知' matches NULL).
 * @param {*} [query.limit] Page size; parsed as an integer, defaults to 50 and
 *   is clamped to the range 1..200.
 * @param {*} [query.offset] Row offset; parsed as an integer, never below 0.
 * @param {*} [query.hasVideo] Truthy to require `video_count` > 0.
 * @param {*} [query.hasFollower] Truthy to require `follower_count` > 0.
 * @returns {{total: number, limit: number, offset: number, users: Array}|null}
 *   The page (rows passed through mapJobRow) with the total match count, or
 *   null when the database handle is not set.
 */
function getRawJobsPageFromDb({
  search,
  location,
  limit,
  offset,
  hasVideo,
  hasFollower,
}) {
  if (!db) {
    return null;
  }

  const pageSize = Math.max(1, Math.min(200, parseInt(limit) || 50));
  const pageStart = Math.max(0, parseInt(offset) || 0);

  const conditions = [];
  const bindings = [];

  if (search) {
    const needle = `%${String(search).toLowerCase()}%`;
    conditions.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    bindings.push(needle, needle);
  }
  if (location) {
    conditions.push("COALESCE(guessed_location, '未知') = ?");
    bindings.push(location);
  }
  if (hasVideo) {
    conditions.push("COALESCE(video_count, 0) > 0");
  }
  if (hasFollower) {
    conditions.push("COALESCE(follower_count, 0) > 0");
  }

  const filterSql =
    conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";

  const countRow = db
    .prepare(`SELECT COUNT(*) as c FROM raw_jobs ${filterSql}`)
    .get(...bindings);
  const total = countRow.c;

  const pageStatement = db.prepare(
    `
    SELECT *
    FROM raw_jobs
    ${filterSql}
    ORDER BY created_at DESC, unique_id ASC
    LIMIT ? OFFSET ?
    `,
  );
  const pageRows = pageStatement.all(...bindings, pageSize, pageStart);

  return {
    total,
    limit: pageSize,
    offset: pageStart,
    users: pageRows.map(mapJobRow),
  };
}
|
|
1342
|
+
|
|
1343
|
+
// ====== Tag 发现与打分 CRUD ======
|
|
1344
|
+
|
|
1345
|
+
/**
 * Insert a tag into the `tags` table unless it already exists.
 *
 * The tag is normalized before storage: surrounding whitespace is removed,
 * leading `#` characters are stripped, and the result is lower-cased. Trimming
 * happens before the `#` strip so that input such as " #foo" cannot slip
 * through with its prefix intact. Non-string input is coerced instead of
 * throwing, and null/undefined is rejected as an invalid tag.
 *
 * @param {*} tag Raw tag text, with or without a leading `#`.
 * @param {*} countries Value stored as JSON in the `countries` column.
 * @param {string} [source="llm"] Origin label stored in the `source` column.
 * @returns {{inserted: boolean, tag?: string, error?: string}} `inserted` is
 *   false when the tag already existed (INSERT OR IGNORE changed nothing),
 *   when the tag is shorter than two characters after normalization, or when
 *   the database call failed.
 */
function insertTag(tag, countries, source = "llm") {
  if (!db) return { inserted: false, error: "db not ready" };

  // Never store a tag carrying a `#` prefix.
  const normalized = String(tag ?? "")
    .trim()
    .replace(/^#+/, "")
    .trim()
    .toLowerCase();
  if (!normalized || normalized.length < 2) {
    return { inserted: false, error: "invalid tag" };
  }

  try {
    const result = db
      .prepare(
        `
        INSERT OR IGNORE INTO tags (tag, countries, source)
        VALUES (?, ?, ?)
        `,
      )
      .run(normalized, JSON.stringify(countries), source);
    return { inserted: result.changes > 0, tag: normalized };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}
|
|
1366
|
+
|
|
1367
|
+
/**
 * List tags having the given status, lowest score first (ties broken by
 * earliest `created_at`).
 *
 * The JSON text columns `countries` and `matched_countries` are parsed into
 * arrays; an empty/NULL column yields an empty array.
 *
 * @param {*} status Value compared against the `status` column.
 * @param {number} [limit=100] Maximum number of rows returned.
 * @returns {Array<object>} Tag rows, or an empty array when the database
 *   handle is not set.
 */
function getTagsByStatus(status, limit = 100) {
  if (!db) {
    return [];
  }

  const parseList = (raw) => JSON.parse(raw || "[]");

  const statement = db.prepare(
    `
    SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
    `,
  );

  const tags = [];
  for (const row of statement.all(status, limit)) {
    tags.push({
      ...row,
      countries: parseList(row.countries),
      matched_countries: parseList(row.matched_countries),
    });
  }
  return tags;
}
|
|
1382
|
+
|
|
1383
|
+
/**
 * List non-dead tags that target a given country, highest score first.
 *
 * Country membership is checked in JavaScript because `countries` is stored
 * as JSON text. `countries` and `matched_countries` are returned as parsed
 * arrays (empty when the column is empty/NULL).
 *
 * @param {*} country Country that must appear in the tag's `countries` list.
 * @param {number} [minScore=0] Minimum `score` a tag needs to be included.
 * @returns {Array<object>} Matching tag rows, or an empty array when the
 *   database handle is not set.
 */
function getTagsByCountry(country, minScore = 0) {
  if (!db) {
    return [];
  }

  const parseList = (raw) => JSON.parse(raw || "[]");

  const candidates = db
    .prepare(
      `
      SELECT * FROM tags WHERE status != 'dead'
      ORDER BY score DESC
      `,
    )
    .all();

  const hydrated = candidates.map((row) => ({
    ...row,
    countries: parseList(row.countries),
    matched_countries: parseList(row.matched_countries),
  }));

  return hydrated.filter(
    (tagRow) => tagRow.countries.includes(country) && tagRow.score >= minScore,
  );
}
|
|
1402
|
+
|
|
1403
|
+
/**
 * List tags with status 'dead' that target a given country, lowest score
 * first.
 *
 * `countries` and `matched_countries` are returned as parsed arrays (empty
 * when the column is empty/NULL); the country filter runs in JavaScript.
 *
 * @param {*} country Country that must appear in the tag's `countries` list.
 * @returns {Array<object>} Matching tag rows, or an empty array when the
 *   database handle is not set.
 */
function getDeadTags(country) {
  if (!db) {
    return [];
  }

  const parseList = (raw) => JSON.parse(raw || "[]");

  const deadRows = db
    .prepare(
      `
      SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
      `,
    )
    .all();

  const hydrated = deadRows.map((row) => ({
    ...row,
    countries: parseList(row.countries),
    matched_countries: parseList(row.matched_countries),
  }));

  return hydrated.filter((tagRow) => tagRow.countries.includes(country));
}
|
|
1420
|
+
|
|
1421
|
+
/**
 * Claim a tag for scoring by switching its status from 'new' to 'scoring'.
 *
 * The status check and the change happen in one UPDATE statement, so only a
 * tag that is still 'new' can be claimed.
 *
 * @param {*} tag Value compared against the `tag` column.
 * @returns {{ok: boolean, tag?: *, error?: string}} `ok: true` when the row
 *   was switched; otherwise an error saying the tag is missing or reporting
 *   its current status.
 */
function claimTag(tag) {
  if (!db) {
    return { ok: false, error: "db not ready" };
  }

  const claim = db.prepare(
    "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
  );
  const { changes } = claim.run(tag);

  if (changes !== 0) {
    return { ok: true, tag };
  }

  // Nothing changed: tell a missing tag apart from one in another status.
  const current = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
  if (!current) {
    return { ok: false, error: "tag not found" };
  }
  return {
    ok: false,
    error: `tag status is ${current.status}, already claimed`,
  };
}
|
|
1437
|
+
|
|
1438
|
+
/**
 * Record the outcome of a scoring run for a tag.
 *
 * Each supplied field overwrites its column; a null/undefined field leaves the
 * stored value untouched (the statement uses COALESCE(?, column)).
 * `scored_at` is always set to the current ISO timestamp and `score_count` is
 * incremented by one.
 *
 * @param {*} tag Value compared against the `tag` column.
 * @param {object} fields
 * @param {*} [fields.score]
 * @param {*} [fields.status]
 * @param {*} [fields.totalPosts] Stored in `total_posts`.
 * @param {*} [fields.authorCount] Stored in `author_count`.
 * @param {*} [fields.matchedAuthors] Stored in `matched_authors`.
 * @param {*} [fields.matchedCountries] Stored as JSON in `matched_countries`;
 *   any falsy value leaves the column untouched.
 * @param {*} [fields.pushedUsers] Stored in `pushed_users`.
 * @param {*} [fields.error] Stored in `last_error`.
 * @returns {{ok: boolean, tag?: *, error?: string}} `ok` is true when a row
 *   was updated; a database failure is reported through `error`.
 */
function reportTagScore(tag, fields) {
  if (!db) {
    return { ok: false, error: "db not ready" };
  }

  const {
    score,
    status,
    totalPosts,
    authorCount,
    matchedAuthors,
    matchedCountries,
    pushedUsers,
    error,
  } = fields;

  let matchedCountriesJson = null;
  if (matchedCountries) {
    matchedCountriesJson = JSON.stringify(matchedCountries);
  }
  const scoredAt = new Date().toISOString();

  // Positional bindings, in the same order as the placeholders below.
  const bindings = [
    score ?? null,
    status ?? null,
    totalPosts ?? null,
    authorCount ?? null,
    matchedAuthors ?? null,
    matchedCountriesJson,
    pushedUsers ?? null,
    error ?? null,
    scoredAt,
    tag,
  ];

  try {
    const statement = db.prepare(
      `
      UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?
      `,
    );
    const outcome = statement.run(...bindings);
    return { ok: outcome.changes > 0, tag };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
|
|
1490
|
+
|
|
1491
|
+
/**
 * List tags regardless of status, highest score first (ties broken by newest
 * `created_at`).
 *
 * `countries` and `matched_countries` are returned as parsed arrays (empty
 * when the column is empty/NULL).
 *
 * @param {number} [limit=200] Maximum number of rows returned.
 * @returns {Array<object>} Tag rows, or an empty array when the database
 *   handle is not set.
 */
function getAllTags(limit = 200) {
  if (!db) {
    return [];
  }

  const parseList = (raw) => JSON.parse(raw || "[]");

  const statement = db.prepare(
    `
    SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
    `,
  );

  const tags = [];
  for (const row of statement.all(limit)) {
    tags.push({
      ...row,
      countries: parseList(row.countries),
      matched_countries: parseList(row.matched_countries),
    });
  }
  return tags;
}
|
|
1506
|
+
|
|
1507
|
+
/**
 * Debug helper: run an arbitrary SQL statement and return its raw rows.
 *
 * NOTE(review): the SQL text is prepared exactly as given, so whoever can
 * reach this function controls the statement. Confirm that it is only exposed
 * to trusted callers, and whether statements that both modify and return data
 * should be rejected here.
 *
 * @param {string} sql Statement text handed to `db.prepare`.
 * @param {Array} [params=[]] Positional values bound to the statement.
 * @returns {{rows: Array}|{error: string}} The rows, or the message of any
 *   error raised while preparing or executing.
 */
function rawQuery(sql, params = []) {
  if (!db) {
    return { error: "db not ready" };
  }

  let rows;
  try {
    const statement = db.prepare(sql);
    rows = statement.all(...params);
  } catch (e) {
    return { error: e.message };
  }
  return { rows };
}
|
|
1517
|
+
|
|
1518
|
+
/**
 * Clean up rows of the `tags` table whose tag starts with `#`.
 *
 * For every such row the tag is stripped of leading `#`, trimmed and
 * lower-cased, then:
 *   - deleted, when fewer than two characters remain ("skipped");
 *   - merged, when the clean tag already exists: the two `countries` lists
 *     are unioned onto the existing row and the dirty row is deleted;
 *   - renamed in place otherwise ("fixed").
 *
 * All writes run inside one transaction, so an error part-way through (for
 * example malformed JSON in `countries`) rolls back every change instead of
 * leaving the table half-cleaned. Statements are prepared once, outside the
 * loop.
 *
 * @returns {{ok: boolean, error?: string, fixed?: number, merged?: number,
 *   skipped?: number, details?: {fixed: Array, merged: Array, skipped: Array}}}
 *   Counts and per-row details of what was done.
 */
function normalizeTags() {
  if (!db) return { ok: false, error: "db not ready" };

  const selectDirty = db.prepare(
    "SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'",
  );
  const selectByTag = db.prepare("SELECT * FROM tags WHERE tag = ?");
  const deleteById = db.prepare("DELETE FROM tags WHERE id = ?");
  const updateCountries = db.prepare(
    "UPDATE tags SET countries = ? WHERE tag = ?",
  );
  const renameById = db.prepare("UPDATE tags SET tag = ? WHERE id = ?");

  const fixed = [];
  const merged = [];
  const skipped = [];

  const normalizeTxn = db.transaction(() => {
    for (const row of selectDirty.all()) {
      const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
      if (!cleanTag || cleanTag.length < 2) {
        deleteById.run(row.id);
        skipped.push({
          dirty: row.tag,
          reason: "empty after normalize, deleted",
        });
        continue;
      }

      // Does a row with the clean tag already exist?
      const existing = selectByTag.get(cleanTag);
      if (existing) {
        // Merge: keep the existing clean row and union the country lists.
        const oldCountries = JSON.parse(row.countries || "[]");
        const existCountries = JSON.parse(existing.countries || "[]");
        const mergedCountries = [
          ...new Set([...existCountries, ...oldCountries]),
        ];
        updateCountries.run(JSON.stringify(mergedCountries), cleanTag);
        // Drop the dirty duplicate.
        deleteById.run(row.id);
        merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      } else {
        // No conflict: rename in place.
        renameById.run(cleanTag, row.id);
        fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
      }
    }
  });
  normalizeTxn();

  return {
    ok: true,
    fixed: fixed.length,
    merged: merged.length,
    skipped: skipped.length,
    details: { fixed, merged, skipped },
  };
}
|
|
1572
|
+
|
|
1573
|
+
/**
 * Delete every row from the `tags` table.
 *
 * @returns {{ok: boolean, deleted?: number, error?: string}} `deleted` is the
 *   row count read just before the delete ran.
 */
function clearTags() {
  if (!db) {
    return { ok: false, error: "db not ready" };
  }

  const { c: rowCount } = db.prepare("SELECT COUNT(*) as c FROM tags").get();
  db.exec("DELETE FROM tags");
  return { ok: true, deleted: rowCount };
}
|
|
1579
|
+
|
|
1580
|
+
/**
 * Fetch one page of rows from `jobs` with optional filters, plus the total
 * number of matches.
 *
 * The total is cached per filter combination for five seconds because the
 * COUNT over the full table is slow. Expired cache entries are pruned whenever
 * a fresh count is stored, so the cache cannot grow without bound as distinct
 * search terms arrive. The cache key serializes the bound values as JSON so
 * that values containing commas cannot collide.
 *
 * @param {object} query
 * @param {*} [query.status] Exact status filter; "all" or a falsy value
 *   disables it. Also selects the `processed_at` ordering when it is 'done'.
 * @param {*} [query.search] Case-insensitive substring matched against
 *   `unique_id` and `nickname`.
 * @param {*} [query.location] Exact `location_created` filter.
 * @param {*} [query.target] When "1", restrict to rows with `tt_seller = 1`
 *   and `verified = 0` in the target location(s); with no target location at
 *   all the result is empty.
 * @param {*} [query.targetLocation] Single target location (takes precedence
 *   over `targetLocations`).
 * @param {*} [query.limit] Page size; parsed as an integer, defaults to 50 and
 *   is clamped to the range 1..200.
 * @param {*} [query.offset] Row offset; parsed as an integer, never below 0.
 * @param {Array} [query.targetLocations=[]] Target locations used when
 *   `targetLocation` is not given.
 * @returns {{total: number, users: Array}|null} The page (rows passed through
 *   mapJobRow) and total, or null when the database handle is not set.
 */
function getUsersPageFromDb({
  status,
  search,
  location,
  target,
  targetLocation,
  limit,
  offset,
  targetLocations = [],
}) {
  if (!db) return null;

  const COUNT_CACHE_TTL_MS = 5000;

  const safeLimit = Math.max(
    1,
    Math.min(200, Number.parseInt(limit, 10) || 50),
  );
  const safeOffset = Math.max(0, Number.parseInt(offset, 10) || 0);
  const where = [];
  const args = [];

  if (status && status !== "all") {
    where.push("status = ?");
    args.push(status);
  }
  if (target === "1") {
    if (targetLocation) {
      where.push("tt_seller = 1 AND verified = 0 AND location_created = ?");
      args.push(targetLocation);
    } else if (targetLocations.length > 0) {
      where.push(
        `tt_seller = 1 AND verified = 0 AND location_created IN (${targetLocations
          .map(() => "?")
          .join(", ")})`,
      );
      args.push(...targetLocations);
    } else {
      // Target mode without any target location matches nothing.
      where.push("1 = 0");
    }
  }
  if (search) {
    where.push(
      "(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
    );
    const pattern = `%${String(search).toLowerCase()}%`;
    args.push(pattern, pattern);
  }
  if (location) {
    where.push("location_created = ?");
    args.push(location);
  }

  const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";

  // COUNT cache: the table scan is slow, so reuse a count for a few seconds.
  if (!getUsersPageFromDb._countCache) {
    getUsersPageFromDb._countCache = new Map();
  }
  const countCache = getUsersPageFromDb._countCache;
  const cacheKey = `${whereSql}|${JSON.stringify(args)}`;
  const now = Date.now();
  const cachedCount = countCache.get(cacheKey);
  let total;
  if (cachedCount && now - cachedCount.time < COUNT_CACHE_TTL_MS) {
    total = cachedCount.c;
  } else {
    total = db
      .prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
      .get(...args).c;
    // Drop stale entries so one-off searches do not accumulate forever.
    for (const [key, entry] of countCache) {
      if (now - entry.time >= COUNT_CACHE_TTL_MS) {
        countCache.delete(key);
      }
    }
    countCache.set(cacheKey, {
      c: total,
      time: Date.now(),
    });
  }

  // Select only the columns the frontend needs instead of SELECT *.
  const rows = db
    .prepare(
      `
      SELECT
        unique_id, nickname, sec_uid, status, sources,
        tt_seller, verified, follower_count, following_count,
        location_created, latest_video_time, refresh_time,
        guessed_location, pinned, processed_at, video_count,
        no_video, claimed_by, claimed_at, created_at, updated_at
      FROM jobs
      ${whereSql}
      ORDER BY
        pinned DESC,
        CASE
          WHEN ? = 'done' THEN COALESCE(processed_at, 0) * -1
          WHEN ? = '1' THEN COALESCE(refresh_time, 0) * -1
          ELSE 0
        END ASC,
        CASE status
          WHEN 'processing' THEN 0
          WHEN 'pending' THEN 1
          WHEN 'done' THEN 2
          WHEN 'error' THEN 3
          WHEN 'restricted' THEN 4
          ELSE 9
        END ASC,
        COALESCE(follower_count, 0) DESC,
        COALESCE(processed_at, 0) DESC,
        unique_id ASC
      LIMIT ? OFFSET ?
      `,
    )
    // `target` gets the same nullish guard as `status`, so an omitted value is
    // bound as NULL rather than passed through as undefined.
    .all(...args, status || "", target ?? null, safeLimit, safeOffset)
    .map(mapJobRow);

  return {
    total,
    users: rows,
  };
}
|
|
1689
|
+
|
|
1690
|
+
/**
 * List every unverified seller in `jobs` whose `location_created` is one of
 * the target locations, most followers first.
 *
 * The query previously had no `FROM jobs WHERE tt_seller = 1` clause (the
 * column list ran straight into `AND verified = 0`), so it could not be
 * prepared. The restored clause uses the same target definition as
 * getUsersPageFromDb: `tt_seller = 1 AND verified = 0` within the given
 * locations.
 *
 * @param {Array} [targetLocations=[]] Values matched against
 *   `location_created`.
 * @returns {{total: number, users: Array}|null} All matching rows (passed
 *   through mapJobRow) with their count; an empty result when no locations
 *   are given; null when the database handle is not set.
 */
function getTargetUsersFromDb(targetLocations = []) {
  if (!db) return null;
  if (!targetLocations.length) {
    return { total: 0, users: [] };
  }

  const placeholders = targetLocations.map(() => "?").join(", ");
  const rows = db
    .prepare(
      `
      SELECT
        unique_id, nickname, sec_uid, status, sources,
        tt_seller, verified, follower_count, following_count,
        location_created, latest_video_time, refresh_time,
        guessed_location, pinned, processed_at, video_count,
        no_video, claimed_by, claimed_at, created_at, updated_at
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${placeholders})
      ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
      `,
    )
    .all(...targetLocations)
    .map(mapJobRow);

  return {
    total: rows.length,
    users: rows,
  };
}
|
|
1719
|
+
|
|
1720
|
+
/**
 * Lists unverified seller jobs (`tt_seller = 1 AND verified = 0`) whose
 * `location_created` is one of `targetLocations`. `options` selects one of
 * three result shapes:
 *
 * - `summaryOnly`: per-location counts only
 *   → `{ total, countries: [{ country, count, users: undefined }] }`
 * - `limit` provided: one flat page
 *   → `{ total, limit, offset, users }`
 * - otherwise: every matching user grouped by location
 *   → `{ total, countries: [{ country, count, users }] }`
 *
 * @param {string[]} [targetLocations] - values compared with jobs.location_created.
 * @param {object} [options]
 * @param {boolean} [options.summaryOnly=false] - return counts only.
 * @param {string} [options.country] - paged mode: additionally require this location.
 * @param {string} [options.search] - paged mode: substring matched (LIKE) against unique_id and nickname.
 * @param {number} [options.limit] - paged mode: page size, capped at 10000.
 * @param {number} [options.offset] - paged mode: rows to skip, defaults to 0.
 * @returns {object|null} `null` when the database is not initialised,
 *   `{ countries: [] }` when `targetLocations` is empty, otherwise one of the shapes above.
 */
function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
  if (!db) return null;
  if (!targetLocations.length) {
    return { countries: [] };
  }

  const {
    summaryOnly = false,
    country: filterCountry,
    search,
    limit,
    offset,
  } = options;
  const placeholders = targetLocations.map(() => "?").join(", ");
  // Shared filter used by all three modes.
  const baseWhere = `
    WHERE tt_seller = 1
      AND verified = 0
      AND location_created IN (${placeholders})
  `;

  // Summary mode: per-location counts, no user rows.
  if (summaryOnly) {
    const statsRows = db
      .prepare(
        `
        SELECT location_created as country, COUNT(*) as count
        FROM jobs
        ${baseWhere}
        GROUP BY location_created
        ORDER BY count DESC
      `,
      )
      .all(...targetLocations);

    const countries = statsRows.map((r) => ({
      country: r.country,
      count: r.count,
      users: undefined,
    }));
    return {
      total: statsRows.reduce((s, r) => s + r.count, 0),
      countries,
    };
  }

  // Paged mode: a flat, optionally filtered page of users.
  if (limit !== undefined) {
    const MAX_PAGE_SIZE = 10000;
    let where = baseWhere;
    const params = [...targetLocations];

    if (filterCountry) {
      where += ` AND location_created = ?`;
      params.push(filterCountry);
    }

    if (search) {
      where += ` AND (unique_id LIKE ? OR nickname LIKE ?)`;
      const likeSearch = `%${search}%`;
      params.push(likeSearch, likeSearch);
    }

    // The count query is built from the same WHERE clause instead of being
    // carved out of the SELECT text with a regex.
    const total =
      db.prepare(`SELECT COUNT(*) as cnt FROM jobs ${where}`).get(...params)
        ?.cnt || 0;

    // A non-numeric or negative limit would otherwise bind NaN or bypass the
    // cap (SQLite treats a negative LIMIT as "no limit"); both fall back to the cap.
    const requestedLimit = Math.floor(Number(limit));
    const safeLimit =
      Number.isFinite(requestedLimit) && requestedLimit >= 0
        ? Math.min(requestedLimit, MAX_PAGE_SIZE)
        : MAX_PAGE_SIZE;
    // A missing or invalid offset means "start at the first row".
    const requestedOffset = Math.floor(Number(offset));
    const safeOffset =
      Number.isFinite(requestedOffset) && requestedOffset > 0
        ? requestedOffset
        : 0;

    const rows = db
      .prepare(
        `
        SELECT
          unique_id,
          nickname,
          follower_count,
          video_count,
          tt_seller,
          verified,
          location_created,
          confirmed_location,
          modified_at,
          latest_video_time,
          refresh_time,
          top_video_play_count,
          top_video_href,
          status,
          sources
        FROM jobs
        ${where}
        ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
        LIMIT ? OFFSET ?
      `,
      )
      .all(...params, safeLimit, safeOffset)
      .map(mapJobRow);

    return {
      total,
      limit: safeLimit,
      offset: safeOffset,
      users: rows,
    };
  }

  // Default mode: load every matching user and group by location in memory.
  const rows = db
    .prepare(
      `
      SELECT
        unique_id,
        nickname,
        follower_count,
        video_count,
        tt_seller,
        verified,
        location_created,
        confirmed_location,
        modified_at,
        latest_video_time,
        refresh_time,
        status,
        sources
      FROM jobs
      ${baseWhere}
      ORDER BY location_created ASC, COALESCE(latest_video_time, 0) DESC
    `,
    )
    .all(...targetLocations)
    .map(mapJobRow);

  const countryMap = new Map();
  for (const row of rows) {
    const country = row.locationCreated || "未知";
    if (!countryMap.has(country)) {
      countryMap.set(country, []);
    }
    countryMap.get(country).push(row);
  }

  const countries = [];
  for (const [country, users] of countryMap) {
    countries.push({
      country,
      count: users.length,
      users,
    });
  }

  return {
    total: rows.length,
    countries,
  };
}
|
|
1875
|
+
|
|
1876
|
+
/**
 * Converts a snake_case key to camelCase: every underscore that is directly
 * followed by a lowercase ASCII letter is dropped and that letter upper-cased.
 * Underscores followed by anything else are left untouched.
 *
 * @param {string} key
 * @returns {string}
 */
function snakeToCamel(key) {
  const upperCaseSecondChar = (pair) => pair.charAt(1).toUpperCase();
  return key.replace(/_[a-z]/g, upperCaseSecondChar);
}
|
|
1879
|
+
|
|
1880
|
+
/**
 * Converts a camelCase key to snake_case: each uppercase ASCII letter is
 * replaced by an underscore followed by its lowercase form. All other
 * characters are copied unchanged.
 *
 * @param {string} key
 * @returns {string}
 */
function camelToSnake(key) {
  let snake = "";
  for (const ch of key) {
    const isAsciiUpper = ch >= "A" && ch <= "Z";
    snake += isAsciiUpper ? `_${ch.toLowerCase()}` : ch;
  }
  return snake;
}
|
|
1883
|
+
|
|
1884
|
+
// Job columns treated as flags: written as 1/0 by normalizeJobValue and
// read back as true/false/null by mapJobRow.
const jobBooleanColumns = new Set(
  ["pinned", "no_video", "restricted", "processed", "tt_seller", "verified", "error"],
);
|
|
1893
|
+
|
|
1894
|
+
// Video columns that mapVideoRow converts to true/false/null.
const videoBooleanColumns = new Set().add("tt_seller");
|
|
1895
|
+
|
|
1896
|
+
// Allow-list of column names that updateJobInfo / updateJobBaseInfo may put
// into a generated UPDATE statement; any other key in the input is ignored.
const writableJobColumns = new Set([
  "nickname", "status", "sources", "claimed_by", "claimed_at",
  "error", "pinned", "no_video", "restricted", "user_update_count",
  "tt_seller", "verified", "video_count", "comment_count",
  "guessed_location", "location_created", "confirmed_location", "modified_at",
  "follower_count", "following_count", "heart_count", "refresh_time",
  "processed", "processed_at", "updated_at",
  "region", "signature", "bio_link", "sec_uid", "status_code",
  "latest_video_time", "top_video_play_count", "top_video_href",
  "user_create_time",
]);
|
|
1932
|
+
|
|
1933
|
+
/**
 * Converts a JS value into the form stored in a job column.
 *
 * - `undefined` / `null` → `null`
 * - `sources` → JSON text of the de-duplicated array (`"[]"` if not an array)
 * - flag columns (jobBooleanColumns) → 1 or 0
 * - any other object or array → its JSON text
 * - everything else is returned unchanged
 *
 * @param {string} column - snake_case column name.
 * @param {*} value
 * @returns {*} the value to bind.
 */
function normalizeJobValue(column, value) {
  if (value == null) return null;

  if (column === "sources") {
    const uniqueSources = Array.isArray(value) ? [...new Set(value)] : [];
    return JSON.stringify(uniqueSources);
  }

  if (jobBooleanColumns.has(column)) {
    return Number(Boolean(value));
  }

  // Defensive: objects and arrays are stored as JSON text.
  return typeof value === "object" ? JSON.stringify(value) : value;
}
|
|
1946
|
+
|
|
1947
|
+
/**
 * Converts a raw job row (snake_case columns) into a camelCase object.
 * `sources` is parsed from JSON (empty array when empty or unparsable), and
 * flag columns (jobBooleanColumns) become true/false, or null when the
 * stored value is null/undefined.
 *
 * @param {object|null|undefined} row
 * @returns {object|undefined} `undefined` when `row` is falsy.
 */
function mapJobRow(row) {
  if (!row) return undefined;

  const parseSources = (raw) => {
    if (!raw) return [];
    try {
      return JSON.parse(raw);
    } catch {
      return [];
    }
  };
  const toFlag = (raw) => (raw === null || raw === undefined ? null : !!raw);

  const mapped = {};
  Object.keys(row).forEach((column) => {
    const raw = row[column];
    const camelKey = snakeToCamel(column);
    if (column === "sources") {
      mapped[camelKey] = parseSources(raw);
    } else if (jobBooleanColumns.has(column)) {
      mapped[camelKey] = toFlag(raw);
    } else {
      mapped[camelKey] = raw;
    }
  });
  return mapped;
}
|
|
1968
|
+
|
|
1969
|
+
/**
 * Fetches the raw `jobs` row for a user.
 *
 * @param {string} uniqueId
 * @returns {object|null|undefined} `null` when the database is not
 *   initialised; otherwise whatever `.get()` returns for the lookup.
 */
function getJobRow(uniqueId) {
  if (!db) return null;
  const selectByUniqueId = db.prepare("SELECT * FROM jobs WHERE unique_id = ?");
  return selectByUniqueId.get(uniqueId);
}
|
|
1973
|
+
|
|
1974
|
+
/**
 * Fetches the raw `jobs_base` row for a user.
 *
 * @param {string} uniqueId
 * @returns {object|null|undefined} `null` when the database is not
 *   initialised; otherwise whatever `.get()` returns for the lookup.
 */
function getJobBaseRow(uniqueId) {
  if (!db) return null;
  const selectByUniqueId = db.prepare(
    "SELECT * FROM jobs_base WHERE unique_id = ?",
  );
  return selectByUniqueId.get(uniqueId);
}
|
|
1980
|
+
|
|
1981
|
+
/**
 * Returns the job for `uniqueId` as a camelCase object (via mapJobRow), or
 * `undefined` when getJobRow yields nothing.
 *
 * @param {string} uniqueId
 * @returns {object|undefined}
 */
function getJob(uniqueId) {
  const rawRow = getJobRow(uniqueId);
  return mapJobRow(rawRow);
}
|
|
1984
|
+
|
|
1985
|
+
/**
 * Loads every row of `jobs` and maps each one with mapJobRow.
 *
 * @returns {object[]} empty array when the database is not initialised.
 */
function getAllJobs() {
  if (!db) return [];
  const rawRows = db.prepare("SELECT * FROM jobs").all();
  return rawRows.map(mapJobRow);
}
|
|
1989
|
+
|
|
1990
|
+
/**
 * Converts a raw video row (snake_case columns) into a camelCase object.
 * Flag columns (videoBooleanColumns) become true/false, or null when the
 * stored value is null/undefined; other values are copied as-is.
 *
 * @param {object|null|undefined} row
 * @returns {object|undefined} `undefined` when `row` is falsy.
 */
function mapVideoRow(row) {
  if (!row) return undefined;

  const mapped = {};
  Object.keys(row).forEach((column) => {
    const raw = row[column];
    const isFlag = videoBooleanColumns.has(column);
    const missing = raw === null || raw === undefined;
    mapped[snakeToCamel(column)] = isFlag ? (missing ? null : !!raw) : raw;
  });
  return mapped;
}
|
|
2003
|
+
|
|
2004
|
+
/**
 * Fetches the raw `videos` row with the given id.
 *
 * @param {*} videoId
 * @returns {object|null|undefined} `null` when the database is not
 *   initialised; otherwise whatever `.get()` returns for the lookup.
 */
function getVideoRow(videoId) {
  if (!db) return null;
  const selectById = db.prepare("SELECT * FROM videos WHERE id = ?");
  return selectById.get(videoId);
}
|
|
2008
|
+
|
|
2009
|
+
/**
 * Loads every raw row of `videos` (no mapping applied).
 *
 * @returns {object[]} empty array when the database is not initialised.
 */
function getAllVideoRows() {
  if (!db) return [];
  const selectAll = db.prepare("SELECT * FROM videos");
  return selectAll.all();
}
|
|
2013
|
+
|
|
2014
|
+
/**
 * Applies a partial update to the `jobs` row of `uniqueId`.
 *
 * Keys of `info` are converted to snake_case, `bio` / `create_time` are
 * aliased to `signature` / `user_create_time`, and only columns listed in
 * writableJobColumns are written. `undefined` and empty-string values are
 * skipped. `updated_at` is always set to the current time.
 *
 * @param {string} uniqueId
 * @param {object} [info] - camelCase or snake_case fields to write.
 * @param {boolean} [incrementCount=true] - also bump `user_update_count` by one.
 * @returns {{error: string}|{ok: true, userUpdateCount: number}}
 */
function updateJobInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };
  const currentRow = getJobRow(uniqueId);
  if (!currentRow) return { error: "user not found" };

  // Field aliases: bio → signature, createTime → user_create_time.
  const columnAliases = new Map([
    ["bio", "signature"],
    ["create_time", "user_create_time"],
  ]);

  const changes = {};
  Object.entries(info || {}).forEach(([field, rawValue]) => {
    if (field === "uniqueId" || field === "unique_id") return;
    if (rawValue === undefined || rawValue === "") return;
    const snakeName = camelToSnake(field);
    const column = columnAliases.get(snakeName) ?? snakeName;
    if (!writableJobColumns.has(column)) return;
    changes[column] = normalizeJobValue(column, rawValue);
  });

  changes.updated_at = Date.now();
  if (incrementCount) {
    changes.user_update_count = (currentRow.user_update_count || 0) + 1;
  }

  const columns = Object.keys(changes);
  if (columns.length > 0) {
    const assignments = columns.map((column) => `${column} = ?`).join(", ");
    const boundValues = columns.map((column) => changes[column]);
    db.prepare(`UPDATE jobs SET ${assignments} WHERE unique_id = ?`).run(
      ...boundValues,
      uniqueId,
    );
  }

  const userUpdateCount =
    changes.user_update_count ?? currentRow.user_update_count ?? 0;
  return { ok: true, userUpdateCount };
}
|
|
2051
|
+
|
|
2052
|
+
/**
 * Derives a status string from a user's flags. The first truthy flag wins,
 * checked in this order: restricted → "restricted", error → "error",
 * processed → "done"; with none set the status is "pending".
 *
 * @param {object} u
 * @returns {"restricted"|"error"|"done"|"pending"}
 */
function inferStatus(u) {
  const rules = [
    ["restricted", "restricted"],
    ["error", "error"],
    ["processed", "done"],
  ];
  const matched = rules.find(([flag]) => u[flag]);
  return matched ? matched[1] : "pending";
}
|
|
2058
|
+
|
|
2059
|
+
/**
 * Applies a partial update to the `jobs_base` row of `uniqueId`.
 *
 * Keys of `info` are converted to snake_case, `bio` / `create_time` are
 * aliased to `signature` / `user_create_time`, and only columns listed in
 * writableJobColumns are written. `undefined` and empty-string values are
 * skipped. `updated_at` is always set to the current time.
 *
 * @param {string} uniqueId
 * @param {object} [info] - camelCase or snake_case fields to write.
 * @param {boolean} [incrementCount=true] - also bump `user_update_count` by one.
 * @returns {{error: string}|{ok: true, userUpdateCount: number}}
 */
function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
  if (!db) return { error: "db not initialized" };
  const currentRow = getJobBaseRow(uniqueId);
  if (!currentRow) return { error: "user not found" };

  // Field aliases: bio → signature, createTime → user_create_time.
  const resolveColumn = (field) => {
    const snakeName = camelToSnake(field);
    if (snakeName === "bio") return "signature";
    if (snakeName === "create_time") return "user_create_time";
    return snakeName;
  };

  const changes = {};
  const fields = info || {};
  for (const field of Object.keys(fields)) {
    const rawValue = fields[field];
    const isIdentifier = field === "uniqueId" || field === "unique_id";
    const isBlank = rawValue === undefined || rawValue === "";
    if (isIdentifier || isBlank) continue;
    const column = resolveColumn(field);
    if (writableJobColumns.has(column)) {
      changes[column] = normalizeJobValue(column, rawValue);
    }
  }

  changes.updated_at = Date.now();
  if (incrementCount) {
    changes.user_update_count = (currentRow.user_update_count || 0) + 1;
  }

  const columns = Object.keys(changes);
  if (columns.length > 0) {
    const setClause = columns.map((column) => `${column} = ?`).join(", ");
    const statement = db.prepare(
      `UPDATE jobs_base SET ${setClause} WHERE unique_id = ?`,
    );
    statement.run(...columns.map((column) => changes[column]), uniqueId);
  }

  return {
    ok: true,
    userUpdateCount:
      changes.user_update_count ?? currentRow.user_update_count ?? 0,
  };
}
|
|
2096
|
+
|
|
2097
|
+
/**
 * Inserts a user into `jobs_base`. Uses INSERT OR IGNORE, so a conflicting
 * existing row is left untouched. Does nothing when the database is not
 * initialised.
 *
 * Missing fields fall back to defaults: counters to 0, optional text to
 * NULL, `status` to inferStatus(user), and the created/updated timestamps to
 * the current time. `ttSeller` and `verified` are stored as NULL when they
 * are undefined, null or "", otherwise as 1/0.
 *
 * @param {object} user - camelCase user record; `uniqueId` is the key.
 * @returns {void}
 */
function addJobBaseToDb(user) {
  if (!db) return;
  const now = Date.now();

  const insertJobBase = db.prepare(
    `
    INSERT OR IGNORE INTO jobs_base (
      unique_id,
      nickname,
      status,
      sources,
      claimed_by,
      claimed_at,
      error,
      pinned,
      no_video,
      restricted,
      user_update_count,
      tt_seller,
      verified,
      video_count,
      comment_count,
      guessed_location,
      location_created,
      follower_count,
      following_count,
      heart_count,
      refresh_time,
      processed,
      processed_at,
      created_at,
      updated_at,
      region,
      signature,
      bio_link,
      sec_uid
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `,
  );

  const toFlag = (value) => (value ? 1 : 0);
  // Tri-state flag: "unknown" (undefined / null / "") is stored as NULL.
  const toNullableFlag = (value) =>
    value === undefined || value === null || value === ""
      ? null
      : toFlag(value);
  const uniqueSources = Array.isArray(user.sources)
    ? [...new Set(user.sources)]
    : [];

  // Order must match the column list above.
  const values = [
    user.uniqueId,
    user.nickname || null,
    user.status || inferStatus(user),
    JSON.stringify(uniqueSources),
    user.claimedBy || null,
    user.claimedAt || null,
    user.error || null,
    toFlag(user.pinned),
    toFlag(user.noVideo),
    toFlag(user.restricted),
    user.userUpdateCount || 0,
    toNullableFlag(user.ttSeller),
    toNullableFlag(user.verified),
    user.videoCount || 0,
    user.commentCount || 0,
    user.guessedLocation || null,
    user.locationCreated || null,
    user.followerCount || 0,
    user.followingCount || 0,
    user.heartCount || 0,
    user.refreshTime || null,
    toFlag(user.processed),
    user.processedAt || null,
    user.createdAt || now,
    user.updatedAt || now,
    user.region || null,
    user.signature || null,
    user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
    user.secUid || null,
  ];

  insertJobBase.run(...values);
}
|
|
2181
|
+
|
|
2182
|
+
/**
 * Registers a user as a job. Without a database it only calls
 * addUserToDb(user). With a database it fills in missing `status`,
 * `createdAt` and `updatedAt` on the passed object, then calls addUserToDb
 * and addJobToDb inside one transaction.
 *
 * @param {object} user - mutated in place with the defaulted fields.
 * @returns {void}
 */
function addJob(user) {
  if (!db) {
    addUserToDb(user);
    return;
  }

  if (!user.status) user.status = inferStatus(user);
  if (!user.createdAt) user.createdAt = Date.now();
  if (!user.updatedAt) user.updatedAt = user.createdAt;

  const persistUserAndJob = db.transaction((job) => {
    addUserToDb(job);
    addJobToDb(job);
  });
  persistUserAndJob(user);
}
|
|
2196
|
+
|
|
2197
|
+
export function createStore(filePath, options = {}) {
|
|
2198
|
+
if (!filePath) {
|
|
2199
|
+
throw new Error("createStore requires an explicit .db path");
|
|
2200
|
+
}
|
|
2201
|
+
|
|
2202
|
+
// refillJobsFromRaw 的 LLM 打分配置(自动补充任务时使用)
|
|
2203
|
+
const refillLlmConfig = {
|
|
2204
|
+
llmScore: false,
|
|
2205
|
+
llmMinScore: 60,
|
|
2206
|
+
llmSampleSize: 100,
|
|
2207
|
+
...options.refillLlm,
|
|
2208
|
+
};
|
|
2209
|
+
|
|
2210
|
+
let data = [];
|
|
2211
|
+
// uniqueId → index 内存索引,O(1) 查找
|
|
2212
|
+
let uidIndex = new Map();
|
|
2213
|
+
let clientErrors = new Map();
|
|
2214
|
+
// 客户端登录状态:userId → boolean
|
|
2215
|
+
let clientLoginStatus = new Map();
|
|
2216
|
+
// 活跃客户端追踪:clientId → { type, ip, port, userId, lastSeen }
|
|
2217
|
+
let activeClients = new Map();
|
|
2218
|
+
// refill 锁:防止多个 claimNextJob 同时触发 LLM refill
|
|
2219
|
+
let refillLock = null; // Promise | null
|
|
2220
|
+
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
2221
|
+
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
2222
|
+
let llmSampleOffsets = new Map();
|
|
2223
|
+
if (filePath) {
|
|
2224
|
+
// 初始化 SQLite 用户表(用于判重)
|
|
2225
|
+
initUserDb(filePath);
|
|
2226
|
+
// 从数据库恢复偏移量
|
|
2227
|
+
loadLlmSampleOffsets();
|
|
2228
|
+
}
|
|
2229
|
+
|
|
2230
|
+
/**
 * Restores the LLM sampling offsets from the `_llm_sample_offsets` table
 * into the in-memory `llmSampleOffsets` map and logs what was restored.
 * Any failure (for example a missing table or invalid JSON) is logged and
 * the map is left as it was.
 */
function loadLlmSampleOffsets() {
  try {
    const stored = db
      .prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
      .get();
    if (!stored || !stored.offsets) return;

    const parsed = JSON.parse(stored.offsets);
    if (!parsed || typeof parsed !== "object") return;

    for (const [key, position] of Object.entries(parsed)) {
      llmSampleOffsets.set(key, position);
    }

    const summary = Array.from(llmSampleOffsets.entries())
      .map(([k, v]) => `${k}:${v}`)
      .join(", ");
    console.error(`[data-store] 已恢复 LLM 采样偏移量: ${summary}`);
  } catch (e) {
    // Missing table or parse failure: continue with the current offsets.
    console.error(
      `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
    );
  }
}
|
|
2260
|
+
|
|
2261
|
+
/**
 * Persists the in-memory `llmSampleOffsets` map as one JSON document in the
 * single-row table `_llm_sample_offsets` (created on demand, id fixed to 1).
 * Failures are logged, not thrown.
 */
function saveLlmSampleOffsets() {
  try {
    const serialized = JSON.stringify(Object.fromEntries(llmSampleOffsets));

    // Create the table if it does not exist yet.
    const ensureTable = db.prepare(
      `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
    );
    ensureTable.run();

    // Insert or overwrite the single row.
    const upsertOffsets = db.prepare(
      `INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
    );
    upsertOffsets.run(serialized);
  } catch (e) {
    console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
  }
}
|
|
2279
|
+
|
|
2280
|
+
// stats 缓存
|
|
2281
|
+
let statsCache = null;
|
|
2282
|
+
let statsDirty = true;
|
|
2283
|
+
|
|
2284
|
+
/**
 * Invalidates both cached views: the stats cache and the status groups are
 * recomputed the next time they are requested.
 */
function markStatsDirty() {
  statsDirty = groupsDirty = true;
}
|
|
2288
|
+
|
|
2289
|
+
/**
 * Recomputes `{ total, statusCounts }`, stores it in `statsCache`, clears
 * `statsDirty` and returns it.
 *
 * With a database, the total comes from getJobsCount() and the counts from a
 * GROUP BY over `jobs`; statuses that are not already keys of the counts
 * object are ignored. Without a database the in-memory `data` array is
 * tallied and every status found is counted.
 *
 * @returns {{total: number, statusCounts: object}}
 */
function computeStatsInternal() {
  const statusCounts = {
    pending: 0,
    processing: 0,
    done: 0,
    error: 0,
    restricted: 0,
  };
  let total;

  if (db) {
    total = getJobsCount();
    const groupedRows = db
      .prepare(
        `
        SELECT status, COUNT(*) as count
        FROM jobs
        GROUP BY status
      `,
      )
      .all();
    groupedRows.forEach(({ status, count }) => {
      if (status && statusCounts[status] !== undefined) {
        statusCounts[status] = count;
      }
    });
  } else {
    total = data.length;
    for (const u of data) {
      statusCounts[u.status] = (statusCounts[u.status] || 0) + 1;
    }
  }

  statsCache = { total, statusCounts };
  statsDirty = false;
  return statsCache;
}
|
|
2333
|
+
|
|
2334
|
+
/**
 * Returns the cached stats, recomputing them first when they are marked dirty.
 *
 * @returns {{total: number, statusCounts: object}}
 */
function getStats() {
  return statsDirty ? computeStatsInternal() : statsCache;
}
|
|
2340
|
+
|
|
2341
|
+
// 按 status 的分组索引,避免每次请求全量遍历
|
|
2342
|
+
let statusGroups = null;
|
|
2343
|
+
let groupsDirty = true;
|
|
2344
|
+
|
|
2345
|
+
// Location codes ranked first and second by locationTier.
const tier1LocSet = new Set(["PL", "NL", "BE"]);
const tier2LocSet = new Set(["DE", "FR", "IT", "IE", "ES"]);

/**
 * Ranks a user by `guessedLocation` (case-insensitive): 0 for a tier-1 code,
 * 1 for a tier-2 code, 2 for anything else (including a missing location).
 *
 * @param {object} u
 * @returns {0|1|2}
 */
function locationTier(u) {
  const code = (u.guessedLocation || "").toUpperCase();
  const tierIndex = [tier1LocSet, tier2LocSet].findIndex((codes) =>
    codes.has(code),
  );
  return tierIndex === -1 ? 2 : tierIndex;
}
|
|
2353
|
+
|
|
2354
|
+
/**
 * Sorts one status group in place and returns a new array with pinned users
 * moved to the front (relative order otherwise preserved).
 *
 * - "done": most recently processed first.
 * - "pending": unverified sellers first, then by location tier, then by
 *   follower count descending.
 * - anything else: follower count descending.
 *
 * @param {string} key - status name of the group.
 * @param {object[]} arr - users of that group; sorted in place.
 * @returns {object[]} pinned users followed by the remaining users.
 */
function sortGroup(key, arr) {
  const byFollowersDesc = (a, b) =>
    (b.followerCount || 0) - (a.followerCount || 0);
  const byProcessedAtDesc = (a, b) =>
    (b.processedAt || 0) - (a.processedAt || 0);
  const sellerRank = (u) =>
    u.ttSeller === true && u.verified === false ? 0 : 1;
  const byPendingPriority = (a, b) =>
    sellerRank(a) - sellerRank(b) ||
    locationTier(a) - locationTier(b) ||
    byFollowersDesc(a, b);

  switch (key) {
    case "done":
      arr.sort(byProcessedAtDesc);
      break;
    case "pending":
      arr.sort(byPendingPriority);
      break;
    default:
      arr.sort(byFollowersDesc);
  }

  // Bubble pinned users to the head of the group.
  const pinnedUsers = [];
  const otherUsers = [];
  for (const u of arr) {
    (u.pinned ? pinnedUsers : otherUsers).push(u);
  }
  return pinnedUsers.concat(otherUsers);
}
|
|
2373
|
+
|
|
2374
|
+
/**
 * Rebuilds `statusGroups`: buckets every user by status (missing status
 * counts as "pending"), sorts each bucket with sortGroup, and clears
 * `groupsDirty`. Users come from getAllJobs() when a database is open,
 * otherwise from the in-memory `data` array.
 */
function rebuildStatusGroups() {
  statusGroups = {
    pending: [],
    processing: [],
    done: [],
    error: [],
    restricted: [],
  };

  const users = db ? getAllJobs() : data;
  for (const u of users) {
    const key = u.status || "pending";
    const bucket = statusGroups[key];
    if (bucket) {
      bucket.push(u);
    } else {
      statusGroups[key] = [u];
    }
  }

  // Sort inside each group.
  Object.keys(statusGroups).forEach((key) => {
    statusGroups[key] = sortGroup(key, statusGroups[key]);
  });
  groupsDirty = false;
}
|
|
2413
|
+
|
|
2414
|
+
/**
 * Returns the users grouped by status, rebuilding the groups first when
 * they are marked dirty.
 *
 * @returns {object} map of status → sorted user array.
 */
function getStatusGroups() {
  if (groupsDirty) {
    rebuildStatusGroups();
  }
  return statusGroups;
}
|
|
2418
|
+
|
|
2419
|
+
/** Flags the status groups as stale so the next getStatusGroups() rebuilds them. */
function markGroupsDirty() {
  groupsDirty = true;
}
|
|
2422
|
+
|
|
2423
|
+
// 视频存储(SQLite 真相源)
|
|
2424
|
+
let videos = [];
|
|
2425
|
+
|
|
2426
|
+
// 构建索引 + 推断 status
|
|
2427
|
+
for (let i = 0; i < data.length; i++) {
|
|
2428
|
+
const u = data[i];
|
|
2429
|
+
if (!u.status) u.status = inferStatus(u);
|
|
2430
|
+
uidIndex.set(u.uniqueId, i);
|
|
2431
|
+
}
|
|
2432
|
+
|
|
2433
|
+
/**
 * Intentionally does nothing and returns undefined.
 * NOTE(review): presumably kept so existing callers of save() keep working
 * now that writes go to SQLite — confirm before removing.
 */
function save() {}
|
|
2436
|
+
|
|
2437
|
+
/**
 * Persists the LLM sampling offsets when a database is open, logging (not
 * throwing) on failure. It does not create a database backup.
 *
 * @returns {Promise<void>} an already-resolved promise.
 */
function flushSave() {
  const databaseOpen = Boolean(db && dbPath);
  if (databaseOpen) {
    try {
      saveLlmSampleOffsets();
    } catch (e) {
      console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
    }
  }
  return Promise.resolve();
}
|
|
2448
|
+
|
|
2449
|
+
/**
 * Writes a snapshot of the open database next to the original file and then
 * prunes old snapshots, keeping the newest `maxBackups`.
 *
 * The snapshot is named `<base>-<timestamp>.db`, where the timestamp is the
 * first 15 digits of the UTC ISO time with separators removed
 * (YYYYMMDDHHmmss plus one millisecond digit). cleanupOldBackups relies on
 * exactly 15 digits.
 *
 * The copy is made synchronously with `VACUUM INTO`. The previous call,
 * `db.backup("main", backupDb, "main")`, does not match better-sqlite3's
 * `backup(destinationPath, options)` signature: it started an un-awaited
 * backup into a file literally named "main" and left the timestamped file
 * as an empty database.
 *
 * @param {number} [maxBackups=3] - number of snapshots to keep.
 * @returns {string|null} path of the new snapshot, or null when the
 *   database is not initialised or the backup failed.
 */
function backupDatabase(maxBackups = 3) {
  if (!db || !dbPath) {
    console.error("[data-store] 数据库未初始化,跳过备份");
    return null;
  }

  try {
    const timestamp = new Date()
      .toISOString()
      .replace(/[-:T.]/g, "")
      .slice(0, 15); // YYYYMMDDHHmmss + first millisecond digit
    const baseName = path.basename(dbPath, ".db");
    const backupName = `${baseName}-${timestamp}.db`;
    const backupDir = path.dirname(dbPath);
    const backupPath = path.join(backupDir, backupName);

    console.error(`[data-store] 正在备份数据库: ${backupName}`);

    // VACUUM INTO writes a consistent copy of the main database to a new
    // file and fails if that file already exists; it must run outside a
    // transaction.
    db.prepare("VACUUM INTO ?").run(backupPath);

    // Report the size of the snapshot that was written.
    const stat = fs.statSync(backupPath);
    const sizeMB = (stat.size / 1024 / 1024).toFixed(2);
    console.error(`[data-store] 备份完成: ${backupName} (${sizeMB} MB)`);

    // Prune old snapshots, keeping the newest maxBackups.
    cleanupOldBackups(backupDir, baseName, maxBackups);

    return backupPath;
  } catch (e) {
    console.error(`[data-store] 备份失败: ${e.message}`);
    return null;
  }
}
|
|
2493
|
+
|
|
2494
|
+
/**
 * Deletes old snapshot files from `backupDir`, keeping the newest
 * `maxBackups`. A snapshot is a file named `<baseName>-<15 digits>.db`;
 * because the digits are a timestamp, sorting the names orders them by age.
 * Failures are logged, not thrown.
 *
 * @param {string} backupDir - directory that holds the snapshots.
 * @param {string} baseName - database file name without the `.db` extension;
 *   matched literally.
 * @param {number} maxBackups - number of snapshots to keep.
 * @returns {void}
 */
function cleanupOldBackups(backupDir, baseName, maxBackups) {
  try {
    // Escape regex metacharacters so names such as "result.v2" or "a+b"
    // match only themselves (and cannot produce an invalid pattern).
    const literalBase = String(baseName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`^${literalBase}-\\d{15}\\.db$`);
    const backups = fs
      .readdirSync(backupDir)
      .filter((f) => pattern.test(f))
      .sort() // lexicographic order equals chronological order here
      .reverse(); // newest first

    if (backups.length > maxBackups) {
      const toDelete = backups.slice(maxBackups);
      for (const file of toDelete) {
        const filePath = path.join(backupDir, file);
        fs.unlinkSync(filePath);
        console.error(`[data-store] 已清理旧备份: ${file}`);
      }
    }

    console.error(
      `[data-store] 备份清理完成: 保留 ${Math.min(backups.length, maxBackups)} / ${backups.length} 个备份`,
    );
  } catch (e) {
    console.error(`[data-store] 清理旧备份失败: ${e.message}`);
  }
}
|
|
2523
|
+
|
|
2524
|
+
/**
 * Takes a final database backup (intended for shutdown); does nothing when
 * no database is open.
 */
function stopBackup() {
  if (!db || !dbPath) return;
  backupDatabase();
}
|
|
2530
|
+
|
|
2531
|
+
/**
 * Looks a user up by uniqueId: first in the in-memory index, then in the
 * database (via getJob) when one is open.
 *
 * @param {string} uid
 * @returns {object|undefined}
 */
function getUser(uid) {
  const position = uidIndex.get(uid);
  if (position !== undefined) {
    return data[position];
  }
  return db ? getJob(uid) : undefined;
}
|
|
2537
|
+
|
|
2538
|
+
/**
 * Reports whether a user is known: checks the in-memory index first and
 * falls back to hasUserInDb.
 *
 * @param {string} uid
 * @returns {boolean} `true`, or whatever hasUserInDb returns.
 */
function hasUser(uid) {
  return uidIndex.has(uid) || hasUserInDb(uid);
}
|
|
2543
|
+
|
|
2544
|
+
/**
 * Same check as hasUser: in-memory index first, hasUserInDb as fallback.
 *
 * @param {string} uid
 * @returns {boolean} `true`, or whatever hasUserInDb returns.
 */
function userExists(uid) {
  const inMemory = uidIndex.has(uid);
  return inMemory ? true : hasUserInDb(uid);
}
|
|
2549
|
+
|
|
2550
|
+
/**
 * Adds a user, or merges new field values into an existing one.
 *
 * - Database open and user not in the memory index: the user is skipped if
 *   hasUserInDb already knows it, otherwise inserted through addJob.
 * - User already known (memory, or database via getUser): every non-empty
 *   field of `user` except `uniqueId` and `sources` overwrites the existing
 *   object's value.
 * - Otherwise the user is added to the in-memory list (appended when
 *   `append` is truthy, prepended otherwise) and written via addUserToDb.
 *
 * @param {object} user - record keyed by `uniqueId`; may be mutated with defaults.
 * @param {boolean} [append] - append instead of prepend in memory mode.
 * @returns {void}
 */
function addUser(user, append) {
  const memoryIdx = uidIndex.get(user.uniqueId);
  if (db && memoryIdx === undefined) {
    // De-duplicate against the users table (every user ever discovered),
    // not the jobs table.
    if (hasUserInDb(user.uniqueId)) {
      return;
    }
    addJob(user);
    return;
  }

  const existing = getUser(user.uniqueId);
  if (existing) {
    let changed = false;
    for (const key of Object.keys(user)) {
      if (key === "uniqueId" || key === "sources") continue;
      const incoming = user[key];
      if (incoming === undefined || incoming === null || incoming === "") {
        continue;
      }
      if (existing[key] !== incoming) {
        existing[key] = incoming;
        changed = true;
      }
    }
    if (changed) save();
    return;
  }

  if (!user.status) user.status = inferStatus(user);
  if (user.processed) user.processedAt = user.processedAt || Date.now();
  if (!user.createdAt) user.createdAt = Date.now();

  if (append) {
    uidIndex.set(user.uniqueId, data.length);
    data.push(user);
  } else {
    data.unshift(user);
    // unshift moves every existing element one slot to the right, so the
    // positions stored in the index must move with them; otherwise later
    // lookups return the wrong user.
    for (const [uid, position] of uidIndex) {
      uidIndex.set(uid, position + 1);
    }
    uidIndex.set(user.uniqueId, 0);
  }

  // Mirror the new user into SQLite.
  addUserToDb(user);
  markStatsDirty();
  save();
}
|
|
2592
|
+
|
|
2593
|
+
/**
 * Registers newly discovered users as pending entries: each unknown user is
 * written through addUserToDb and addJobBaseToDb inside one transaction.
 * Entries without a usable `uniqueId` are ignored; users already known to
 * hasUser are counted as skipped.
 *
 * @param {Array<object>} users - items with `uniqueId` (a leading "@" is
 *   stripped) and optional `sources`, `guessedLocation`, `locationCreated`.
 * @returns {{added: number, skipped: number}}
 */
function addRawUsers(users) {
  if (!Array.isArray(users)) return { added: 0, skipped: 0 };
  const now = Date.now();
  let added = 0;
  let skipped = 0;

  // The transaction wrapper does not depend on the user, so it is built
  // once, and only when there is something to insert.
  let insertRawUser = null;

  for (const u of users) {
    const uniqueId = (u.uniqueId || "").replace(/^@/, "").trim();
    if (!uniqueId) continue;
    if (hasUser(uniqueId)) {
      skipped++;
      continue;
    }

    let sources = ["tag"];
    if (Array.isArray(u.sources)) {
      sources = u.sources;
    } else if (u.sources) {
      sources = [u.sources];
    }

    const userObj = {
      uniqueId,
      status: "pending",
      sources,
      guessedLocation: u.guessedLocation || u.locationCreated || null,
      locationCreated: u.locationCreated || null,
      createdAt: now,
      updatedAt: now,
    };

    if (!insertRawUser) {
      insertRawUser = db.transaction((job) => {
        addUserToDb(job);
        addJobBaseToDb(job);
      });
    }
    insertRawUser(userObj);
    added++;
  }

  return { added, skipped };
}
|
|
2629
|
+
|
|
2630
|
+
/**
 * Returns all users whose status is "pending", read from the database when
 * one is open and from the in-memory list otherwise.
 *
 * @returns {object[]}
 */
function getPendingUsers() {
  const everyone = db ? getAllJobs() : data;
  return everyone.filter((u) => u.status === "pending");
}
|
|
2636
|
+
|
|
2637
|
+
/**
 * Returns all users whose status is "done", read from the database when one
 * is open and from the in-memory list otherwise.
 *
 * @returns {object[]}
 */
function getProcessedUsers() {
  const isDone = (u) => u.status === "done";
  const everyone = db ? getAllJobs() : data;
  return everyone.filter(isDone);
}
|
|
2643
|
+
|
|
2644
|
+
/**
 * Returns every user: freshly mapped job rows when a database is open,
 * otherwise the in-memory `data` array itself (not a copy).
 *
 * @returns {object[]}
 */
function getAllUsers() {
  return db ? getAllJobs() : data;
}
|
|
2650
|
+
|
|
2651
|
+
/**
|
|
2652
|
+
* 使用 LLM 对单个 job 的国家匹配度打分(0-100)
|
|
2653
|
+
* @param {Object} job - raw_jobs 中的一条记录
|
|
2654
|
+
* @param {string[]} targetLocations - 目标国家列表
|
|
2655
|
+
* @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
|
|
2656
|
+
*/
|
|
2657
|
+
async function scoreJobLocation(job, targetLocations) {
|
|
2658
|
+
const { fetch: undiciFetch } = await import("undici");
|
|
2659
|
+
|
|
2660
|
+
const prompt = `
|
|
2661
|
+
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
|
|
2662
|
+
|
|
2663
|
+
目标国家列表: ${targetLocations.join(", ")}
|
|
2664
|
+
|
|
2665
|
+
重要:
|
|
2666
|
+
- 用户只要来自上述**任意一个**国家就算匹配。
|
|
2667
|
+
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
|
|
2668
|
+
- 请综合用户名、昵称、签名、位置等信息做判断。
|
|
2669
|
+
|
|
2670
|
+
用户信息:
|
|
2671
|
+
- 用户名: ${job.unique_id || "未知"}
|
|
2672
|
+
- 昵称: ${job.nickname || "未知"}
|
|
2673
|
+
- 签名: ${job.signature || "未知"}
|
|
2674
|
+
- 地区: ${job.region || "未知"}
|
|
2675
|
+
- 猜测国家(参考): ${job.guessed_location || "未知"}
|
|
2676
|
+
- 位置信息: ${job.location_created || "未知"}
|
|
2677
|
+
- 主页链接: ${job.bio_link || "未知"}
|
|
2678
|
+
|
|
2679
|
+
返回 JSON(仅返回 JSON,无其他内容):
|
|
2680
|
+
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
|
|
2681
|
+
|
|
2682
|
+
Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
|
|
2683
|
+
`;
|
|
2684
|
+
|
|
2685
|
+
try {
|
|
2686
|
+
const apiKey = process.env.APIKEY || "";
|
|
2687
|
+
const response = await undiciFetch(
|
|
2688
|
+
"http://82.156.52.214:18000/v1/chat/completions",
|
|
2689
|
+
{
|
|
2690
|
+
method: "POST",
|
|
2691
|
+
headers: {
|
|
2692
|
+
"Content-Type": "application/json",
|
|
2693
|
+
Authorization: `Bearer ${apiKey}`,
|
|
2694
|
+
},
|
|
2695
|
+
body: JSON.stringify({
|
|
2696
|
+
model: "zc-fast",
|
|
2697
|
+
messages: [{ role: "user", content: prompt }],
|
|
2698
|
+
max_tokens: 512,
|
|
2699
|
+
temperature: 0.1,
|
|
2700
|
+
}),
|
|
2701
|
+
},
|
|
2702
|
+
);
|
|
2703
|
+
|
|
2704
|
+
const result = await response.json();
|
|
2705
|
+
const content = result.choices?.[0]?.message?.content || "";
|
|
2706
|
+
|
|
2707
|
+
// 解析 JSON 响应(多层容错)
|
|
2708
|
+
let parsed = null;
|
|
2709
|
+
|
|
2710
|
+
// 尝试 1: 直接解析
|
|
2711
|
+
try {
|
|
2712
|
+
parsed = JSON.parse(content);
|
|
2713
|
+
} catch {
|
|
2714
|
+
// 尝试 2: 提取 {} 包裹的内容
|
|
2715
|
+
const match = content.match(/\{[\s\S]*\}/);
|
|
2716
|
+
if (match) {
|
|
2717
|
+
try {
|
|
2718
|
+
parsed = JSON.parse(match[0]);
|
|
2719
|
+
} catch {
|
|
2720
|
+
// 尝试 3: 清理常见问题后解析
|
|
2721
|
+
const cleaned = match[0]
|
|
2722
|
+
.replace(/"/g, '"') // 弯引号 → 直引号
|
|
2723
|
+
.replace(/\s+/g, " ") // 多余空白
|
|
2724
|
+
.trim();
|
|
2725
|
+
try {
|
|
2726
|
+
parsed = JSON.parse(cleaned);
|
|
2727
|
+
} catch {
|
|
2728
|
+
// 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
|
|
2729
|
+
const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
|
|
2730
|
+
if (scoreMatch) {
|
|
2731
|
+
let reason = "解析降级";
|
|
2732
|
+
// 找 "reason": 的位置,取到最后一个 } 前的内容
|
|
2733
|
+
const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
|
|
2734
|
+
if (reasonKeyPos !== -1) {
|
|
2735
|
+
const afterKey = content.substring(reasonKeyPos);
|
|
2736
|
+
const colonPos = afterKey.indexOf(":");
|
|
2737
|
+
const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
|
|
2738
|
+
const rawValue = afterKey.substring(valueStart);
|
|
2739
|
+
// 取到原始 content 最后一个 } 前
|
|
2740
|
+
const lastBrace = content.lastIndexOf("}");
|
|
2741
|
+
const reasonEnd = lastBrace - reasonKeyPos - valueStart;
|
|
2742
|
+
if (reasonEnd > 0) {
|
|
2743
|
+
reason = rawValue.substring(0, reasonEnd).trim();
|
|
2744
|
+
// 去掉首尾的引号
|
|
2745
|
+
if (reason.startsWith('"')) reason = reason.substring(1);
|
|
2746
|
+
if (reason.endsWith('"'))
|
|
2747
|
+
reason = reason.substring(0, reason.length - 1);
|
|
2748
|
+
}
|
|
2749
|
+
}
|
|
2750
|
+
parsed = {
|
|
2751
|
+
score: parseInt(scoreMatch[1]) || 50,
|
|
2752
|
+
reason,
|
|
2753
|
+
};
|
|
2754
|
+
}
|
|
2755
|
+
}
|
|
2756
|
+
}
|
|
2757
|
+
}
|
|
2758
|
+
|
|
2759
|
+
// 尝试 5: 如果以上都失败,用更宽松的正则提取
|
|
2760
|
+
if (!parsed) {
|
|
2761
|
+
const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
|
|
2762
|
+
const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
|
|
2763
|
+
if (scoreMatch) {
|
|
2764
|
+
parsed = {
|
|
2765
|
+
score: parseInt(scoreMatch[1]) || 50,
|
|
2766
|
+
reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
|
|
2767
|
+
};
|
|
2768
|
+
}
|
|
2769
|
+
}
|
|
2770
|
+
}
|
|
2771
|
+
|
|
2772
|
+
if (parsed && typeof parsed.score === "number") {
|
|
2773
|
+
return {
|
|
2774
|
+
uniqueId: job.unique_id,
|
|
2775
|
+
score: Math.max(0, Math.min(100, parsed.score)),
|
|
2776
|
+
reason: parsed.reason || "",
|
|
2777
|
+
};
|
|
2778
|
+
}
|
|
2779
|
+
|
|
2780
|
+
// 所有解析都失败,返回默认分
|
|
2781
|
+
console.error(
|
|
2782
|
+
`[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
|
|
2783
|
+
);
|
|
2784
|
+
return {
|
|
2785
|
+
uniqueId: job.unique_id,
|
|
2786
|
+
score: 50,
|
|
2787
|
+
reason: "LLM 响应解析失败,使用默认分",
|
|
2788
|
+
};
|
|
2789
|
+
} catch (e) {
|
|
2790
|
+
console.error(
|
|
2791
|
+
`[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
|
|
2792
|
+
);
|
|
2793
|
+
return {
|
|
2794
|
+
uniqueId: job.unique_id,
|
|
2795
|
+
score: 50,
|
|
2796
|
+
reason: `LLM 调用异常: ${e.message}`,
|
|
2797
|
+
};
|
|
2798
|
+
}
|
|
2799
|
+
}
|
|
2800
|
+
|
|
2801
|
+
/**
 * Score a list of raw_jobs records for target-country match, batch by batch.
 *
 * Jobs inside one batch are scored concurrently through `scoreJobLocation`;
 * the next batch only starts once the previous one has settled, so at most
 * `batchSize` scoring calls are in flight at a time.
 *
 * @param {Object[]} jobs - raw_jobs records to score.
 * @param {string[]} targetLocations - target country list forwarded to `scoreJobLocation`.
 * @param {number} [batchSize=10] - jobs scored concurrently per batch. Values that
 *   are not a number >= 1 fall back to 10 (a zero/negative step would otherwise
 *   never advance the loop).
 * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
 *   one result per job, in input order.
 */
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
  const requestedSize = Math.floor(Number(batchSize));
  const size =
    Number.isFinite(requestedSize) && requestedSize >= 1 ? requestedSize : 10;

  const results = [];
  for (let start = 0; start < jobs.length; start += size) {
    const batch = jobs.slice(start, start + size);
    const batchResults = await Promise.all(
      batch.map((job) => scoreJobLocation(job, targetLocations)),
    );
    results.push(...batchResults);
  }
  return results;
}
|
|
2819
|
+
|
|
2820
|
+
/**
 * Move a batch of eligible rows from the `raw_jobs` staging table into `jobs`.
 *
 * A row is eligible when it has more than 6 videos, at least one follower and
 * at least one following, and (when `locations` is given) its upper-cased
 * `guessed_location` is in that list. Moved rows are inserted into `jobs` with
 * status 'pending' (INSERT OR IGNORE) and deleted from `raw_jobs` in the same
 * transaction.
 *
 * Two modes:
 * - Regular (synchronous): moves up to `limit` rows, tag-sourced rows first,
 *   then by video count and creation time.
 * - LLM (`options.llmScore` truthy AND a non-empty `locations`): returns a
 *   Promise. Tag-sourced rows are taken without scoring (up to
 *   `llmTotal * llmTagRatio`); non-tag rows are sampled per country, scored via
 *   `scoreJobsBatch`, and kept when `score >= llmMinScore`. Paging offsets are
 *   remembered in `llmSampleOffsets` and persisted with `saveLlmSampleOffsets`.
 *
 * @param {string[]|null} locations - target country list (null means no restriction).
 * @param {number} limit - max rows moved in regular mode, clamped to 1..2000, default 500.
 * @param {Object} options - optional configuration.
 * @param {boolean} [options.llmScore] - enable LLM score filtering, default false.
 * @param {number} [options.llmMinScore] - minimum qualifying LLM score, default 60.
 * @param {number} [options.llmSampleSize] - rows fetched per sampling query, default 100.
 * @param {number} [options.llmTotal] - overall target row count in LLM mode, default 200.
 * @param {number} [options.llmTagRatio] - share of `llmTotal` reserved for tag rows, default 0.6.
 * @param {number} [options.llmMaxBatches] - max sampling rounds per country, default 10.
 * @returns {{ moved: number, error?: string }
 *   | Promise<{ moved: number, scored: number, qualified: number, scores: Object[] }>}
 *   the moved count; LLM mode resolves with scoring details as well.
 */
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
  if (!db) {
    return { moved: 0, error: "db not ready" };
  }

  const safeLimit = Math.max(
    1,
    Math.min(2000, Number.parseInt(limit, 10) || 500),
  );
  const normalizedLocations = locations
    ? locations.map((loc) => String(loc).trim().toUpperCase()).filter(Boolean)
    : null;
  const hasLocationFilter =
    !!normalizedLocations && normalizedLocations.length > 0;

  // Shared eligibility filter; `args` holds the bound values for its placeholders.
  const conditions = [
    "COALESCE(video_count, 0) > 6",
    "COALESCE(follower_count, 0) > 0",
    "COALESCE(following_count, 0) > 0",
  ];
  const args = [];
  if (hasLocationFilter) {
    conditions.push(
      `UPPER(COALESCE(guessed_location, '')) IN (${normalizedLocations.map(() => "?").join(", ")})`,
    );
    args.push(...normalizedLocations);
  }
  const whereSql = conditions.join(" AND ");

  // Nothing eligible: skip all further work.
  const count =
    db
      .prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
      .get(...args)?.c || 0;
  if (!count) {
    return { moved: 0 };
  }

  // Both modes copy the same column set; only the row selection differs.
  const buildInsertSql = (selection) => `
    INSERT OR IGNORE INTO jobs (
      unique_id, nickname, status, sources, pinned,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location,
      follower_count, following_count, heart_count,
      created_at, updated_at, region, signature, bio_link, sec_uid,
      status_code, latest_video_time, user_create_time
    )
    SELECT
      unique_id, nickname, 'pending', sources, pinned,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location,
      follower_count, following_count, heart_count,
      created_at, updated_at, region, signature, bio_link, sec_uid,
      status_code, latest_video_time, user_create_time
    FROM raw_jobs
    ${selection}
  `;

  if (options.llmScore && hasLocationFilter) {
    const llmTotal = options.llmTotal ?? 200;
    const llmTagRatio = options.llmTagRatio ?? 0.6;
    const llmTagLimit = Math.floor(llmTotal * llmTagRatio);
    const llmNonTagTarget = llmTotal - llmTagLimit;
    const llmMinScore = options.llmMinScore ?? 60;
    const llmSampleSize = options.llmSampleSize ?? 100;
    const maxBatches = options.llmMaxBatches ?? 10;

    console.error(
      `[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
    );

    // Scoring is asynchronous, so this mode hands back a Promise the caller must await.
    return (async () => {
      const allTagQualified = [];
      const allNonTagQualified = [];
      const allScores = [];

      // NOTE(review): offsets are advanced by the number of rows read, but the
      // qualifying rows are deleted afterwards, so a later call appears to skip
      // some rows until the offset wraps back to 0 — confirm this is acceptable.

      // ===== Step 1: take tag-sourced rows (all countries), no scoring =====
      let tagOffset = llmSampleOffsets.get("_tag") || 0;
      const tagGlobalCount =
        db
          .prepare(
            `SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
          )
          .get(...args)?.c || 0;

      // A remembered offset past the end wraps around to the start.
      if (tagOffset >= tagGlobalCount) {
        tagOffset = 0;
        llmSampleOffsets.set("_tag", 0);
      }

      console.error(
        `[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
      );

      const tagPageStmt = db.prepare(
        `
          SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
          ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
          LIMIT ? OFFSET ?
        `,
      );
      while (
        allTagQualified.length < llmTagLimit &&
        tagOffset < tagGlobalCount
      ) {
        const pageSize = Math.min(
          llmSampleSize,
          llmTagLimit - allTagQualified.length,
        );
        // Bind in placeholder order: location filter values, then LIMIT, then OFFSET.
        const tagRows = tagPageStmt.all(...args, pageSize, tagOffset);
        if (!tagRows.length) break;

        allTagQualified.push(...tagRows.map((row) => row.unique_id));
        tagOffset += tagRows.length;

        console.error(
          `[data-store] Tag 本批 ${tagRows.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
        );
      }
      llmSampleOffsets.set("_tag", tagOffset);

      // ===== Step 2: per country, LLM-score non-tag rows until the target is met =====
      // Case-insensitive country match, consistent with the shared WHERE clause.
      const nonTagFilter = `${whereSql} AND UPPER(COALESCE(guessed_location, '')) = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`;
      const nonTagCountStmt = db.prepare(
        `SELECT COUNT(*) as c FROM raw_jobs WHERE ${nonTagFilter}`,
      );
      const nonTagPageStmt = db.prepare(
        `
          SELECT * FROM raw_jobs WHERE ${nonTagFilter}
          ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
          LIMIT ? OFFSET ?
        `,
      );

      for (const location of normalizedLocations) {
        if (allNonTagQualified.length >= llmNonTagTarget) break;

        const nonTagOffsetKey = `${location}:nonTag`;
        let offset = llmSampleOffsets.get(nonTagOffsetKey) || 0;
        const locationArgs = [...args, location];
        const nonTagCount = nonTagCountStmt.get(...locationArgs)?.c || 0;

        if (nonTagCount === 0) {
          console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
          continue;
        }

        if (offset >= nonTagCount) {
          offset = 0;
          llmSampleOffsets.set(nonTagOffsetKey, 0);
        }

        console.error(
          `[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
        );

        for (let batch = 0; batch < maxBatches; batch++) {
          if (allNonTagQualified.length >= llmNonTagTarget) break;

          const samples = nonTagPageStmt.all(
            ...locationArgs,
            llmSampleSize,
            offset,
          );
          if (!samples.length) break;

          // NOTE(review): scoring uses DEFAULT_TARGET_LOCATIONS rather than the
          // requested `locations` — confirm that is intended.
          const scores = await scoreJobsBatch(samples, DEFAULT_TARGET_LOCATIONS);
          const passing = scores.filter((s) => s.score >= llmMinScore);
          allNonTagQualified.push(...passing.map((s) => s.uniqueId));
          allScores.push(...scores);

          offset += samples.length;
          llmSampleOffsets.set(nonTagOffsetKey, offset);

          console.error(
            `[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${passing.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
          );
        }
      }

      // ===== Final result =====
      const qualified = [...allTagQualified, ...allNonTagQualified];

      if (!qualified.length) {
        console.error(
          `[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
        );
        return {
          moved: 0,
          scored: allScores.length,
          qualified: 0,
          scores: allScores,
        };
      }

      console.error(
        `[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
      );

      // Move the selected rows: copy into jobs, then drop from raw_jobs, atomically.
      const placeholders = qualified.map(() => "?").join(", ");
      const moveQualified = db.transaction(() => {
        db.prepare(
          buildInsertSql(`WHERE unique_id IN (${placeholders})`),
        ).run(...qualified);
        db.prepare(
          `DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
        ).run(...qualified);
      });
      moveQualified();
      markStatsDirty();

      // Persist the paging offsets.
      saveLlmSampleOffsets();

      const finalOffsetSummary = Array.from(llmSampleOffsets.entries())
        .map(([k, v]) => `${k}:${v}`)
        .join(", ");
      console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);

      return {
        moved: qualified.length,
        scored: allScores.length,
        qualified: qualified.length,
        scores: allScores,
      };
    })();
  }

  // Regular mode: the same ranked selection drives both the INSERT and the DELETE.
  const rankedSelection = `
    WHERE ${whereSql}
    ORDER BY
      CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
      COALESCE(video_count, 0) DESC, created_at DESC
    LIMIT ?
  `;
  const moveRanked = db.transaction(() => {
    db.prepare(buildInsertSql(rankedSelection)).run(...args, safeLimit);
    // Delete exactly the rows that were just selected for the INSERT.
    db.prepare(
      `
        DELETE FROM raw_jobs
        WHERE unique_id IN (
          SELECT unique_id FROM raw_jobs
          ${rankedSelection}
        )
      `,
    ).run(...args, safeLimit);
  });

  moveRanked();
  markStatsDirty();

  return { moved: Math.min(count, safeLimit) };
}
|
|
3097
|
+
|
|
3098
|
+
/**
 * Claim the next job for a client.
 *
 * Order of preference (both storage modes): renew the client's own unexpired
 * 'processing' job; otherwise pinned pending jobs, then a recovered expired
 * claim, then seed-sourced jobs, then unverified TikTok sellers (logged-in
 * clients only), then following/follower-sourced jobs, then anything else.
 * Jobs with videos are tried before jobs without. Within each step, country
 * tier 1 (PL/NL/BE) is preferred over tier 2 (DE/FR/IT/IE/ES) over the rest.
 *
 * With SQLite (`db` set) an empty queue triggers `refillJobsFromRaw`; only one
 * refill runs at a time, and concurrent callers wait for it before retrying.
 * Without SQLite the in-memory `data` array is used and persisted via `save()`.
 *
 * @param {string} userId - id of the claiming client.
 * @param {number} [expireMs=300000] - age after which a claim counts as expired.
 * @param {string[]|null} [locations=null] - restrict to these guessed countries.
 * @param {boolean} [loggedIn=true] - whether the client is logged in; logged-out
 *   clients never receive tt_seller jobs or jobs with status_code 209002.
 * @returns {Promise<{ uniqueId: string, nickname: string, claimedAt: number, claimedBy: string } | null>}
 *   the claimed job, or null when nothing is available.
 */
async function claimNextJob(
  userId,
  expireMs = 5 * 60 * 1000,
  locations = null,
  loggedIn = true,
) {
  // Remember the login state this client reported.
  clientLoginStatus.set(userId, !!loggedIn);

  const tier1 = new Set(["PL", "NL", "BE"]);
  const tier2 = new Set(["DE", "FR", "IT", "IE", "ES"]);

  if (db) {
    const now = Date.now();

    // 0. The client still holds an unexpired job: renew the claim and return it.
    const ongoingRow = db
      .prepare(
        `
          SELECT *
          FROM jobs
          WHERE status = 'processing'
            AND claimed_by = ?
            AND claimed_at IS NOT NULL
            AND ? - claimed_at < ?
          ORDER BY claimed_at DESC
          LIMIT 1
        `,
      )
      .get(userId, now, expireMs);
    if (ongoingRow) {
      db.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?").run(
        now,
        ongoingRow.unique_id,
      );
      return {
        uniqueId: ongoingRow.unique_id,
        nickname: ongoingRow.nickname,
        claimedAt: now,
        claimedBy: userId,
      };
    }

    const normalizedLocations = Array.isArray(locations)
      ? locations
          .map((loc) => String(loc).trim().toUpperCase())
          .filter(Boolean)
      : [];

    // Ordered country groups: tier 1, tier 2, then either the remaining
    // requested countries or (no request) everything outside both tiers.
    const buildLocationGroups = () => {
      const selected = normalizedLocations.length ? normalizedLocations : null;
      const tier1List = selected
        ? selected.filter((loc) => tier1.has(loc))
        : [...tier1];
      const tier2List = selected
        ? selected.filter((loc) => tier2.has(loc))
        : [...tier2];
      const groups = [];
      if (tier1List.length > 0) {
        groups.push({ type: "include", values: tier1List });
      }
      if (tier2List.length > 0) {
        groups.push({ type: "include", values: tier2List });
      }
      if (selected) {
        const otherList = selected.filter(
          (loc) => !tier1.has(loc) && !tier2.has(loc),
        );
        if (otherList.length > 0) {
          groups.push({ type: "include", values: otherList });
        }
      } else {
        groups.push({ type: "exclude", values: [...tier1, ...tier2] });
      }
      return groups;
    };
    const locationGroups = buildLocationGroups();

    // Append the IN / NOT IN clause for one country group.
    const applyLocationGroup = (where, args, group) => {
      if (!group) return;
      const marks = group.values.map(() => "?").join(", ");
      const operator = group.type === "include" ? "IN" : "NOT IN";
      where.push(`UPPER(COALESCE(guessed_location, '')) ${operator} (${marks})`);
      args.push(...group.values);
    };

    // Best pending job matching one group plus extra raw SQL filters.
    const queryPendingOne = ({ requireVideo, group, filters = [] }) => {
      const where = ["status = 'pending'"];
      const args = [];
      if (!loggedIn) {
        where.push("COALESCE(tt_seller, 0) != 1");
        // Logged out: only jobs whose status_code is empty or 0.
        where.push("(status_code IS NULL OR status_code = 0)");
      } else {
        // Logged in: status_code empty, 0 or 209002.
        where.push(
          "(status_code IS NULL OR status_code = 0 OR status_code = 209002)",
        );
      }
      // Jobs with any other status_code are never handed out here.
      if (requireVideo) {
        where.push("COALESCE(video_count, 0) > 0");
      }
      applyLocationGroup(where, args, group);
      where.push(...filters);
      return db
        .prepare(
          `
            SELECT *
            FROM jobs
            WHERE ${where.join(" AND ")}
            ORDER BY follower_count DESC, created_at ASC, unique_id ASC
            LIMIT 1
          `,
        )
        .get(...args);
    };

    // For a multi-country include group, try the countries one by one in order.
    const queryPendingByGroup = ({ requireVideo, group, filters = [] }) => {
      if (group?.type === "include" && group.values.length > 1) {
        for (const location of group.values) {
          const row = queryPendingOne({
            requireVideo,
            group: { type: "include", values: [location] },
            filters,
          });
          if (row) return row;
        }
        return null;
      }
      return queryPendingOne({ requireVideo, group, filters });
    };

    // Oldest pinned pending job (no status_code filter is applied on this path).
    const findPinnedPending = (requireVideo) => {
      const where = ["status = 'pending'", "COALESCE(pinned, 0) = 1"];
      const args = [];
      if (!loggedIn) {
        where.push("COALESCE(tt_seller, 0) != 1");
      }
      if (requireVideo) {
        where.push("COALESCE(video_count, 0) > 0");
      }
      if (normalizedLocations.length > 0) {
        where.push(
          `UPPER(COALESCE(guessed_location, '')) IN (${normalizedLocations.map(() => "?").join(", ")})`,
        );
        args.push(...normalizedLocations);
      }
      return db
        .prepare(
          `
            SELECT *
            FROM jobs
            WHERE ${where.join(" AND ")}
            ORDER BY created_at ASC, unique_id ASC
            LIMIT 1
          `,
        )
        .get(...args);
    };

    // Non-pinned priority ladder: seed > seller (logged in) > follow > other.
    const findPrioritizedPending = (requireVideo) => {
      const notPinned = "COALESCE(pinned, 0) = 0";
      const ladder = [
        [notPinned, `instr(COALESCE(sources, ''), '"seed"') > 0`],
      ];
      if (loggedIn) {
        ladder.push([notPinned, "tt_seller = 1", "verified = 0"]);
      }
      ladder.push([
        notPinned,
        `(
            instr(COALESCE(sources, ''), '"following"') > 0
            OR instr(COALESCE(sources, ''), '"follower"') > 0
          )`,
      ]);
      ladder.push([notPinned]);

      for (const filters of ladder) {
        for (const group of locationGroups) {
          const row = queryPendingByGroup({ requireVideo, group, filters });
          if (row) return row;
        }
      }
      return null;
    };

    // Mark a row as claimed by this client. The timestamp is taken at claim
    // time because an awaited refill may have run since `now` was captured.
    const claimRow = (row) => {
      if (!row) return null;
      const claimedAt = Date.now();
      db.prepare(
        "UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
      ).run(claimedAt, userId, row.unique_id);
      markStatsDirty();
      return {
        uniqueId: row.unique_id,
        nickname: row.nickname,
        claimedAt,
        claimedBy: userId,
      };
    };

    // One pass over the queue: videos first, then any; pinned beats a
    // recovered expired claim, which beats the priority ladder.
    const claimFirstAvailable = (recovered = null) => {
      for (const requireVideo of [true, false]) {
        const pinned = findPinnedPending(requireVideo);
        if (pinned) {
          return claimRow(pinned);
        }
        if (recovered) {
          return claimRow({
            unique_id: recovered.uniqueId,
            nickname: recovered.nickname,
          });
        }
        const ranked = findPrioritizedPending(requireVideo);
        if (ranked) {
          return claimRow(ranked);
        }
      }
      return null;
    };

    // Release the oldest expired claim (from any client) back to 'pending'.
    const expiredRow = db
      .prepare(
        `
          SELECT *
          FROM jobs
          WHERE status = 'processing'
            AND claimed_at IS NOT NULL
            AND ? - claimed_at > ?
          ORDER BY claimed_at ASC
          LIMIT 1
        `,
      )
      .get(now, expireMs);
    let expiredCandidate = null;
    if (expiredRow) {
      db.prepare(
        "UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
      ).run(expiredRow.unique_id);
      expiredCandidate = mapJobRow({
        ...expiredRow,
        status: "pending",
        claimed_at: null,
      });
    }

    const claimed = claimFirstAvailable(expiredCandidate);
    if (claimed) {
      return claimed;
    }

    // Queue is empty: top up from raw_jobs (with the LLM config stored in
    // `refillLlmConfig`). Only one refill may run at a time.
    if (refillLock) {
      // A refill is already running: wait for it, then retry the claim once.
      await refillLock;
      return claimFirstAvailable();
    }

    // refillJobsFromRaw returns a plain object or (LLM mode) a Promise; the
    // async wrapper normalises both and turns a synchronous throw into a rejection.
    const refillPromise = (async () =>
      refillJobsFromRaw(
        normalizedLocations.length ? normalizedLocations : null,
        500,
        refillLlmConfig,
      ))();
    // Waiters only need to know when the refill settled, not whether it failed.
    const settled = refillPromise.then(
      () => undefined,
      () => undefined,
    );
    refillLock = settled;

    let refillResult;
    try {
      refillResult = await refillPromise;
    } finally {
      // Always release the lock, whatever kind of result the refill produced.
      if (refillLock === settled) {
        refillLock = null;
      }
    }

    if (refillResult?.moved > 0) {
      console.error(
        `[data-store] 从 raw_jobs 补充了 ${refillResult.moved} 条任务到 jobs`,
      );
      return claimFirstAvailable();
    }

    return null;
  }

  // ---------- In-memory store (no SQLite) ----------
  const now = Date.now();

  // 0. The client still holds an unexpired job: renew the claim and return it.
  const ongoing = data.find(
    (u) =>
      u.status === "processing" &&
      u.claimedBy === userId &&
      u.claimedAt &&
      now - u.claimedAt < expireMs,
  );
  if (ongoing) {
    ongoing.claimedAt = now;
    save();
    return {
      uniqueId: ongoing.uniqueId,
      nickname: ongoing.nickname,
      claimedAt: ongoing.claimedAt,
      claimedBy: userId,
    };
  }

  // Rank a user by guessed-country tier (lower is better).
  const locationTier = (u) => {
    const loc = (u.guessedLocation || "").toUpperCase();
    if (tier1.has(loc)) return 0;
    if (tier2.has(loc)) return 1;
    return 2;
  };
  // First entry of the best tier; the stable sort keeps input order within a tier.
  const bestByTier = (list) =>
    [...list].sort((a, b) => locationTier(a) - locationTier(b))[0] || null;

  // Pick by priority: pinned > expired claim > seed > ttSeller (logged in) > follow > other.
  const pickCandidate = (candidates) => {
    let next = candidates.find((u) => u.pinned);

    if (!next) {
      const expired = data.find(
        (u) =>
          u.status === "processing" &&
          u.claimedAt &&
          now - u.claimedAt > expireMs,
      );
      if (expired) {
        expired.status = "pending";
        markStatsDirty();
        delete expired.claimedAt;
        next = expired;
      }
    }

    if (!next) {
      next = bestByTier(
        candidates.filter((u) => u.sources && u.sources.includes("seed")),
      );
    }

    // Logged-out clients skip the ttSeller priority step.
    if (!next && loggedIn) {
      next = bestByTier(
        candidates.filter((u) => u.ttSeller === true && u.verified === false),
      );
    }

    if (!next) {
      next = bestByTier(
        candidates.filter(
          (u) =>
            u.sources &&
            (u.sources.includes("following") ||
              u.sources.includes("follower")),
        ),
      );
    }

    if (!next) {
      next = bestByTier(candidates);
    }

    return next;
  };

  let pending = data.filter((u) => u.status === "pending");
  // Country filter: keep only users whose guessed location is in `locations`.
  if (locations && locations.length > 0) {
    pending = pending.filter((u) =>
      isLocationInList(u.guessedLocation, locations),
    );
  }
  // Logged-out clients may not claim ttSeller users.
  if (!loggedIn) {
    pending = pending.filter((u) => u.ttSeller !== true);
  }
  // status_code filter: empty or 0, plus 209002 for logged-in clients.
  pending = pending.filter(
    (u) =>
      u.statusCode == null ||
      u.statusCode === 0 ||
      (loggedIn && u.statusCode === 209002),
  );

  // Prefer pending users that have videos; fall back to all pending users.
  const hasVideo = pending.filter((u) => u.videoCount > 0);
  const next = pickCandidate(hasVideo) || pickCandidate(pending);
  if (!next) {
    return null;
  }

  next.status = "processing";
  markStatsDirty();
  next.claimedAt = now;
  next.claimedBy = userId;
  save();
  return {
    uniqueId: next.uniqueId,
    nickname: next.nickname,
    claimedAt: next.claimedAt,
    claimedBy: userId,
  };
}
|
|
3573
|
+
|
|
3574
|
+
/**
 * Read-only diagnostic that reports what the job-claiming logic would see.
 *
 * With a database open it only runs SELECT statements and returns:
 *  - `ongoing`: the newest job `userId` is still processing whose claim is
 *    younger than `expireMs`;
 *  - `expired`: the oldest processing job whose claim is older than `expireMs`;
 *  - `locationGroups`: the ordered location groups derived from `locations`;
 *  - `requireVideoPasses`: two passes (videos required, then not). Each pass
 *    holds the pinned pending job if there is one; otherwise the first match
 *    of the stages seed -> seller (logged-in only) -> follow -> other, with
 *    the SQL text and bound arguments that produced it.
 *
 * Without a database it returns counters computed from the in-memory `data`.
 *
 * @param userId - Client whose claim is being inspected.
 * @param {number} [expireMs=300000] - Claim lifetime in milliseconds.
 * @param {string[]|null} [locations=null] - Optional location codes; trimmed
 *   and upper-cased before use. Anything that is not an array is ignored.
 * @param {boolean} [loggedIn=true] - When false, tt_seller jobs and jobs with
 *   status_code 209002 are excluded and the seller stage is skipped.
 * @returns {object} The diagnostic report described above.
 */
function debugClaimNextJob(
  userId,
  expireMs = 5 * 60 * 1000,
  locations = null,
  loggedIn = true,
) {
  if (!db) {
    const countByStatus = (status) =>
      data.filter((u) => u.status === status).length;
    return {
      path: "memory",
      userId,
      expireMs,
      loggedIn,
      totalUsers: data.length,
      processingUsers: countByStatus("processing"),
      pendingUsers: countByStatus("pending"),
    };
  }

  const now = Date.now();
  const info = { path: "db", userId, expireMs, loggedIn };

  const LOCATION_EXPR = "UPPER(COALESCE(guessed_location, ''))";
  const NOT_PINNED = "COALESCE(pinned, 0) = 0";
  const placeholders = (values) => values.map(() => "?").join(", ");

  // Runs a single-row SELECT on `jobs` and returns the row with its SQL/args.
  function selectOne(where, args, orderBy) {
    const sql = `
      SELECT *
      FROM jobs
      WHERE ${where.join(" AND ")}
      ORDER BY ${orderBy}
      LIMIT 1
    `;
    const row = db.prepare(sql).get(...args);
    return { row, sql, args };
  }

  const ongoingRow = db
    .prepare(
      `
      SELECT *
      FROM jobs
      WHERE status = 'processing'
        AND claimed_by = ?
        AND claimed_at IS NOT NULL
        AND ? - claimed_at < ?
      ORDER BY claimed_at DESC
      LIMIT 1
    `,
    )
    .get(userId, now, expireMs);
  info.ongoing = ongoingRow
    ? {
        uniqueId: ongoingRow.unique_id,
        claimedBy: ongoingRow.claimed_by,
        claimedAt: ongoingRow.claimed_at,
      }
    : null;

  const tier1 = new Set(["PL", "NL", "BE"]);
  const tier2 = new Set(["DE", "FR", "IT", "IE", "ES"]);
  const normalizedLocations = Array.isArray(locations)
    ? locations.map((loc) => String(loc).trim().toUpperCase()).filter(Boolean)
    : [];

  // Ordered groups: tier 1, tier 2, then either the remaining requested
  // locations or, when nothing was requested, everything outside both tiers.
  function getLocationGroups() {
    const selected = normalizedLocations.length ? normalizedLocations : null;
    const tier1List = selected
      ? selected.filter((loc) => tier1.has(loc))
      : [...tier1];
    const tier2List = selected
      ? selected.filter((loc) => tier2.has(loc))
      : [...tier2];
    const groups = [];
    if (tier1List.length > 0) {
      groups.push({ type: "include", values: tier1List });
    }
    if (tier2List.length > 0) {
      groups.push({ type: "include", values: tier2List });
    }
    if (selected) {
      const otherList = selected.filter(
        (loc) => !tier1.has(loc) && !tier2.has(loc),
      );
      if (otherList.length > 0) {
        groups.push({ type: "include", values: otherList });
      }
    } else {
      groups.push({ type: "exclude", values: [...tier1, ...tier2] });
    }
    return groups;
  }

  const locationGroups = getLocationGroups();
  info.locationGroups = locationGroups;

  // First pending, claimable job inside one location group.
  function queryPendingOne({ requireVideo, group, filters = [] }) {
    const where = ["status = 'pending'"];
    const args = [];
    if (loggedIn) {
      where.push(
        "(status_code IS NULL OR status_code = 0 OR status_code = 209002)",
      );
    } else {
      where.push("COALESCE(tt_seller, 0) != 1");
      where.push("(status_code IS NULL OR status_code = 0)");
    }
    if (requireVideo) {
      where.push("COALESCE(video_count, 0) > 0");
    }
    if (group) {
      const operator = group.type === "include" ? "IN" : "NOT IN";
      where.push(`${LOCATION_EXPR} ${operator} (${placeholders(group.values)})`);
      args.push(...group.values);
    }
    where.push(...filters);
    return selectOne(
      where,
      args,
      "follower_count DESC, created_at ASC, unique_id ASC",
    );
  }

  // A multi-location include group is probed one location at a time so the
  // order of the locations inside the group acts as a priority.
  function queryPendingByGroup({ requireVideo, group, filters = [] }) {
    if (group?.type === "include" && group.values.length > 1) {
      for (const location of group.values) {
        const ret = queryPendingOne({
          requireVideo,
          group: { type: "include", values: [location] },
          filters,
        });
        if (ret.row) return ret;
      }
      return { row: null, sql: null, args: [] };
    }
    return queryPendingOne({ requireVideo, group, filters });
  }

  // Oldest pinned pending job, limited to the requested locations if any.
  function findPinnedPending(requireVideo) {
    const where = ["status = 'pending'", "COALESCE(pinned, 0) = 1"];
    const args = [];
    if (!loggedIn) {
      where.push("COALESCE(tt_seller, 0) != 1");
    }
    if (requireVideo) {
      where.push("COALESCE(video_count, 0) > 0");
    }
    if (normalizedLocations.length > 0) {
      where.push(`${LOCATION_EXPR} IN (${placeholders(normalizedLocations)})`);
      args.push(...normalizedLocations);
    }
    return selectOne(where, args, "created_at ASC, unique_id ASC");
  }

  // First match of one stage across the location groups, or null.
  function findInGroups(requireVideo, filters) {
    for (const group of locationGroups) {
      const found = queryPendingByGroup({ requireVideo, group, filters });
      if (found.row) {
        return {
          uniqueId: found.row.unique_id,
          group,
          sql: found.sql,
          args: found.args,
        };
      }
    }
    return null;
  }

  // Selection stages in priority order; the first stage that matches wins.
  const stages = [
    {
      key: "seed",
      filters: [NOT_PINNED, `instr(COALESCE(sources, ''), '"seed"') > 0`],
    },
    // The seller stage is only evaluated for logged-in clients.
    ...(loggedIn
      ? [{ key: "seller", filters: [NOT_PINNED, "tt_seller = 1", "verified = 0"] }]
      : []),
    {
      key: "follow",
      filters: [
        NOT_PINNED,
        `(
          instr(COALESCE(sources, ''), '"following"') > 0
          OR instr(COALESCE(sources, ''), '"follower"') > 0
        )`,
      ],
    },
    { key: "other", filters: [NOT_PINNED] },
  ];

  const expiredRow = db
    .prepare(
      `
      SELECT *
      FROM jobs
      WHERE status = 'processing'
        AND claimed_at IS NOT NULL
        AND ? - claimed_at > ?
      ORDER BY claimed_at ASC
      LIMIT 1
    `,
    )
    .get(now, expireMs);
  info.expired = expiredRow
    ? {
        uniqueId: expiredRow.unique_id,
        claimedBy: expiredRow.claimed_by,
        claimedAt: expiredRow.claimed_at,
        diffMs: now - expiredRow.claimed_at,
      }
    : null;

  info.requireVideoPasses = [];
  for (const requireVideo of [true, false]) {
    const pass = { requireVideo };
    const pinned = findPinnedPending(requireVideo);
    pass.pinned = pinned.row
      ? { uniqueId: pinned.row.unique_id, sql: pinned.sql, args: pinned.args }
      : null;

    // A pinned job pre-empts every other stage.
    if (!pass.pinned) {
      for (const stage of stages) {
        const match = findInGroups(requireVideo, stage.filters);
        if (match) {
          pass[stage.key] = match;
          break;
        }
      }
    }

    info.requireVideoPasses.push(pass);
  }

  return info;
}
|
|
3870
|
+
|
|
3871
|
+
/**
 * Registers every account mentioned in a crawl result that is not known yet.
 *
 * Reads the `discoveredVideoAuthors`, `discoveredCommentAuthors`,
 * `discoveredGuessAuthors`, `discoveredFollowing`, `discoveredFollowers` and
 * `discoveredRecommended` lists of `result`. Author lists accept plain handle
 * strings or objects; follow-style lists accept `[handle, displayName]`
 * tuples or `{ handle, displayName, guessedLocation }` objects. A leading "@"
 * is stripped from handles, and `result.guessedLocation` is the fallback
 * location of every entry.
 *
 * Malformed entries (null, non-string handles, tuples without a handle) are
 * skipped instead of throwing. When a handle occurs several times only its
 * first occurrence is kept.
 *
 * @param {object} result - Crawl result carrying the lists above.
 * @returns {string[]} Handles passed to `addUserToDb` / `addJobBaseToDb`
 *   because `hasUser` did not know them, in discovery order.
 */
function processDiscoveredUsers(result) {
  const guessedLocation = result.guessedLocation || null;

  const listOf = (value) => (Array.isArray(value) ? value : []);
  const isRecord = (value) => value !== null && typeof value === "object";
  // Anything that is not a string normalises to "" and is filtered out below.
  const cleanHandle = (value) =>
    typeof value === "string" ? value.replace(/^@/, "") : "";

  const fromVideoAuthor = (entry) => {
    if (typeof entry === "string") {
      return {
        uniqueId: cleanHandle(entry),
        nickname: null,
        locationCreated: null,
        guessedLocation,
        sources: ["video"],
      };
    }
    const author = isRecord(entry) ? entry : {};
    return {
      uniqueId: cleanHandle(author.uniqueId),
      nickname: author.nickname || null,
      locationCreated: author.locationCreated || null,
      guessedLocation: author.guessedLocation || guessedLocation,
      sources: ["video"],
    };
  };

  // Comment and guess authors share one shape; only the source label differs.
  const fromAuthor = (source) => (entry) => {
    if (typeof entry === "string") {
      return { uniqueId: cleanHandle(entry), sources: [source], guessedLocation };
    }
    const author = isRecord(entry) ? entry : {};
    return {
      uniqueId: cleanHandle(author.author || author.uniqueId),
      nickname: author.nickname || null,
      sources: [source],
      guessedLocation: author.guessedLocation || guessedLocation,
    };
  };

  // Following / followers / recommended entries: tuple or object form.
  const fromFollowEntry = (source) => (entry) => {
    const isTuple = Array.isArray(entry);
    const record = isRecord(entry) ? entry : {};
    return {
      uniqueId: cleanHandle(isTuple ? entry[0] : record.handle),
      nickname: isTuple ? entry[1] : record.displayName || null,
      sources: [source],
      guessedLocation: record.guessedLocation || guessedLocation,
    };
  };

  const discovered = [
    ...listOf(result.discoveredVideoAuthors).map(fromVideoAuthor),
    ...listOf(result.discoveredCommentAuthors).map(fromAuthor("comment")),
    ...listOf(result.discoveredGuessAuthors).map(fromAuthor("guess")),
    ...listOf(result.discoveredFollowing).map(fromFollowEntry("following")),
    ...listOf(result.discoveredFollowers).map(fromFollowEntry("follower")),
    ...listOf(result.discoveredRecommended).map(fromFollowEntry("recommended")),
  ].filter((u) => u.uniqueId);

  // De-duplicate inside the discovered list first, then check each remaining
  // handle against the users that are already known.
  const firstSeen = new Map();
  for (const entry of discovered) {
    if (!firstSeen.has(entry.uniqueId)) {
      firstSeen.set(entry.uniqueId, entry);
    }
  }

  const newUsers = [];
  for (const candidate of firstSeen.values()) {
    if (hasUser(candidate.uniqueId)) continue;
    addUserToDb(candidate);
    addJobBaseToDb(candidate);
    newUsers.push(candidate.uniqueId);
  }
  return newUsers;
}
|
|
3971
|
+
|
|
3972
|
+
/**
 * Folds a crawl result into `user` (mutated in place).
 *
 *  - `result.restricted`: status "restricted"; userInfo fields are merged and
 *    the user is flagged restricted and processed.
 *  - `result.error`: status "error" with the error stored on the user.
 *  - otherwise: status "done"; userInfo fields, the remaining top-level
 *    result fields and the flattened `topRecentVideo` are merged.
 *
 * Merging copies only values that are not undefined, null or "". The
 * `uniqueId` and `sources` keys of userInfo are never copied. Each branch
 * appends its own marker to `user.sources`. `markStatsDirty()` is called when
 * the status ends up different from what it was.
 *
 * @param {object} user - Job/user record to update.
 * @param {object} result - Result reported for that record.
 */
function updateUserFromResult(user, result) {
  const previousStatus = user.status;
  const info = result.userInfo;

  // Copies the filled-in own properties of `source` onto the user.
  const mergeFilled = (source, excludedKeys) => {
    for (const key of Object.keys(source)) {
      if (excludedKeys.has(key)) continue;
      const value = source[key];
      if (value !== undefined && value !== null && value !== "") {
        user[key] = value;
      }
    }
  };
  const appendSource = (label) => {
    user.sources = [...new Set([...(user.sources || []), label])];
  };
  const infoExcluded = new Set(["uniqueId", "sources"]);

  if (result.restricted) {
    user.status = "restricted";
    if (info) mergeFilled(info, infoExcluded);
    user.restricted = true;
    user.processed = true;
    user.processedAt = Date.now();
    appendSource("restricted");
  } else if (result.error) {
    user.status = "error";
    user.error = result.error;
    appendSource("error");
  } else {
    user.status = "done";
    user.processed = true;
    user.processedAt = Date.now();
    user.noVideo = result.noVideo || false;
    user.keepFollow = result.keepFollow || false;
    user.hasFollowData = result.hasFollowData || false;

    if (info) mergeFilled(info, infoExcluded);

    // Explicit profile fields: counters and flags keep 0/false from the
    // result (??), text fields fall back to the stored value when empty (||).
    const profile = info ?? {};
    Object.assign(user, {
      followerCount: profile.followerCount ?? user.followerCount,
      videoCount: profile.videoCount ?? user.videoCount,
      nickname: profile.nickname || user.nickname,
      locationCreated: profile.locationCreated || user.locationCreated,
      ttSeller: profile.ttSeller ?? user.ttSeller,
      verified: profile.verified ?? user.verified,
      region: profile.region || user.region,
      signature: profile.signature ?? profile.bio ?? user.signature,
      bioLink: profile.bioLink ?? user.bioLink,
      followingCount: profile.followingCount ?? user.followingCount,
      heartCount: profile.heartCount ?? user.heartCount,
    });
    if (profile.secUid) user.secUid = profile.secUid;

    // Top-level result keys that are handled elsewhere and must not be
    // copied verbatim.
    const resultExcluded = new Set([
      "restricted",
      "error",
      "userInfo",
      "discoveredVideoAuthors",
      "discoveredCommentAuthors",
      "discoveredGuessAuthors",
      "discoveredFollowing",
      "discoveredFollowers",
      "discoveredRecommended",
      "uniqueId",
      "sources",
      "topRecentVideo", // handled separately below, not by the generic merge
    ]);
    mergeFilled(result, resultExcluded);

    // Flatten the topRecentVideo object into two scalar fields.
    const topVideo = result.topRecentVideo;
    if (topVideo && typeof topVideo === "object") {
      user.topVideoPlayCount = topVideo.playCount || null;
      user.topVideoHref = topVideo.href || null;
    }
    appendSource("processed");
  }

  if (user.status !== previousStatus) markStatsDirty();
}
|
|
4067
|
+
|
|
4068
|
+
/**
 * Stores the outcome of a claimed job and registers newly discovered users.
 *
 * The record is looked up with `getJob` when a database is open, otherwise
 * with `getUser`. It is updated through `updateUserFromResult`, its claim
 * timestamp is cleared, and the discovered accounts are handed to
 * `processDiscoveredUsers`. The change is persisted with `updateJobInfo`
 * (database) or `save()` (in-memory store).
 *
 * @param {string} uniqueId - Handle of the job being committed.
 * @param {object} result - Result reported by the worker.
 * @returns {{saved: boolean, error?: *, status?: string, newUsers?: string[]}}
 */
function commitJob(uniqueId, result) {
  const usingDb = Boolean(db);
  const user = usingDb ? getJob(uniqueId) : getUser(uniqueId);
  if (!user) {
    return { saved: false, error: "user not found" };
  }

  updateUserFromResult(user, result);

  if (!usingDb) {
    delete user.claimedAt;
    const discovered = processDiscoveredUsers(result);
    save();
    return { saved: true, status: user.status, newUsers: discovered };
  }

  user.claimedAt = null;
  const discovered = processDiscoveredUsers(result);
  const outcome = updateJobInfo(uniqueId, user, false);
  return outcome.error
    ? { saved: false, error: outcome.error }
    : { saved: true, status: user.status, newUsers: discovered };
}
|
|
4093
|
+
|
|
4094
|
+
/**
 * Commits an explore/refresh result for `uniqueId`, creating the record when
 * it does not exist yet.
 *
 * Existing record: updated via `updateUserFromResult` and persisted
 * (`updateJobInfo` with a database, `save()` otherwise).
 * Missing record: a new one is built from `result.userInfo` with
 * `sources: ["refresh-explore"]`, updated the same way and inserted through
 * `addJob` (database) or `addUser(userObj, true)` (in-memory store).
 * In both cases the discovered accounts go to `processDiscoveredUsers`.
 *
 * The new record always carries the `uniqueId` passed by the caller: a
 * `uniqueId` inside `result.userInfo` is ignored, which matches
 * `updateUserFromResult` never copying that key either.
 *
 * @param {string} uniqueId - Handle the result belongs to.
 * @param {object} result - Result reported by the worker.
 * @returns {{saved: boolean, created?: boolean, status?: string,
 *   newUsers?: string[], error?: *}}
 */
function commitNewExplore(uniqueId, result) {
  const usingDb = Boolean(db);
  const existing = usingDb ? getJob(uniqueId) : getUser(uniqueId);

  if (existing) {
    updateUserFromResult(existing, result);
    if (usingDb) {
      const persistRet = updateJobInfo(uniqueId, existing, false);
      if (persistRet.error) {
        return { saved: false, error: persistRet.error };
      }
    }
    const newUsers = processDiscoveredUsers(result);
    if (!usingDb) save();
    return { saved: true, created: false, status: existing.status, newUsers };
  }

  // Drop any uniqueId carried by userInfo so it cannot replace the handle
  // this commit is for.
  const { uniqueId: _reportedId, ...profile } = result.userInfo || {};
  const userObj = {
    uniqueId,
    ...profile,
    sources: ["refresh-explore"],
  };
  updateUserFromResult(userObj, result);
  if (usingDb) {
    addJob(userObj);
  } else {
    addUser(userObj, true);
  }
  const newUsers = processDiscoveredUsers(result);
  if (!usingDb) save();
  return { saved: true, created: true, status: userObj.status, newUsers };
}
|
|
4143
|
+
|
|
4144
|
+
/**
 * Puts a job back into the "pending" state and wipes its processing traces.
 *
 * Database mode: the claim/processing fields are reset to null/false and the
 * record is written with `updateJobInfo`. In-memory mode: the same fields are
 * removed from the object and the store is saved. `markStatsDirty()` is
 * called in both modes.
 *
 * @param {string} uniqueId - Handle of the job to reset.
 * @returns {{saved: boolean, error?: *}}
 */
function resetJob(uniqueId) {
  if (db) {
    const job = getJob(uniqueId);
    if (!job) return { saved: false, error: "user not found" };
    Object.assign(job, {
      status: "pending",
      claimedAt: null,
      processedAt: null,
      processed: false,
      error: null,
      restricted: false,
      noVideo: false,
    });
    const outcome = updateJobInfo(uniqueId, job, false);
    if (outcome.error) return { saved: false, error: outcome.error };
    markStatsDirty();
    return { saved: true };
  }

  const record = getUser(uniqueId);
  if (!record) return { saved: false, error: "user not found" };
  record.status = "pending";
  markStatsDirty();
  const transientFields = [
    "claimedAt",
    "processedAt",
    "processed",
    "error",
    "restricted",
    "noVideo",
  ];
  for (const field of transientFields) {
    delete record[field];
  }
  save();
  return { saved: true };
}
|
|
4174
|
+
|
|
4175
|
+
/**
 * Flips the `pinned` flag of a job.
 *
 * Database mode writes only `{ pinned }` through `updateJobInfo`; in-memory
 * mode mutates the stored object and saves the store.
 *
 * @param {string} uniqueId - Handle of the job to pin or unpin.
 * @returns {{saved: boolean, pinned?: boolean, error?: *}}
 */
function togglePin(uniqueId) {
  const usingDb = Boolean(db);
  const record = usingDb ? getJob(uniqueId) : getUser(uniqueId);
  if (!record) {
    return { saved: false, error: "user not found" };
  }

  const flipped = !record.pinned;
  if (usingDb) {
    const outcome = updateJobInfo(uniqueId, { pinned: flipped }, false);
    return outcome.error
      ? { saved: false, error: outcome.error }
      : { saved: true, pinned: flipped };
  }

  record.pinned = flipped;
  save();
  return { saved: true, pinned: record.pinned };
}
|
|
4191
|
+
|
|
4192
|
+
/**
 * Picks the next unverified tt_seller account in a target location that is
 * due for a refresh, and stamps it as refreshed now.
 *
 * An account is due when its refresh time (2016-01-01 UTC when it has none)
 * is older than `maxAgeSeconds`. Pinned accounts come first, then the one
 * refreshed longest ago. The chosen record gets `refresh_time` / `refreshTime`
 * set to the current time before it is returned.
 *
 * @param userId - Not used by this function.
 * @param {number} [maxAgeSeconds=43200] - Minimum age of the last refresh.
 * @returns {{uniqueId: string, nickname: *, refreshTime: number}|null}
 *   The selected account, or null when nothing is due.
 */
function getNextRedoJob(userId, maxAgeSeconds = 43200) {
  const now = Date.now();
  const threshold = now - maxAgeSeconds * 1000;
  const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
  const targetLocations = DEFAULT_TARGET_LOCATIONS;

  if (db) {
    const marks = targetLocations.map(() => "?").join(",");
    const selectDue = db.prepare(
      `
      SELECT *
      FROM jobs
      WHERE tt_seller = 1
        AND verified = 0
        AND location_created IN (${marks})
        AND COALESCE(refresh_time, ?) < ?
      ORDER BY COALESCE(pinned, 0) DESC, COALESCE(refresh_time, ?) ASC
      LIMIT 1
    `,
    );
    const row = selectDue.get(
      ...targetLocations,
      defaultTime,
      threshold,
      defaultTime,
    );
    if (!row) return null;
    db.prepare(
      "UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
    ).run(now, now, row.unique_id);
    return { uniqueId: row.unique_id, nickname: row.nickname, refreshTime: now };
  }

  // In-memory store: target-location sellers whose last refresh is older
  // than the threshold (a missing refresh time counts as 2016-01-01).
  const lastRefresh = (u) => u.refreshTime || defaultTime;
  const due = data.filter(
    (u) =>
      u.ttSeller &&
      u.verified === false &&
      targetLocations.includes(u.locationCreated) &&
      lastRefresh(u) < threshold,
  );
  if (due.length === 0) return null;

  // Pinned first, then ascending by last refresh time.
  const pinRank = (u) => (u.pinned ? 1 : 0);
  due.sort(
    (a, b) => pinRank(b) - pinRank(a) || lastRefresh(a) - lastRefresh(b),
  );

  const [chosen] = due;
  chosen.refreshTime = now;
  save();
  return {
    uniqueId: chosen.uniqueId,
    nickname: chosen.nickname,
    refreshTime: chosen.refreshTime,
  };
}
|
|
4263
|
+
|
|
4264
|
+
/**
 * Stores the result of a refresh ("redo") run for an existing account.
 *
 * Sets the refresh time to now, copies every filled-in `result.userInfo`
 * field except `uniqueId` and `sources` onto the record, flattens
 * `result.topRecentVideo` into `topVideoPlayCount` / `topVideoHref`, and
 * registers discovered accounts through `processDiscoveredUsers`.
 *
 * The record is persisted with `updateJobInfo` when a database is open and
 * with `save()` otherwise, so the in-memory store is saved here just like in
 * the other commit functions.
 *
 * @param {string} uniqueId - Handle of the refreshed account.
 * @param {object} result - Result reported by the worker.
 * @returns {{saved: boolean, newUsers?: string[], error?: *}}
 */
function commitRedoJob(uniqueId, result) {
  const usingDb = Boolean(db);
  const user = usingDb ? getJob(uniqueId) : getUser(uniqueId);
  if (!user) return { saved: false, error: "user not found" };

  user.refreshTime = Date.now();

  const info = result.userInfo;
  if (info) {
    for (const key of Object.keys(info)) {
      // The handle and the provenance list are owned by the store.
      if (key === "uniqueId" || key === "sources") continue;
      const value = info[key];
      if (value !== undefined && value !== null && value !== "") {
        user[key] = value;
      }
    }
  }

  // Flatten the topRecentVideo object into two scalar fields.
  const topVideo = result.topRecentVideo;
  if (topVideo && typeof topVideo === "object") {
    user.topVideoPlayCount = topVideo.playCount || null;
    user.topVideoHref = topVideo.href || null;
  }

  const newUsers = processDiscoveredUsers(result);

  if (usingDb) {
    const ret = updateJobInfo(uniqueId, user, false);
    if (ret.error) return { saved: false, error: ret.error };
  } else {
    save();
  }
  return { saved: true, newUsers };
}
|
|
4315
|
+
|
|
4316
|
+
/**
 * Records an error reported by a client in `clientErrors`, one entry per
 * `userId`.
 *
 * First report: a new entry is created with `reportCount` 1; captcha reports
 * also fill the captcha fields.
 * Later reports: the timestamp is refreshed. A "captcha" report increments
 * `captchaCount` and fills the captcha stage/message/stack only while they
 * are still empty. Any other type overwrites the error fields and increments
 * `reportCount`. A non-empty `username` replaces the stored one.
 *
 * @param userId - Key of the entry.
 * @param {string} errorType - Error category; "captcha" is handled specially.
 * @param {string} [errorMessage] - Stored as "" when missing.
 * @param {string} [username] - Display name of the client.
 * @param {string} [stage] - Stored as "" when missing.
 * @param {string} [errorStack] - Stored as "" when missing.
 */
function reportClientError(
  userId,
  errorType,
  errorMessage,
  username,
  stage,
  errorStack,
) {
  const isCaptcha = errorType === "captcha";
  const message = errorMessage || "";
  const stack = errorStack || "";
  const phase = stage || "";
  const entry = clientErrors.get(userId);

  if (!entry) {
    clientErrors.set(userId, {
      userId,
      errorType,
      errorMessage: message,
      errorStack: stack,
      username,
      stage: phase,
      timestamp: Date.now(),
      reportCount: 1,
      captchaCount: isCaptcha ? 1 : 0,
      captchaStage: isCaptcha ? phase : "",
      captchaMessage: isCaptcha ? message : "",
      captchaStack: isCaptcha ? stack : "",
    });
    return;
  }

  entry.timestamp = Date.now();
  if (isCaptcha) {
    entry.captchaCount = (entry.captchaCount || 0) + 1;
    // Keep the details of the first captcha that carried them.
    if (!entry.captchaStage) entry.captchaStage = phase;
    if (!entry.captchaMessage) entry.captchaMessage = message;
    if (!entry.captchaStack) entry.captchaStack = stack;
  } else {
    entry.errorType = errorType;
    entry.errorMessage = message;
    entry.errorStack = stack;
    entry.stage = phase;
    entry.reportCount = (entry.reportCount || 1) + 1;
  }
  if (username) entry.username = username;
}
|
|
4358
|
+
|
|
4359
|
+
/**
 * Removes the error entry stored for `userId` from `clientErrors`.
 *
 * @param userId - Key of the entry to remove.
 */
function deleteClientError(userId) {
|
|
4360
|
+
clientErrors.delete(userId);
|
|
4361
|
+
}
|
|
4362
|
+
|
|
4363
|
+
/**
 * Lists every entry currently held in `clientErrors`.
 *
 * @returns {object[]} A new array containing the stored entries.
 */
function getClientErrors() {
  return [...clientErrors.values()];
}
|
|
4366
|
+
|
|
4367
|
+
/**
 * Returns the contents of `clientLoginStatus` as a plain object built with
 * `Object.fromEntries`.
 *
 * @returns {object} One property per entry of `clientLoginStatus`.
 */
function getClientLoginStatus() {
|
|
4368
|
+
return Object.fromEntries(clientLoginStatus);
|
|
4369
|
+
}
|
|
4370
|
+
|
|
4371
|
+
/**
 * Registers a client heartbeat in `activeClients`.
 *
 * A new client is stored as a copy of `info` plus `lastSeen`. For a known
 * client only truthy `type`, `userId` and `ip` values and a defined `port`
 * replace the stored ones; `lastSeen` is always refreshed.
 *
 * @param clientId - Key of the client.
 * @param {object} info - May carry `type`, `userId`, `ip` and `port`.
 */
function trackClient(clientId, info) {
  const known = activeClients.get(clientId);
  if (!known) {
    activeClients.set(clientId, { ...info, lastSeen: Date.now() });
    return;
  }

  for (const field of ["type", "userId", "ip"]) {
    if (info[field]) known[field] = info[field];
  }
  // A port of 0 is a valid update, so only "undefined" is skipped.
  if (info.port !== undefined) known.port = info.port;
  known.lastSeen = Date.now();
}
|
|
4386
|
+
|
|
4387
|
+
/**
 * Returns the clients seen within the last two minutes.
 *
 * Entries whose `lastSeen` is more than two minutes old are removed from
 * `activeClients` as a side effect. Missing fields are reported with the
 * defaults "unknown" (type), "" (ip, userId) and 0 (port).
 *
 * @returns {{clientId: *, type: string, ip: string, port: number,
 *   userId: string, lastSeen: number}[]}
 */
function getActiveClients() {
  const now = Date.now();
  const staleAfterMs = 2 * 60 * 1000;
  const alive = [];

  activeClients.forEach((info, clientId) => {
    if (now - info.lastSeen > staleAfterMs) {
      activeClients.delete(clientId);
      return;
    }
    alive.push({
      clientId,
      type: info.type || "unknown",
      ip: info.ip || "",
      port: info.port || 0,
      userId: info.userId || "",
      lastSeen: info.lastSeen,
    });
  });

  return alive;
}
|
|
4402
|
+
|
|
4403
|
+
/**
 * Hands out users whose profile still has to be refreshed and marks them as taken.
 *
 * A user qualifies when `tt_seller` is empty and `user_update_count` is missing
 * or <= 0. Users whose `sources` contain "tag" come first, then the oldest
 * `created_at` (the SQL path additionally breaks ties by `unique_id`).
 * Every returned task has its update counter incremented and `updatedAt`
 * refreshed, so it is not handed out again.
 *
 * @param {number|string} [limit] Maximum number of tasks; values that do not
 *   parse to a positive integer fall back to 5, and the minimum is 1.
 * @param {string[]|string} [countries] Optional filter on the guessed
 *   location (case-insensitive). An array of codes, or a comma-separated
 *   string such as "us,gb".
 * @returns {object[]} The claimed tasks (mapped rows in SQLite mode, the live
 *   records of `data` otherwise).
 */
function getPendingUserUpdateTasks(limit, countries) {
  const normalizeCountry = (code) => String(code).trim().toUpperCase();

  let targetCountries = [];
  if (typeof countries === "string") {
    // Tolerate "us,gb" coming straight from a query string instead of throwing
    // on the missing Array#map; blank tokens are ignored.
    targetCountries = countries
      .split(",")
      .map(normalizeCountry)
      .filter((code) => code !== "");
  } else if (countries) {
    targetCountries = countries.map(normalizeCountry);
  }
  const hasCountryFilter = targetCountries.length > 0;

  // Explicit radix: the limit is always meant as a decimal number.
  const maxTasks = Math.max(1, Number.parseInt(limit, 10) || 5);

  if (db) {
    let sql = `
      SELECT *
      FROM jobs_base
      WHERE COALESCE(tt_seller, '') = ''
      AND COALESCE(user_update_count, 0) <= 0
    `;
    const sqlParams = [];

    if (hasCountryFilter) {
      const placeholders = targetCountries.map(() => "?").join(", ");
      sql += ` AND UPPER(COALESCE(guessed_location, '')) IN (${placeholders})`;
      sqlParams.push(...targetCountries);
    }

    // Priority: tasks whose sources contain "tag" first, the rest by created_at.
    sql += ` ORDER BY
      CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
      created_at ASC,
      unique_id ASC
      LIMIT ?`;
    sqlParams.push(maxTasks);

    const rows = db.prepare(sql).all(...sqlParams);
    if (rows.length === 0) return [];

    const now = Date.now();
    const bumpStmt = db.prepare(
      `
      UPDATE jobs_base
      SET user_update_count = COALESCE(user_update_count, 0) + 1,
      updated_at = ?
      WHERE unique_id = ?
      `,
    );
    const bumpTxn = db.transaction((items) => {
      for (const item of items) {
        bumpStmt.run(now, item.unique_id);
      }
    });
    bumpTxn(rows);

    // The rows were read before the bump, so mirror the bump on the mapped copies.
    return rows.map((row) => {
      const mapped = mapJobRow(row);
      mapped.userUpdateCount = (mapped.userUpdateCount || 0) + 1;
      mapped.updatedAt = now;
      return mapped;
    });
  }

  const isPending = (user) => {
    const sellerUnknown =
      user.ttSeller === null ||
      user.ttSeller === undefined ||
      user.ttSeller === "";
    if (!sellerUnknown) return false;

    const updateCount = user.userUpdateCount;
    const neverUpdated =
      updateCount === null || updateCount === undefined || updateCount <= 0;
    if (!neverUpdated) return false;

    if (!hasCountryFilter) return true;
    return targetCountries.includes((user.guessedLocation || "").toUpperCase());
  };

  const pending = data
    .filter(isPending)
    .sort((a, b) => {
      // Priority: tasks whose sources contain "tag" first.
      const aIsTag = (a.sources || "").includes("tag");
      const bIsTag = (b.sources || "").includes("tag");
      if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
      return (a.createdAt || 0) - (b.createdAt || 0);
    })
    .slice(0, maxTasks);

  // Accepting a task bumps userUpdateCount.
  pending.forEach((user) => {
    user.userUpdateCount = (user.userUpdateCount || 0) + 1;
    user.updatedAt = Date.now();
  });
  save();
  return pending;
}
|
|
4495
|
+
|
|
4496
|
+
/**
 * Merges freshly scraped profile fields into one user record.
 *
 * In SQLite mode the work is delegated to `updateJobInfo(uniqueId, info, true)`.
 * In the in-memory mode every non-empty field of `info` is copied onto the
 * record, then `userUpdateCount` is incremented, `updatedAt` refreshed and the
 * store saved.
 *
 * @param {string} uniqueId Identifier of the user to update.
 * @param {object|null|undefined} info Fields to merge. `undefined`, `null`
 *   and "" values are ignored; `uniqueId` and `sources` are never overwritten.
 *   A missing `info` is treated as "no fields".
 * @returns {{ok: true, userUpdateCount: number}|{error: string}|*} The result
 *   of `updateJobInfo` in SQLite mode; otherwise the new counter or
 *   `{ error: "user not found" }`.
 */
function updateUserInfo(uniqueId, info) {
  if (db) {
    return updateJobInfo(uniqueId, info, true);
  }

  const user = getUser(uniqueId);
  if (!user) return { error: "user not found" };

  // Identity fields must not be overwritten, and `info` comes from clients:
  // assigning "__proto__" would swap the record's prototype instead of storing data.
  const protectedKeys = new Set([
    "uniqueId",
    "sources",
    "__proto__",
    "constructor",
    "prototype",
  ]);

  // `info ?? {}` keeps a missing payload from throwing inside Object.entries.
  for (const [key, value] of Object.entries(info ?? {})) {
    if (protectedKeys.has(key)) continue;
    if (value !== undefined && value !== null && value !== "") {
      user[key] = value;
    }
  }

  user.userUpdateCount = (user.userUpdateCount || 0) + 1;
  user.updatedAt = Date.now();
  save();
  return { ok: true, userUpdateCount: user.userUpdateCount };
}
|
|
4514
|
+
|
|
4515
|
+
/**
 * Records the creation location of a user and stamps the modification time.
 *
 * SQLite mode: the user must exist in the `jobs` table; `location_created`,
 * `modified_at` and `updated_at` are written with one shared timestamp.
 * In-memory mode: the same fields are set on the record returned by
 * `getUser`, and `userUpdateCount` is incremented before saving.
 *
 * NOTE(review): only the in-memory branch increments `userUpdateCount`, and
 * the SQLite branch looks in `jobs` only (not `jobs_base` / `raw_jobs`) —
 * confirm both are intended.
 *
 * @param {string} uniqueId Identifier of the user.
 * @param {*} location Value stored as the creation location.
 * @returns {{ok: true, location: *, modifiedAt: number}|{error: string}}
 */
function updateUserLocation(uniqueId, location) {
  const notFound = () => ({ error: "user not found" });

  if (!db) {
    const record = getUser(uniqueId);
    if (!record) {
      return notFound();
    }
    record.locationCreated = location;
    record.modifiedAt = Date.now();
    record.updatedAt = Date.now();
    record.userUpdateCount = (record.userUpdateCount || 0) + 1;
    save();
    return { ok: true, location, modifiedAt: record.modifiedAt };
  }

  const lookupStmt = db.prepare("SELECT * FROM jobs WHERE unique_id = ?");
  const found = lookupStmt.get(uniqueId);
  if (!found) {
    return notFound();
  }

  const stamp = Date.now();
  const writeStmt = db.prepare(
    "UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
  );
  writeStmt.run(location, stamp, stamp, uniqueId);
  return { ok: true, location, modifiedAt: stamp };
}
|
|
4537
|
+
|
|
4538
|
+
// 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
|
|
4539
|
+
/**
 * Moves one user from `jobs` to `raw_jobs`: all listed columns are copied
 * with INSERT OR REPLACE and the source row is deleted, both inside a single
 * transaction.
 *
 * @param {string|number} uniqueId Identifier of the job; surrounding
 *   whitespace is trimmed.
 * @returns {boolean} `false` when there is no database or the id is missing
 *   or blank; `true` once the transaction has run. `true` does not say whether
 *   a matching row actually existed.
 */
function moveJobToRaw(uniqueId) {
  if (!db) return false;
  // String(undefined) / String(null) would yield the literal ids "undefined" /
  // "null" and pass the blank check below, so reject them explicitly.
  if (uniqueId === undefined || uniqueId === null) return false;
  const safeId = String(uniqueId).trim();
  if (!safeId) return false;

  // Single source of truth for the copied columns; the INSERT and the SELECT
  // lists must stay in the same order.
  const columns = `
    unique_id, nickname, status, sources, claimed_by, claimed_at,
    error, pinned, no_video, restricted, user_update_count,
    tt_seller, verified, video_count, comment_count,
    guessed_location, location_created, confirmed_location, modified_at,
    follower_count, following_count, heart_count, refresh_time,
    processed, processed_at, created_at, updated_at,
    region, signature, bio_link, sec_uid, status_code, latest_video_time,
    user_create_time
  `;

  const moveSingleTxn = db.transaction(() => {
    db.prepare(
      `
      INSERT OR REPLACE INTO raw_jobs (${columns})
      SELECT ${columns}
      FROM jobs WHERE unique_id = ?
      `,
    ).run(safeId);

    db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
  });
  moveSingleTxn();
  return true;
}
|
|
4575
|
+
|
|
4576
|
+
/**
 * Applies a batch of profile updates and re-files every updated user.
 *
 * SQLite mode: each item is written through `updateJobBaseInfo`. An item whose
 * `info` is `{ error, statusCode }` only records the status code. Afterwards
 * the user is moved out of `jobs_base`: into `jobs` when `tt_seller` is set
 * and `video_count > 0`, into `raw_jobs` otherwise. Updates and moves run in
 * ONE transaction, so a failure cannot leave a user both updated and unmoved,
 * or copied but not deleted.
 *
 * In-memory mode: fields are merged onto the record returned by `getUser`
 * (empty values skipped), `userUpdateCount` is incremented, and the store is
 * saved once at the end.
 *
 * @param {Array<{uniqueId: string, info: object}>} updates Items to apply;
 *   anything that is not an array is treated as an empty batch.
 * @returns {Array<{uniqueId: string, ok?: true, userUpdateCount?: number, error?: string}>}
 *   One result per item, in input order.
 */
function batchUpdateUserInfo(updates) {
  const batch = Array.isArray(updates) ? updates : [];

  if (db) {
    // Columns copied when a row leaves jobs_base; shared by both destinations.
    const jobColumns = `
      unique_id, nickname, status, sources, claimed_by, claimed_at,
      error, pinned, no_video, restricted, user_update_count,
      tt_seller, verified, video_count, comment_count,
      guessed_location, location_created, confirmed_location, modified_at,
      follower_count, following_count, heart_count, refresh_time,
      processed, processed_at, created_at, updated_at,
      region, signature, bio_link, sec_uid, status_code, latest_video_time,
      user_create_time
    `;

    // Copies the given ids from jobs_base into targetTable, then deletes them
    // from jobs_base. targetTable is always a literal chosen below, never input.
    const moveOutOfJobsBase = (targetTable, ids) => {
      if (ids.length === 0) return;
      const placeholders = ids.map(() => "?").join(",");
      db.prepare(
        `
        INSERT OR REPLACE INTO ${targetTable} (${jobColumns})
        SELECT ${jobColumns}
        FROM jobs_base WHERE unique_id IN (${placeholders})
        `,
      ).run(...ids);
      db.prepare(
        `DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`,
      ).run(...ids);
    };

    const results = [];
    const applyBatch = db.transaction((items) => {
      const sellerMoveList = [];
      const rawMoveList = [];

      for (const item of items) {
        const uniqueId = item.uniqueId;
        const info = item.info;

        // A failed scrape ({ error: true, statusCode }) only records the
        // status code and leaves every other field untouched.
        const isErrorReport =
          info && info.error && info.statusCode !== undefined;
        const updateResult = isErrorReport
          ? updateJobBaseInfo(uniqueId, { statusCode: info.statusCode }, true)
          : updateJobBaseInfo(uniqueId, info, true);

        if (updateResult.error) {
          results.push({ uniqueId, error: updateResult.error });
          continue;
        }

        // Sellers with at least one video go to jobs, everyone else to raw_jobs.
        const row = getJobBaseRow(uniqueId);
        const ttSeller = row ? row.tt_seller : null;
        const videoCount = row ? row.video_count || 0 : 0;
        if (ttSeller && videoCount > 0) {
          sellerMoveList.push(uniqueId);
        } else {
          rawMoveList.push(uniqueId);
        }

        results.push({
          uniqueId,
          ok: true,
          userUpdateCount: updateResult.userUpdateCount,
        });
      }

      // Same order as before: sellers first, then the raw pool.
      moveOutOfJobsBase("jobs", sellerMoveList);
      moveOutOfJobsBase("raw_jobs", rawMoveList);
    });
    applyBatch(batch);

    return results;
  }

  // Keys never copied from a payload: identity fields, the error flag, and
  // prototype-related keys that must not be assigned from client data.
  const skippedKeys = new Set([
    "uniqueId",
    "sources",
    "error",
    "__proto__",
    "constructor",
    "prototype",
  ]);

  const memResults = [];
  for (const item of batch) {
    const user = getUser(item.uniqueId);
    if (!user) {
      memResults.push({ uniqueId: item.uniqueId, error: "user not found" });
      continue;
    }

    // A missing payload is an update without fields, not a crash.
    const info = item.info ?? {};
    if (info.error && info.statusCode !== undefined) {
      // Only the status code is recorded.
      user.statusCode = info.statusCode;
    } else {
      for (const [key, value] of Object.entries(info)) {
        if (skippedKeys.has(key)) continue;
        if (value !== undefined && value !== null && value !== "") {
          user[key] = value;
        }
      }
    }

    user.userUpdateCount = (user.userUpdateCount || 0) + 1;
    user.updatedAt = Date.now();
    memResults.push({
      uniqueId: item.uniqueId,
      ok: true,
      userUpdateCount: user.userUpdateCount,
    });
  }
  save();
  return memResults;
}
|
|
4739
|
+
|
|
4740
|
+
// 视频登记
|
|
4741
|
+
/**
 * Registers the videos discovered for one author, ignoring ids already known.
 *
 * Entries of `videoList` without a truthy `id` (including null / undefined
 * entries) are dropped up front in BOTH storage modes and count neither as
 * registered nor as skipped.
 *
 * @param {string} sourceUser Unique id of the author the videos belong to.
 * @param {Array<{id: *, href?: string, createTime?: *}>} videoList Videos to register.
 * @param {string} [locationCreated] Location stored with each video (null when falsy).
 * @param {*} [ttSeller] Seller flag; stored as 1/0 in SQLite and as the value
 *   itself (or `false`) in memory.
 * @returns {{registered: number, skipped: number}} Newly stored videos and
 *   already-known ids.
 */
function registerVideos(sourceUser, videoList, locationCreated, ttSeller) {
  if (!Array.isArray(videoList) || videoList.length === 0) {
    return { registered: 0, skipped: 0 };
  }

  // One validity filter for both backends; previously only the SQLite path
  // filtered, so the in-memory path threw on null entries and stored id-less videos.
  const candidates = videoList.filter((item) => item?.id);

  let registered = 0;
  let skipped = 0;

  if (db) {
    const insertStmt = db.prepare(`
      INSERT OR IGNORE INTO videos (
        id,
        href,
        author_unique_id,
        location_created,
        tt_seller,
        registered_at,
        user_update_count,
        create_time
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const now = Date.now();
    const txn = db.transaction((items) => {
      for (const item of items) {
        const result = insertStmt.run(
          item.id,
          item.href || null,
          sourceUser,
          locationCreated || null,
          ttSeller ? 1 : 0,
          now,
          0,
          item.createTime || null,
        );
        // INSERT OR IGNORE reports 0 changes when the id already exists.
        if (result.changes > 0) registered++;
        else skipped++;
      }
    });
    txn(candidates);
    return { registered, skipped };
  }

  const existingIds = new Set(videos.map((v) => v.id));
  for (const item of candidates) {
    if (existingIds.has(item.id)) {
      skipped++;
      continue;
    }
    videos.push({
      id: item.id,
      href: item.href,
      authorUniqueId: sourceUser,
      locationCreated: locationCreated || null,
      ttSeller: ttSeller || false,
      registeredAt: Date.now(),
      createTime: item.createTime || null,
    });
    // Track the id so duplicates inside the same batch are skipped too.
    existingIds.add(item.id);
    registered++;
  }

  saveVideos();
  return { registered, skipped };
}
|
|
4808
|
+
|
|
4809
|
+
/**
 * Returns every registered video.
 *
 * @returns {object[]} In SQLite mode the rows from `getAllVideoRows()` passed
 *   through `mapVideoRow`; otherwise the live in-memory `videos` array itself
 *   (not a copy).
 */
function getVideos() {
  if (!db) {
    return videos;
  }
  const allRows = getAllVideoRows();
  return allRows.map(mapVideoRow);
}
|
|
4815
|
+
|
|
4816
|
+
/**
 * Looks up one video by id.
 *
 * @param {*} videoId Id of the video; a falsy id short-circuits to `null`.
 * @returns {object|null} In SQLite mode, whatever `mapVideoRow` returns for
 *   the row found by `getVideoRow`; in memory mode the matching entry of
 *   `videos`, or `null` when there is none.
 */
function getVideo(videoId) {
  if (videoId) {
    if (db) {
      const row = getVideoRow(videoId);
      return mapVideoRow(row);
    }
    const match = videos.find((candidate) => candidate.id === videoId);
    return match || null;
  }
  return null;
}
|
|
4823
|
+
|
|
4824
|
+
/**
 * Returns one page of registered videos, newest registration first.
 *
 * @param {number|string} [limit] Page size, clamped to 1..100; defaults to 50
 *   when it does not parse to a non-zero integer.
 * @param {number|string} [offset] Number of videos to skip; negative or
 *   unparsable values become 0.
 * @returns {{total: number, limit: number, offset: number, videos: object[]}}
 *   `total` is the count of all videos, `limit` / `offset` are the
 *   normalised values actually used.
 */
function getVideosPage(limit, offset) {
  // Explicit radix: page parameters are decimal.
  const safeLimit = Math.max(
    1,
    Math.min(100, Number.parseInt(limit, 10) || 50),
  );
  const safeOffset = Math.max(0, Number.parseInt(offset, 10) || 0);

  if (db) {
    const rows = db
      .prepare(
        `
        SELECT *
        FROM videos
        ORDER BY registered_at DESC, id DESC
        LIMIT ? OFFSET ?
        `,
      )
      .all(safeLimit, safeOffset);
    const total = db.prepare("SELECT COUNT(*) as c FROM videos").get().c;
    return {
      total,
      limit: safeLimit,
      offset: safeOffset,
      videos: rows.map(mapVideoRow),
    };
  }

  // Mirror the SQL ordering (registered_at DESC, id DESC) so both backends
  // page through the same sequence. Sort a copy: `videos` is shared state.
  const newestFirst = [...videos].sort((a, b) => {
    const byTime = (b.registeredAt || 0) - (a.registeredAt || 0);
    if (byTime !== 0) return byTime;
    if (a.id === b.id) return 0;
    return a.id < b.id ? 1 : -1;
  });

  return {
    total: videos.length,
    limit: safeLimit,
    offset: safeOffset,
    videos: newestFirst.slice(safeOffset, safeOffset + safeLimit),
  };
}
|
|
4855
|
+
|
|
4856
|
+
/**
 * Counts the registered videos.
 *
 * @returns {number} `COUNT(*)` of the `videos` table in SQLite mode, otherwise
 *   the length of the in-memory `videos` array.
 */
function getVideoCount() {
  if (!db) {
    return videos.length;
  }
  const countRow = db.prepare("SELECT COUNT(*) as c FROM videos").get();
  return countRow.c;
}
|
|
4862
|
+
|
|
4863
|
+
/**
 * Hands out videos whose comments still have to be collected and marks them
 * as taken.
 *
 * A video is pending while its update counter is missing or <= 0. Seller
 * videos come first, then the oldest registration. Every returned video has
 * its counter incremented, so it is not handed out again.
 *
 * @param {number|string} [limit] Maximum number of tasks. Normalised the same
 *   way for both backends: values that do not parse to a positive integer
 *   yield 1.
 * @returns {object[]} The claimed videos (mapped rows in SQLite mode, the live
 *   entries of `videos` otherwise).
 */
function getPendingCommentTasks(limit) {
  // Normalise once for both paths. The in-memory path used the raw value in
  // slice(0, limit): `undefined` claimed every pending video and a negative
  // value claimed all but the last N.
  const maxTasks = Math.max(1, Number.parseInt(limit, 10) || 1);

  if (db) {
    const rows = db
      .prepare(
        `
        SELECT *
        FROM videos
        WHERE user_update_count IS NULL OR user_update_count <= 0
        ORDER BY tt_seller DESC, registered_at ASC
        LIMIT ?
        `,
      )
      .all(maxTasks);
    if (rows.length === 0) return [];

    const bumpStmt = db.prepare(
      `
      UPDATE videos
      SET user_update_count = COALESCE(user_update_count, 0) + 1
      WHERE id = ?
      `,
    );
    const bumpTxn = db.transaction((items) => {
      for (const item of items) bumpStmt.run(item.id);
    });
    bumpTxn(rows);

    // Rows were read before the bump, so mirror it on the mapped copies.
    return rows.map((row) => {
      const mapped = mapVideoRow(row);
      mapped.userUpdateCount = (mapped.userUpdateCount || 0) + 1;
      return mapped;
    });
  }

  // Pending videos: userUpdateCount <= 0 or null/undefined.
  const pending = videos.filter((v) => (v.userUpdateCount || 0) <= 0);
  // Seller videos first, then oldest registration.
  pending.sort((a, b) => {
    if (a.ttSeller && !b.ttSeller) return -1;
    if (!a.ttSeller && b.ttSeller) return 1;
    return (a.registeredAt || 0) - (b.registeredAt || 0);
  });

  const tasks = pending.slice(0, maxTasks);
  // Claiming a task bumps its counter.
  for (const task of tasks) {
    task.userUpdateCount = (task.userUpdateCount || 0) + 1;
  }
  saveVideos();
  return tasks;
}
|
|
4913
|
+
|
|
4914
|
+
/**
 * Marks a comment-collection task as finished by incrementing the video's
 * update counter once more.
 *
 * @param {*} videoId Id of the video whose task is being committed.
 * @returns {{ok: true, userUpdateCount: number}|{ok: false, error: string}}
 *   The new counter value, or a "video not found" error.
 */
function commitCommentTask(videoId) {
  const missing = () => ({ ok: false, error: "video not found" });

  if (!db) {
    const target = videos.find((entry) => entry.id === videoId);
    if (!target) {
      return missing();
    }
    target.userUpdateCount = (target.userUpdateCount || 0) + 1;
    saveVideos();
    return { ok: true, userUpdateCount: target.userUpdateCount };
  }

  const row = getVideoRow(videoId);
  if (!row) {
    return missing();
  }

  // The counter is computed from the row just read and written back as a value.
  const previousCount = row.user_update_count || 0;
  const nextCount = previousCount + 1;
  const writeStmt = db.prepare(
    `
    UPDATE videos
    SET user_update_count = ?
    WHERE id = ?
    `,
  );
  writeStmt.run(nextCount, videoId);
  return { ok: true, userUpdateCount: nextCount };
}
|
|
4935
|
+
|
|
4936
|
+
// Object handed back to the caller of the enclosing function. It exposes the
// functions defined in this scope; several entries are published under a
// shorter name than the `...FromDb` function that implements them. The live
// `data` array is exposed as the last entry.
return {
  save,
  flushSave,
  getUser,
  hasUser,
  userExists,
  addUser,
  addRawUsers,
  getPendingUsers,
  getProcessedUsers,
  getAllUsers,
  getUserDbCount,
  getJobsCount,
  getRawJobsCount,
  getPendingJobsCount,
  getPendingJobsUserUpdateCount,
  getDashboardStats: getDashboardStatsFromDb,
  getPendingByCountry: getPendingByCountryFromDb,
  getUserUpdateByCountry: getUserUpdateByCountryFromDb,
  getAttachStuckByCountry: getAttachStuckByCountryFromDb,
  getRawByCountry: getRawByCountryFromDb,
  moveJobsToRawByCountry,
  restoreAttachStuckByCountry,
  resetPendingByCountry,
  restoreRawJobsByCountry,
  restoreRawJobById,
  restoreRawJobsByFilter,
  getUsersPage: getUsersPageFromDb,
  getRawJobsPage: getRawJobsPageFromDb,
  getTargetUsers: getTargetUsersFromDb,
  getTargetUsersByCountry: getTargetUsersByCountryFromDb,
  getStats,
  getStatusGroups,
  markGroupsDirty,
  refillJobsFromRaw,
  scoreJobLocation,
  scoreJobsBatch,
  claimNextJob,
  commitJob,
  commitNewExplore,
  resetJob,
  togglePin,
  getNextRedoJob,
  commitRedoJob,
  getPendingUserUpdateTasks,
  updateUserInfo,
  updateUserLocation,
  batchUpdateUserInfo,
  reportClientError,
  deleteClientError,
  getClientErrors,
  getClientLoginStatus,
  trackClient,
  getActiveClients,
  registerVideos,
  getVideo,
  getVideos,
  getVideosPage,
  getVideoCount,
  getPendingCommentTasks,
  commitCommentTask,
  debugClaimNextJob,
  stopBackup,
  backupDatabase, // manually back up the database
  rawQuery,
  getLlmSampleOffsets, // current LLM sampling offsets
  // Tag discovery and scoring
  insertTag,
  getTagsByStatus,
  getTagsByCountry,
  getDeadTags,
  claimTag,
  reportTagScore,
  getAllTags,
  normalizeTags,
  clearTags,
  data,
};
|
|
5014
|
+
|
|
5015
|
+
// 辅助函数:获取 LLM 采样偏移量
|
|
5016
|
+
/**
 * Exposes the current LLM sampling offsets as a plain object.
 *
 * @returns {object} A fresh object built with `Object.fromEntries` from the
 *   key/value pairs of `llmSampleOffsets`.
 */
function getLlmSampleOffsets() {
  const offsetsSnapshot = Object.fromEntries(llmSampleOffsets);
  return offsetsSnapshot;
}
|
|
5019
|
+
}
|