tt-help-cli-ycl 1.3.92 → 1.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -94
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +4 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +537 -2298
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +24 -0
- package/src/watch/tag-service.js +142 -11
package/src/watch/data-store.js
CHANGED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 数据存储主模块 — createStore() 编排器
|
|
3
|
+
*
|
|
4
|
+
* 本文件是数据存储的入口点,负责编排各子模块:
|
|
5
|
+
* - db-schema.js: 建表、迁移、全局连接管理
|
|
6
|
+
* - db-columns.js: 共享列名常量和 SQL 生成
|
|
7
|
+
* - db-crud.js: 基础 CRUD(增删改查、行映射)
|
|
8
|
+
* - db-stats.js: 仪表盘统计、按国家分组
|
|
9
|
+
* - db-raw-jobs.js: raw_jobs 移入/恢复
|
|
10
|
+
* - db-tags.js: Tag 发现与打分
|
|
11
|
+
* - llm-scoring.js: LLM 国家匹配度打分
|
|
12
|
+
*
|
|
13
|
+
* createStore() 保留为运行时编排器,管理:
|
|
14
|
+
* - 任务认领/提交(claimNextJob/commitJob)
|
|
15
|
+
* - 客户端追踪、视频管理、备份
|
|
16
|
+
* - 内存索引、stats 缓存
|
|
17
|
+
*/
|
|
18
|
+
|
|
1
19
|
import fs from "fs";
|
|
2
20
|
import path from "path";
|
|
3
21
|
import Database from "better-sqlite3";
|
|
@@ -6,421 +24,90 @@ import {
|
|
|
6
24
|
DEFAULT_TARGET_LOCATIONS,
|
|
7
25
|
} from "../lib/target-locations.js";
|
|
8
26
|
|
|
9
|
-
//
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
created_at TEXT,
|
|
88
|
-
updated_at TEXT
|
|
89
|
-
)
|
|
90
|
-
`);
|
|
91
|
-
db.exec(`
|
|
92
|
-
CREATE TABLE IF NOT EXISTS jobs (
|
|
93
|
-
unique_id TEXT PRIMARY KEY,
|
|
94
|
-
nickname TEXT,
|
|
95
|
-
status TEXT DEFAULT 'pending',
|
|
96
|
-
sources TEXT,
|
|
97
|
-
claimed_by TEXT,
|
|
98
|
-
claimed_at INTEGER,
|
|
99
|
-
error TEXT,
|
|
100
|
-
pinned INTEGER DEFAULT 0,
|
|
101
|
-
no_video INTEGER DEFAULT 0,
|
|
102
|
-
restricted INTEGER DEFAULT 0,
|
|
103
|
-
user_update_count INTEGER DEFAULT 0,
|
|
104
|
-
tt_seller INTEGER,
|
|
105
|
-
verified INTEGER,
|
|
106
|
-
video_count INTEGER DEFAULT 0,
|
|
107
|
-
comment_count INTEGER DEFAULT 0,
|
|
108
|
-
guessed_location TEXT,
|
|
109
|
-
location_created TEXT,
|
|
110
|
-
confirmed_location TEXT,
|
|
111
|
-
modified_at INTEGER,
|
|
112
|
-
follower_count INTEGER DEFAULT 0,
|
|
113
|
-
following_count INTEGER DEFAULT 0,
|
|
114
|
-
heart_count INTEGER DEFAULT 0,
|
|
115
|
-
refresh_time INTEGER,
|
|
116
|
-
processed INTEGER DEFAULT 0,
|
|
117
|
-
processed_at INTEGER,
|
|
118
|
-
created_at INTEGER,
|
|
119
|
-
updated_at INTEGER,
|
|
120
|
-
region TEXT,
|
|
121
|
-
signature TEXT,
|
|
122
|
-
sec_uid TEXT,
|
|
123
|
-
status_code INTEGER
|
|
124
|
-
)
|
|
125
|
-
`);
|
|
126
|
-
|
|
127
|
-
// 迁移:为已存在的 jobs 表添加 status_code 列
|
|
128
|
-
const existingJobColumns = new Set(
|
|
129
|
-
db
|
|
130
|
-
.prepare("PRAGMA table_info(jobs)")
|
|
131
|
-
.all()
|
|
132
|
-
.map((c) => c.name),
|
|
133
|
-
);
|
|
134
|
-
if (!existingJobColumns.has("status_code")) {
|
|
135
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN status_code INTEGER`);
|
|
136
|
-
}
|
|
137
|
-
if (!existingJobColumns.has("latest_video_time")) {
|
|
138
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN latest_video_time INTEGER`);
|
|
139
|
-
}
|
|
140
|
-
if (!existingJobColumns.has("confirmed_location")) {
|
|
141
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN confirmed_location TEXT`);
|
|
142
|
-
}
|
|
143
|
-
if (!existingJobColumns.has("modified_at")) {
|
|
144
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN modified_at INTEGER`);
|
|
145
|
-
}
|
|
146
|
-
if (!existingJobColumns.has("bio_link")) {
|
|
147
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
|
|
148
|
-
}
|
|
149
|
-
if (!existingJobColumns.has("top_video_play_count")) {
|
|
150
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_play_count INTEGER`);
|
|
151
|
-
}
|
|
152
|
-
if (!existingJobColumns.has("top_video_href")) {
|
|
153
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN top_video_href TEXT`);
|
|
154
|
-
}
|
|
155
|
-
if (!existingJobColumns.has("user_create_time")) {
|
|
156
|
-
db.exec(`ALTER TABLE jobs ADD COLUMN user_create_time INTEGER`);
|
|
157
|
-
}
|
|
158
|
-
db.exec(`
|
|
159
|
-
CREATE TABLE IF NOT EXISTS jobs_base (
|
|
160
|
-
unique_id TEXT PRIMARY KEY,
|
|
161
|
-
nickname TEXT,
|
|
162
|
-
status TEXT DEFAULT 'pending',
|
|
163
|
-
sources TEXT,
|
|
164
|
-
claimed_by TEXT,
|
|
165
|
-
claimed_at INTEGER,
|
|
166
|
-
error TEXT,
|
|
167
|
-
pinned INTEGER DEFAULT 0,
|
|
168
|
-
no_video INTEGER DEFAULT 0,
|
|
169
|
-
restricted INTEGER DEFAULT 0,
|
|
170
|
-
user_update_count INTEGER DEFAULT 0,
|
|
171
|
-
tt_seller INTEGER,
|
|
172
|
-
verified INTEGER,
|
|
173
|
-
video_count INTEGER DEFAULT 0,
|
|
174
|
-
comment_count INTEGER DEFAULT 0,
|
|
175
|
-
guessed_location TEXT,
|
|
176
|
-
location_created TEXT,
|
|
177
|
-
confirmed_location TEXT,
|
|
178
|
-
modified_at INTEGER,
|
|
179
|
-
follower_count INTEGER DEFAULT 0,
|
|
180
|
-
following_count INTEGER DEFAULT 0,
|
|
181
|
-
heart_count INTEGER DEFAULT 0,
|
|
182
|
-
refresh_time INTEGER,
|
|
183
|
-
processed INTEGER DEFAULT 0,
|
|
184
|
-
processed_at INTEGER,
|
|
185
|
-
created_at INTEGER,
|
|
186
|
-
updated_at INTEGER,
|
|
187
|
-
region TEXT,
|
|
188
|
-
signature TEXT,
|
|
189
|
-
sec_uid TEXT,
|
|
190
|
-
status_code INTEGER,
|
|
191
|
-
latest_video_time INTEGER,
|
|
192
|
-
bio_link TEXT
|
|
193
|
-
)
|
|
194
|
-
`);
|
|
195
|
-
|
|
196
|
-
// 迁移:为已存在的 jobs_base 表补全列
|
|
197
|
-
const existingJobBaseColumns = new Set(
|
|
198
|
-
db
|
|
199
|
-
.prepare("PRAGMA table_info(jobs_base)")
|
|
200
|
-
.all()
|
|
201
|
-
.map((c) => c.name),
|
|
202
|
-
);
|
|
203
|
-
if (!existingJobBaseColumns.has("status_code")) {
|
|
204
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
|
|
205
|
-
}
|
|
206
|
-
if (!existingJobBaseColumns.has("latest_video_time")) {
|
|
207
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
|
|
208
|
-
}
|
|
209
|
-
if (!existingJobBaseColumns.has("confirmed_location")) {
|
|
210
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
|
|
211
|
-
}
|
|
212
|
-
if (!existingJobBaseColumns.has("modified_at")) {
|
|
213
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
|
|
214
|
-
}
|
|
215
|
-
if (!existingJobBaseColumns.has("bio_link")) {
|
|
216
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
|
|
217
|
-
}
|
|
218
|
-
if (!existingJobBaseColumns.has("user_create_time")) {
|
|
219
|
-
db.exec(`ALTER TABLE jobs_base ADD COLUMN user_create_time INTEGER`);
|
|
220
|
-
}
|
|
221
|
-
db.exec(`
|
|
222
|
-
CREATE TABLE IF NOT EXISTS raw_jobs (
|
|
223
|
-
unique_id TEXT PRIMARY KEY,
|
|
224
|
-
nickname TEXT,
|
|
225
|
-
status TEXT DEFAULT 'pending',
|
|
226
|
-
sources TEXT,
|
|
227
|
-
claimed_by TEXT,
|
|
228
|
-
claimed_at INTEGER,
|
|
229
|
-
error TEXT,
|
|
230
|
-
pinned INTEGER DEFAULT 0,
|
|
231
|
-
no_video INTEGER DEFAULT 0,
|
|
232
|
-
restricted INTEGER DEFAULT 0,
|
|
233
|
-
user_update_count INTEGER DEFAULT 0,
|
|
234
|
-
tt_seller INTEGER,
|
|
235
|
-
verified INTEGER,
|
|
236
|
-
video_count INTEGER DEFAULT 0,
|
|
237
|
-
comment_count INTEGER DEFAULT 0,
|
|
238
|
-
guessed_location TEXT,
|
|
239
|
-
location_created TEXT,
|
|
240
|
-
confirmed_location TEXT,
|
|
241
|
-
modified_at INTEGER,
|
|
242
|
-
follower_count INTEGER DEFAULT 0,
|
|
243
|
-
following_count INTEGER DEFAULT 0,
|
|
244
|
-
heart_count INTEGER DEFAULT 0,
|
|
245
|
-
refresh_time INTEGER,
|
|
246
|
-
processed INTEGER DEFAULT 0,
|
|
247
|
-
processed_at INTEGER,
|
|
248
|
-
created_at INTEGER,
|
|
249
|
-
updated_at INTEGER,
|
|
250
|
-
region TEXT,
|
|
251
|
-
signature TEXT,
|
|
252
|
-
sec_uid TEXT,
|
|
253
|
-
status_code INTEGER,
|
|
254
|
-
latest_video_time INTEGER
|
|
255
|
-
)
|
|
256
|
-
`);
|
|
257
|
-
|
|
258
|
-
// 迁移:为已存在的 raw_jobs 表添加 status_code 列
|
|
259
|
-
const existingRawJobColumns = new Set(
|
|
260
|
-
db
|
|
261
|
-
.prepare("PRAGMA table_info(raw_jobs)")
|
|
262
|
-
.all()
|
|
263
|
-
.map((c) => c.name),
|
|
264
|
-
);
|
|
265
|
-
if (!existingRawJobColumns.has("status_code")) {
|
|
266
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN status_code INTEGER`);
|
|
267
|
-
}
|
|
268
|
-
if (!existingRawJobColumns.has("latest_video_time")) {
|
|
269
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN latest_video_time INTEGER`);
|
|
270
|
-
}
|
|
271
|
-
if (!existingRawJobColumns.has("confirmed_location")) {
|
|
272
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN confirmed_location TEXT`);
|
|
273
|
-
}
|
|
274
|
-
if (!existingRawJobColumns.has("modified_at")) {
|
|
275
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN modified_at INTEGER`);
|
|
276
|
-
}
|
|
277
|
-
if (!existingRawJobColumns.has("bio_link")) {
|
|
278
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN bio_link TEXT`);
|
|
279
|
-
}
|
|
280
|
-
if (!existingRawJobColumns.has("user_create_time")) {
|
|
281
|
-
db.exec(`ALTER TABLE raw_jobs ADD COLUMN user_create_time INTEGER`);
|
|
282
|
-
}
|
|
283
|
-
db.exec(`
|
|
284
|
-
CREATE TABLE IF NOT EXISTS videos (
|
|
285
|
-
id TEXT PRIMARY KEY,
|
|
286
|
-
href TEXT,
|
|
287
|
-
author_unique_id TEXT,
|
|
288
|
-
location_created TEXT,
|
|
289
|
-
tt_seller INTEGER DEFAULT 0,
|
|
290
|
-
registered_at INTEGER,
|
|
291
|
-
user_update_count INTEGER DEFAULT 0,
|
|
292
|
-
play_count INTEGER,
|
|
293
|
-
digg_count INTEGER,
|
|
294
|
-
comment_count INTEGER,
|
|
295
|
-
share_count INTEGER,
|
|
296
|
-
collect_count INTEGER,
|
|
297
|
-
stats_updated_at INTEGER,
|
|
298
|
-
create_time INTEGER
|
|
299
|
-
)
|
|
300
|
-
`);
|
|
301
|
-
db.exec(`
|
|
302
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_status_video
|
|
303
|
-
ON jobs(status, video_count DESC)
|
|
304
|
-
`);
|
|
305
|
-
db.exec(`
|
|
306
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claimed_by_status
|
|
307
|
-
ON jobs(claimed_by, status, claimed_at)
|
|
308
|
-
`);
|
|
309
|
-
db.exec(`
|
|
310
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_status_claimed_at
|
|
311
|
-
ON jobs(status, claimed_at)
|
|
312
|
-
`);
|
|
313
|
-
db.exec(`
|
|
314
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_redo_target
|
|
315
|
-
ON jobs(tt_seller, verified, location_created, refresh_time)
|
|
316
|
-
`);
|
|
317
|
-
db.exec(`
|
|
318
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_pending_priority
|
|
319
|
-
ON jobs(status, pinned DESC, guessed_location, follower_count DESC)
|
|
320
|
-
`);
|
|
321
|
-
db.exec(`
|
|
322
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_pinned
|
|
323
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
324
|
-
WHERE status = 'pending' AND COALESCE(pinned, 0) = 1
|
|
325
|
-
`);
|
|
326
|
-
db.exec(`
|
|
327
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_seller
|
|
328
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
329
|
-
WHERE status = 'pending'
|
|
330
|
-
AND COALESCE(pinned, 0) = 0
|
|
331
|
-
AND tt_seller = 1
|
|
332
|
-
AND verified = 0
|
|
333
|
-
`);
|
|
334
|
-
db.exec(`
|
|
335
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_follow
|
|
336
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
337
|
-
WHERE status = 'pending'
|
|
338
|
-
AND COALESCE(pinned, 0) = 0
|
|
339
|
-
AND (
|
|
340
|
-
instr(COALESCE(sources, ''), '"following"') > 0
|
|
341
|
-
OR instr(COALESCE(sources, ''), '"follower"') > 0
|
|
342
|
-
)
|
|
343
|
-
`);
|
|
344
|
-
db.exec(`
|
|
345
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_claim_pending_other
|
|
346
|
-
ON jobs(UPPER(COALESCE(guessed_location, '')), follower_count DESC, created_at ASC, unique_id ASC)
|
|
347
|
-
WHERE status = 'pending' AND COALESCE(pinned, 0) = 0
|
|
348
|
-
`);
|
|
349
|
-
db.exec(`
|
|
350
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue
|
|
351
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
352
|
-
WHERE (tt_seller IS NULL OR tt_seller = '')
|
|
353
|
-
AND (user_update_count IS NULL OR user_update_count <= 0)
|
|
354
|
-
`);
|
|
355
|
-
db.exec(`
|
|
356
|
-
CREATE INDEX IF NOT EXISTS idx_jobs_user_update_queue_expr
|
|
357
|
-
ON jobs(created_at ASC, unique_id ASC)
|
|
358
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
359
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
360
|
-
`);
|
|
361
|
-
db.exec(`
|
|
362
|
-
CREATE INDEX IF NOT EXISTS idx_videos_comment_queue
|
|
363
|
-
ON videos(user_update_count, tt_seller DESC, registered_at ASC)
|
|
364
|
-
`);
|
|
365
|
-
db.exec(`
|
|
366
|
-
CREATE INDEX IF NOT EXISTS idx_videos_comment_queue_pending
|
|
367
|
-
ON videos(tt_seller DESC, registered_at ASC, id)
|
|
368
|
-
WHERE user_update_count IS NULL OR user_update_count <= 0
|
|
369
|
-
`);
|
|
370
|
-
|
|
371
|
-
const existingVideoColumns = new Set(
|
|
372
|
-
db
|
|
373
|
-
.prepare("PRAGMA table_info(videos)")
|
|
374
|
-
.all()
|
|
375
|
-
.map((column) => column.name),
|
|
376
|
-
);
|
|
377
|
-
const requiredVideoColumns = {
|
|
378
|
-
play_count: "INTEGER",
|
|
379
|
-
digg_count: "INTEGER",
|
|
380
|
-
comment_count: "INTEGER",
|
|
381
|
-
share_count: "INTEGER",
|
|
382
|
-
collect_count: "INTEGER",
|
|
383
|
-
stats_updated_at: "INTEGER",
|
|
384
|
-
};
|
|
385
|
-
for (const [column, type] of Object.entries(requiredVideoColumns)) {
|
|
386
|
-
if (!existingVideoColumns.has(column)) {
|
|
387
|
-
db.exec(`ALTER TABLE videos ADD COLUMN ${column} ${type}`);
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// 迁移:videos 表添加 create_time 列
|
|
392
|
-
if (!existingVideoColumns.has("create_time")) {
|
|
393
|
-
db.exec(`ALTER TABLE videos ADD COLUMN create_time INTEGER`);
|
|
394
|
-
}
|
|
27
|
+
// Schema 与连接管理
|
|
28
|
+
import {
|
|
29
|
+
getDb,
|
|
30
|
+
getDbPath,
|
|
31
|
+
initDb,
|
|
32
|
+
resetDbConnection,
|
|
33
|
+
loadLegacyUsersFromFiles,
|
|
34
|
+
loadLegacyVideosFromFile,
|
|
35
|
+
} from "./db-schema.js";
|
|
36
|
+
|
|
37
|
+
// CRUD 操作
|
|
38
|
+
import {
|
|
39
|
+
snakeToCamel,
|
|
40
|
+
camelToSnake,
|
|
41
|
+
normalizeJobValue,
|
|
42
|
+
mapJobRow,
|
|
43
|
+
mapVideoRow,
|
|
44
|
+
inferStatus,
|
|
45
|
+
hasUserInDb,
|
|
46
|
+
addUserToDb,
|
|
47
|
+
addJobToDb,
|
|
48
|
+
addJobBaseToDb,
|
|
49
|
+
addJob,
|
|
50
|
+
getJobRow,
|
|
51
|
+
getJobBaseRow,
|
|
52
|
+
getJob,
|
|
53
|
+
getAllJobs,
|
|
54
|
+
getVideoRow,
|
|
55
|
+
getAllVideoRows,
|
|
56
|
+
updateJobInfo,
|
|
57
|
+
updateJobBaseInfo,
|
|
58
|
+
getUserDbCount,
|
|
59
|
+
getJobsCount,
|
|
60
|
+
getPendingJobsCount,
|
|
61
|
+
getPendingJobsUserUpdateCount,
|
|
62
|
+
getRawJobsCount,
|
|
63
|
+
} from "./db-crud.js";
|
|
64
|
+
|
|
65
|
+
// 统计查询
|
|
66
|
+
import {
|
|
67
|
+
getDashboardStatsFromDb,
|
|
68
|
+
getPendingByCountryFromDb,
|
|
69
|
+
getUserUpdateByCountryFromDb,
|
|
70
|
+
getAttachStuckByCountryFromDb,
|
|
71
|
+
getRawByCountryFromDb,
|
|
72
|
+
restoreAttachStuckByCountry,
|
|
73
|
+
resetPendingByCountry,
|
|
74
|
+
} from "./db-stats.js";
|
|
75
|
+
|
|
76
|
+
// Raw Jobs 管理
|
|
77
|
+
import {
|
|
78
|
+
moveJobsToRawByCountry,
|
|
79
|
+
restoreRawJobsByCountry,
|
|
80
|
+
restoreRawJobById,
|
|
81
|
+
restoreRawJobsByFilter,
|
|
82
|
+
getRawJobsPageFromDb,
|
|
83
|
+
} from "./db-raw-jobs.js";
|
|
84
|
+
|
|
85
|
+
// Tag CRUD
|
|
86
|
+
import {
|
|
87
|
+
insertTag,
|
|
88
|
+
getTagsByStatus,
|
|
89
|
+
getTagsByCountry,
|
|
90
|
+
getDeadTags,
|
|
91
|
+
claimTag,
|
|
92
|
+
reportTagScore,
|
|
93
|
+
getAllTags,
|
|
94
|
+
rawQuery,
|
|
95
|
+
normalizeTags,
|
|
96
|
+
clearTags,
|
|
97
|
+
} from "./db-tags.js";
|
|
98
|
+
|
|
99
|
+
// LLM 打分
|
|
100
|
+
import {
|
|
101
|
+
scoreJobLocation,
|
|
102
|
+
scoreJobsBatch,
|
|
103
|
+
createLlmOffsetStore,
|
|
104
|
+
} from "./llm-scoring.js";
|
|
395
105
|
|
|
396
|
-
|
|
397
|
-
db.exec(`
|
|
398
|
-
CREATE TABLE IF NOT EXISTS tags (
|
|
399
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
400
|
-
tag TEXT NOT NULL UNIQUE,
|
|
401
|
-
status TEXT NOT NULL DEFAULT 'new',
|
|
402
|
-
score REAL NOT NULL DEFAULT 0,
|
|
403
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
404
|
-
scored_at TEXT,
|
|
405
|
-
score_count INTEGER NOT NULL DEFAULT 0,
|
|
406
|
-
countries TEXT NOT NULL DEFAULT '[]',
|
|
407
|
-
matched_countries TEXT DEFAULT '[]',
|
|
408
|
-
total_posts INTEGER DEFAULT 0,
|
|
409
|
-
author_count INTEGER DEFAULT 0,
|
|
410
|
-
matched_authors INTEGER DEFAULT 0,
|
|
411
|
-
pushed_users INTEGER DEFAULT 0,
|
|
412
|
-
source TEXT NOT NULL DEFAULT 'llm',
|
|
413
|
-
user_prompt TEXT,
|
|
414
|
-
last_error TEXT
|
|
415
|
-
)
|
|
416
|
-
`);
|
|
417
|
-
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_status ON tags(status)`);
|
|
418
|
-
db.exec(`CREATE INDEX IF NOT EXISTS idx_tags_score ON tags(score DESC)`);
|
|
419
|
-
|
|
420
|
-
const count = db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
421
|
-
console.log(`[data-store] SQLite users 表初始化完成: ${count} 条`);
|
|
422
|
-
}
|
|
106
|
+
// ===== 薄包装函数(保持外部 API 不变)=====
|
|
423
107
|
|
|
108
|
+
/**
|
|
109
|
+
* 导入历史 JSON 数据到 SQLite
|
|
110
|
+
*/
|
|
424
111
|
export function importLegacyJsonToDb({
|
|
425
112
|
dbFilePath,
|
|
426
113
|
usersFilePath,
|
|
@@ -428,33 +115,30 @@ export function importLegacyJsonToDb({
|
|
|
428
115
|
videosFilePath,
|
|
429
116
|
}) {
|
|
430
117
|
resetDbConnection();
|
|
431
|
-
|
|
118
|
+
initDb(dbFilePath);
|
|
119
|
+
|
|
120
|
+
const db = getDb();
|
|
121
|
+
const dbPath = getDbPath();
|
|
432
122
|
|
|
433
123
|
const legacyUsers = loadLegacyUsersFromFiles(usersFilePath, doneFilePath);
|
|
434
124
|
const legacyVideos = loadLegacyVideosFromFile(videosFilePath);
|
|
435
125
|
|
|
436
|
-
const beforeUsers =
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
const
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
const insertVideoStmt = db.prepare(`
|
|
444
|
-
INSERT OR IGNORE INTO videos (
|
|
445
|
-
id,
|
|
446
|
-
href,
|
|
447
|
-
author_unique_id,
|
|
448
|
-
location_created,
|
|
449
|
-
tt_seller,
|
|
450
|
-
registered_at,
|
|
451
|
-
user_update_count,
|
|
452
|
-
create_time
|
|
453
|
-
)
|
|
454
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
455
|
-
`);
|
|
126
|
+
const beforeUsers = getDb()
|
|
127
|
+
.prepare("SELECT COUNT(*) as c FROM users")
|
|
128
|
+
.get().c;
|
|
129
|
+
const beforeJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
130
|
+
const beforeVideos = getDb()
|
|
131
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
132
|
+
.get().c;
|
|
456
133
|
|
|
457
|
-
const
|
|
134
|
+
const insertUserStmt = getDb().prepare(
|
|
135
|
+
`INSERT OR IGNORE INTO users (unique_id) VALUES (?)`,
|
|
136
|
+
);
|
|
137
|
+
const insertVideoStmt = getDb().prepare(
|
|
138
|
+
`INSERT OR IGNORE INTO videos (id, href, author_unique_id, location_created, tt_seller, registered_at, user_update_count, create_time) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
139
|
+
);
|
|
140
|
+
|
|
141
|
+
const importUsersTxn = getDb().transaction((items) => {
|
|
458
142
|
for (const item of items) {
|
|
459
143
|
const uniqueId = item.uniqueId || item.unique_id;
|
|
460
144
|
if (!uniqueId) continue;
|
|
@@ -463,7 +147,7 @@ export function importLegacyJsonToDb({
|
|
|
463
147
|
}
|
|
464
148
|
});
|
|
465
149
|
|
|
466
|
-
const importVideosTxn =
|
|
150
|
+
const importVideosTxn = getDb().transaction((items) => {
|
|
467
151
|
for (const item of items) {
|
|
468
152
|
if (!item?.id) continue;
|
|
469
153
|
insertVideoStmt.run(
|
|
@@ -482,12 +166,14 @@ export function importLegacyJsonToDb({
|
|
|
482
166
|
importUsersTxn(legacyUsers);
|
|
483
167
|
importVideosTxn(legacyVideos);
|
|
484
168
|
|
|
485
|
-
const afterUsers =
|
|
486
|
-
const afterJobs =
|
|
487
|
-
const afterVideos =
|
|
169
|
+
const afterUsers = getDb().prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
170
|
+
const afterJobs = getDb().prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
171
|
+
const afterVideos = getDb()
|
|
172
|
+
.prepare("SELECT COUNT(*) as c FROM videos")
|
|
173
|
+
.get().c;
|
|
488
174
|
|
|
489
175
|
return {
|
|
490
|
-
dbPath,
|
|
176
|
+
dbPath: getDbPath(),
|
|
491
177
|
usersImported: afterUsers - beforeUsers,
|
|
492
178
|
jobsImported: afterJobs - beforeJobs,
|
|
493
179
|
videosImported: afterVideos - beforeVideos,
|
|
@@ -501,1082 +187,6 @@ export function closeStoreDb() {
|
|
|
501
187
|
resetDbConnection();
|
|
502
188
|
}
|
|
503
189
|
|
|
504
|
-
function hasUserInDb(uid) {
|
|
505
|
-
if (!db) return false;
|
|
506
|
-
const row = db.prepare("SELECT 1 FROM users WHERE unique_id = ?").get(uid);
|
|
507
|
-
return !!row;
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
function addUserToDb(user) {
|
|
511
|
-
if (!db) return;
|
|
512
|
-
db.prepare(
|
|
513
|
-
`
|
|
514
|
-
INSERT OR IGNORE INTO users (unique_id, tt_seller, verified, location_created, created_at, updated_at)
|
|
515
|
-
VALUES (?, ?, ?, ?, ?, ?)
|
|
516
|
-
`,
|
|
517
|
-
).run(
|
|
518
|
-
user.uniqueId,
|
|
519
|
-
user.ttSeller === undefined ||
|
|
520
|
-
user.ttSeller === null ||
|
|
521
|
-
user.ttSeller === ""
|
|
522
|
-
? null
|
|
523
|
-
: user.ttSeller
|
|
524
|
-
? 1
|
|
525
|
-
: 0,
|
|
526
|
-
user.verified === undefined ||
|
|
527
|
-
user.verified === null ||
|
|
528
|
-
user.verified === ""
|
|
529
|
-
? null
|
|
530
|
-
: user.verified
|
|
531
|
-
? 1
|
|
532
|
-
: 0,
|
|
533
|
-
user.locationCreated || null,
|
|
534
|
-
new Date().toISOString(),
|
|
535
|
-
new Date().toISOString(),
|
|
536
|
-
);
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
function addJobToDb(user) {
|
|
540
|
-
if (!db) return;
|
|
541
|
-
const now = Date.now();
|
|
542
|
-
db.prepare(
|
|
543
|
-
`
|
|
544
|
-
INSERT OR IGNORE INTO jobs (
|
|
545
|
-
unique_id,
|
|
546
|
-
nickname,
|
|
547
|
-
status,
|
|
548
|
-
sources,
|
|
549
|
-
claimed_by,
|
|
550
|
-
claimed_at,
|
|
551
|
-
error,
|
|
552
|
-
pinned,
|
|
553
|
-
no_video,
|
|
554
|
-
restricted,
|
|
555
|
-
user_update_count,
|
|
556
|
-
tt_seller,
|
|
557
|
-
verified,
|
|
558
|
-
video_count,
|
|
559
|
-
comment_count,
|
|
560
|
-
guessed_location,
|
|
561
|
-
location_created,
|
|
562
|
-
follower_count,
|
|
563
|
-
following_count,
|
|
564
|
-
heart_count,
|
|
565
|
-
refresh_time,
|
|
566
|
-
processed,
|
|
567
|
-
processed_at,
|
|
568
|
-
created_at,
|
|
569
|
-
updated_at,
|
|
570
|
-
region,
|
|
571
|
-
signature,
|
|
572
|
-
bio_link,
|
|
573
|
-
sec_uid
|
|
574
|
-
)
|
|
575
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
576
|
-
`,
|
|
577
|
-
).run(
|
|
578
|
-
user.uniqueId,
|
|
579
|
-
user.nickname || null,
|
|
580
|
-
user.status || inferStatus(user),
|
|
581
|
-
JSON.stringify(
|
|
582
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
583
|
-
),
|
|
584
|
-
user.claimedBy || null,
|
|
585
|
-
user.claimedAt || null,
|
|
586
|
-
user.error || null,
|
|
587
|
-
user.pinned ? 1 : 0,
|
|
588
|
-
user.noVideo ? 1 : 0,
|
|
589
|
-
user.restricted ? 1 : 0,
|
|
590
|
-
user.userUpdateCount || 0,
|
|
591
|
-
user.ttSeller === undefined ||
|
|
592
|
-
user.ttSeller === null ||
|
|
593
|
-
user.ttSeller === ""
|
|
594
|
-
? null
|
|
595
|
-
: user.ttSeller
|
|
596
|
-
? 1
|
|
597
|
-
: 0,
|
|
598
|
-
user.verified === undefined ||
|
|
599
|
-
user.verified === null ||
|
|
600
|
-
user.verified === ""
|
|
601
|
-
? null
|
|
602
|
-
: user.verified
|
|
603
|
-
? 1
|
|
604
|
-
: 0,
|
|
605
|
-
user.videoCount || 0,
|
|
606
|
-
user.commentCount || 0,
|
|
607
|
-
user.guessedLocation || null,
|
|
608
|
-
user.locationCreated || null,
|
|
609
|
-
user.followerCount || 0,
|
|
610
|
-
user.followingCount || 0,
|
|
611
|
-
user.heartCount || 0,
|
|
612
|
-
user.refreshTime || null,
|
|
613
|
-
user.processed ? 1 : 0,
|
|
614
|
-
user.processedAt || null,
|
|
615
|
-
user.createdAt || now,
|
|
616
|
-
user.updatedAt || now,
|
|
617
|
-
user.region || null,
|
|
618
|
-
user.signature || null,
|
|
619
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
620
|
-
user.secUid || null,
|
|
621
|
-
);
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
function getUserDbCount() {
|
|
625
|
-
if (!db) return 0;
|
|
626
|
-
return db.prepare("SELECT COUNT(*) as c FROM users").get().c;
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
function getJobsCount() {
|
|
630
|
-
if (!db) return 0;
|
|
631
|
-
return db.prepare("SELECT COUNT(*) as c FROM jobs").get().c;
|
|
632
|
-
}
|
|
633
|
-
|
|
634
|
-
function getPendingJobsCount() {
|
|
635
|
-
if (!db) return 0;
|
|
636
|
-
return db
|
|
637
|
-
.prepare("SELECT COUNT(*) as c FROM jobs WHERE status = 'pending'")
|
|
638
|
-
.get().c;
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
function getPendingJobsUserUpdateCount() {
|
|
642
|
-
if (!db) return 0;
|
|
643
|
-
return db
|
|
644
|
-
.prepare(
|
|
645
|
-
`
|
|
646
|
-
SELECT COUNT(*) as c
|
|
647
|
-
FROM jobs
|
|
648
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
649
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
650
|
-
`,
|
|
651
|
-
)
|
|
652
|
-
.get().c;
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
function getRawJobsCount() {
|
|
656
|
-
if (!db) return 0;
|
|
657
|
-
return db.prepare("SELECT COUNT(*) as c FROM raw_jobs").get().c;
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
function getDashboardStatsFromDb(targetLocations = []) {
|
|
661
|
-
if (!db) return null;
|
|
662
|
-
|
|
663
|
-
const targetPlaceholders = targetLocations.map(() => "?").join(", ");
|
|
664
|
-
const targetParams = targetLocations.length ? targetLocations : [];
|
|
665
|
-
|
|
666
|
-
// 合并所有 jobs 表的聚合统计为单次扫描
|
|
667
|
-
const aggregateRow = db
|
|
668
|
-
.prepare(
|
|
669
|
-
`
|
|
670
|
-
SELECT
|
|
671
|
-
COUNT(*) as total,
|
|
672
|
-
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
|
|
673
|
-
SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processing,
|
|
674
|
-
SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done,
|
|
675
|
-
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error,
|
|
676
|
-
SUM(CASE WHEN status = 'restricted' THEN 1 ELSE 0 END) as restricted,
|
|
677
|
-
SUM(CASE WHEN tt_seller = 1 AND verified = 0 ${
|
|
678
|
-
targetLocations.length
|
|
679
|
-
? `AND location_created IN (${targetPlaceholders})`
|
|
680
|
-
: "AND 1 = 0"
|
|
681
|
-
} THEN 1 ELSE 0 END) as targetUsers,
|
|
682
|
-
SUM(CASE WHEN no_video = 1 THEN 1 ELSE 0 END) as noVideo,
|
|
683
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"video"') > 0 THEN 1 ELSE 0 END) as video,
|
|
684
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"comment"') > 0 THEN 1 ELSE 0 END) as comment,
|
|
685
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"guess"') > 0 THEN 1 ELSE 0 END) as guess,
|
|
686
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"following"') > 0 THEN 1 ELSE 0 END) as following,
|
|
687
|
-
SUM(CASE WHEN status != 'done' AND instr(COALESCE(sources, ''), '"follower"') > 0 THEN 1 ELSE 0 END) as follower,
|
|
688
|
-
SUM(CASE
|
|
689
|
-
WHEN status != 'done'
|
|
690
|
-
AND instr(COALESCE(sources, ''), '"video"') = 0
|
|
691
|
-
AND instr(COALESCE(sources, ''), '"comment"') = 0
|
|
692
|
-
AND instr(COALESCE(sources, ''), '"guess"') = 0
|
|
693
|
-
AND instr(COALESCE(sources, ''), '"following"') = 0
|
|
694
|
-
AND instr(COALESCE(sources, ''), '"follower"') = 0
|
|
695
|
-
THEN 1 ELSE 0 END) as seed
|
|
696
|
-
FROM jobs
|
|
697
|
-
`,
|
|
698
|
-
)
|
|
699
|
-
.get(...targetParams);
|
|
700
|
-
|
|
701
|
-
// userUpdateTasks 单独从 jobs_base 统计
|
|
702
|
-
const userUpdateTasksRow = db
|
|
703
|
-
.prepare(
|
|
704
|
-
`
|
|
705
|
-
SELECT COUNT(*) as userUpdateTasks
|
|
706
|
-
FROM jobs_base
|
|
707
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
708
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
709
|
-
`,
|
|
710
|
-
)
|
|
711
|
-
.get();
|
|
712
|
-
|
|
713
|
-
// countryStats 和 targetCountryStats 需要 GROUP BY,保留为独立查询
|
|
714
|
-
const countryStats = db
|
|
715
|
-
.prepare(
|
|
716
|
-
`
|
|
717
|
-
SELECT
|
|
718
|
-
COALESCE(location_created, '未知') as country,
|
|
719
|
-
COUNT(*) as count,
|
|
720
|
-
SUM(CASE
|
|
721
|
-
WHEN tt_seller = 1 AND verified = 0 ${
|
|
722
|
-
targetLocations.length
|
|
723
|
-
? `AND location_created IN (${targetPlaceholders})`
|
|
724
|
-
: "AND 1 = 0"
|
|
725
|
-
}
|
|
726
|
-
THEN 1 ELSE 0 END) as targetCount
|
|
727
|
-
FROM jobs
|
|
728
|
-
WHERE status = 'done'
|
|
729
|
-
GROUP BY COALESCE(location_created, '未知')
|
|
730
|
-
ORDER BY count DESC
|
|
731
|
-
`,
|
|
732
|
-
)
|
|
733
|
-
.all(...targetParams);
|
|
734
|
-
|
|
735
|
-
const targetCountryStats = targetLocations.length
|
|
736
|
-
? db
|
|
737
|
-
.prepare(
|
|
738
|
-
`
|
|
739
|
-
SELECT location_created as country, COUNT(*) as count
|
|
740
|
-
FROM jobs
|
|
741
|
-
WHERE tt_seller = 1
|
|
742
|
-
AND verified = 0
|
|
743
|
-
AND location_created IN (${targetPlaceholders})
|
|
744
|
-
GROUP BY location_created
|
|
745
|
-
ORDER BY count DESC
|
|
746
|
-
`,
|
|
747
|
-
)
|
|
748
|
-
.all(...targetLocations)
|
|
749
|
-
: [];
|
|
750
|
-
|
|
751
|
-
const jobsBaseCount = db
|
|
752
|
-
.prepare("SELECT COUNT(*) as total FROM jobs_base")
|
|
753
|
-
.get().total;
|
|
754
|
-
|
|
755
|
-
return {
|
|
756
|
-
totalUsers: aggregateRow.total,
|
|
757
|
-
rawJobs: getRawJobsCount(),
|
|
758
|
-
dbTotalUsers: getUserDbCount(),
|
|
759
|
-
jobsTotal: aggregateRow.total,
|
|
760
|
-
jobsBaseTotal: jobsBaseCount,
|
|
761
|
-
jobsPending: aggregateRow.pending,
|
|
762
|
-
processedUsers: aggregateRow.done,
|
|
763
|
-
pendingUsers: aggregateRow.pending,
|
|
764
|
-
processingUsers: aggregateRow.processing,
|
|
765
|
-
restrictedUsers: aggregateRow.restricted,
|
|
766
|
-
errorUsers: aggregateRow.error,
|
|
767
|
-
targetUsers: aggregateRow.targetUsers,
|
|
768
|
-
userUpdateTasks: userUpdateTasksRow.userUpdateTasks,
|
|
769
|
-
targetCountryStats,
|
|
770
|
-
countryStats,
|
|
771
|
-
sourceStats: {
|
|
772
|
-
seed: aggregateRow.seed || 0,
|
|
773
|
-
video: aggregateRow.video || 0,
|
|
774
|
-
comment: aggregateRow.comment || 0,
|
|
775
|
-
guess: aggregateRow.guess || 0,
|
|
776
|
-
following: aggregateRow.following || 0,
|
|
777
|
-
follower: aggregateRow.follower || 0,
|
|
778
|
-
processed: aggregateRow.done,
|
|
779
|
-
restricted: aggregateRow.restricted,
|
|
780
|
-
error: aggregateRow.error,
|
|
781
|
-
noVideo: aggregateRow.noVideo || 0,
|
|
782
|
-
},
|
|
783
|
-
};
|
|
784
|
-
}
|
|
785
|
-
|
|
786
|
-
function getPendingByCountryFromDb() {
|
|
787
|
-
if (!db) return [];
|
|
788
|
-
|
|
789
|
-
// 按 guessed_location 分组统计待处理任务
|
|
790
|
-
const rows = db
|
|
791
|
-
.prepare(
|
|
792
|
-
`
|
|
793
|
-
SELECT
|
|
794
|
-
COALESCE(guessed_location, '未知') as country,
|
|
795
|
-
COUNT(*) as count
|
|
796
|
-
FROM jobs
|
|
797
|
-
WHERE status = 'pending'
|
|
798
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
799
|
-
ORDER BY count DESC
|
|
800
|
-
`,
|
|
801
|
-
)
|
|
802
|
-
.all();
|
|
803
|
-
|
|
804
|
-
return rows;
|
|
805
|
-
}
|
|
806
|
-
|
|
807
|
-
function getUserUpdateByCountryFromDb() {
|
|
808
|
-
if (!db) return [];
|
|
809
|
-
|
|
810
|
-
// 按 guessed_location 分组统计待补资料任务
|
|
811
|
-
const rows = db
|
|
812
|
-
.prepare(
|
|
813
|
-
`
|
|
814
|
-
SELECT
|
|
815
|
-
COALESCE(guessed_location, '未知') as country,
|
|
816
|
-
COUNT(*) as count
|
|
817
|
-
FROM jobs_base
|
|
818
|
-
WHERE tt_seller IS NULL
|
|
819
|
-
AND COALESCE(user_update_count, 0) <= 0
|
|
820
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
821
|
-
ORDER BY count DESC
|
|
822
|
-
`,
|
|
823
|
-
)
|
|
824
|
-
.all();
|
|
825
|
-
|
|
826
|
-
return rows;
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
function getAttachStuckByCountryFromDb() {
|
|
830
|
-
if (!db) return [];
|
|
831
|
-
|
|
832
|
-
return db
|
|
833
|
-
.prepare(
|
|
834
|
-
`
|
|
835
|
-
SELECT
|
|
836
|
-
COALESCE(guessed_location, '未知') as country,
|
|
837
|
-
COUNT(*) as count
|
|
838
|
-
FROM jobs_base
|
|
839
|
-
WHERE tt_seller IS NULL
|
|
840
|
-
AND COALESCE(user_update_count, 0) = 1
|
|
841
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
842
|
-
ORDER BY count DESC
|
|
843
|
-
`,
|
|
844
|
-
)
|
|
845
|
-
.all();
|
|
846
|
-
}
|
|
847
|
-
|
|
848
|
-
function restoreAttachStuckByCountry(country) {
|
|
849
|
-
if (!db) {
|
|
850
|
-
return { restored: 0, country, error: "db not ready" };
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
854
|
-
if (!normalizedCountry) {
|
|
855
|
-
return {
|
|
856
|
-
restored: 0,
|
|
857
|
-
country: normalizedCountry,
|
|
858
|
-
error: "country is required",
|
|
859
|
-
};
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
const whereSql = `
|
|
863
|
-
COALESCE(tt_seller, '') = ''
|
|
864
|
-
AND COALESCE(user_update_count, 0) = 1
|
|
865
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
866
|
-
`;
|
|
867
|
-
const count =
|
|
868
|
-
db
|
|
869
|
-
.prepare(
|
|
870
|
-
`
|
|
871
|
-
SELECT COUNT(*) as c
|
|
872
|
-
FROM jobs_base
|
|
873
|
-
WHERE ${whereSql}
|
|
874
|
-
`,
|
|
875
|
-
)
|
|
876
|
-
.get(normalizedCountry)?.c || 0;
|
|
877
|
-
|
|
878
|
-
if (!count) {
|
|
879
|
-
return { restored: 0, country: normalizedCountry };
|
|
880
|
-
}
|
|
881
|
-
|
|
882
|
-
db.prepare(
|
|
883
|
-
`
|
|
884
|
-
UPDATE jobs_base
|
|
885
|
-
SET user_update_count = 0,
|
|
886
|
-
updated_at = ?,
|
|
887
|
-
claimed_by = NULL,
|
|
888
|
-
claimed_at = NULL
|
|
889
|
-
WHERE ${whereSql}
|
|
890
|
-
`,
|
|
891
|
-
).run(Date.now(), normalizedCountry);
|
|
892
|
-
|
|
893
|
-
return { restored: count, country: normalizedCountry };
|
|
894
|
-
}
|
|
895
|
-
|
|
896
|
-
function resetPendingByCountry(country) {
|
|
897
|
-
if (!db) {
|
|
898
|
-
return { reset: 0, country, error: "db not ready" };
|
|
899
|
-
}
|
|
900
|
-
|
|
901
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
902
|
-
if (!normalizedCountry) {
|
|
903
|
-
return {
|
|
904
|
-
reset: 0,
|
|
905
|
-
country: normalizedCountry,
|
|
906
|
-
error: "country is required",
|
|
907
|
-
};
|
|
908
|
-
}
|
|
909
|
-
|
|
910
|
-
const whereSql = `
|
|
911
|
-
status = 'pending'
|
|
912
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
913
|
-
`;
|
|
914
|
-
const count =
|
|
915
|
-
db
|
|
916
|
-
.prepare(
|
|
917
|
-
`
|
|
918
|
-
SELECT COUNT(*) as c
|
|
919
|
-
FROM jobs
|
|
920
|
-
WHERE ${whereSql}
|
|
921
|
-
`,
|
|
922
|
-
)
|
|
923
|
-
.get(normalizedCountry)?.c || 0;
|
|
924
|
-
|
|
925
|
-
if (!count) {
|
|
926
|
-
return { reset: 0, country: normalizedCountry };
|
|
927
|
-
}
|
|
928
|
-
|
|
929
|
-
db.prepare(
|
|
930
|
-
`
|
|
931
|
-
UPDATE jobs
|
|
932
|
-
SET user_update_count = 0,
|
|
933
|
-
updated_at = ?,
|
|
934
|
-
claimed_by = NULL,
|
|
935
|
-
claimed_at = NULL
|
|
936
|
-
WHERE ${whereSql}
|
|
937
|
-
`,
|
|
938
|
-
).run(Date.now(), normalizedCountry);
|
|
939
|
-
|
|
940
|
-
return { reset: count, country: normalizedCountry };
|
|
941
|
-
}
|
|
942
|
-
|
|
943
|
-
function getRawByCountryFromDb() {
|
|
944
|
-
if (!db) return [];
|
|
945
|
-
|
|
946
|
-
return db
|
|
947
|
-
.prepare(
|
|
948
|
-
`
|
|
949
|
-
SELECT
|
|
950
|
-
COALESCE(guessed_location, '未知') as country,
|
|
951
|
-
COUNT(*) as count
|
|
952
|
-
FROM raw_jobs
|
|
953
|
-
GROUP BY COALESCE(guessed_location, '未知')
|
|
954
|
-
ORDER BY count DESC
|
|
955
|
-
`,
|
|
956
|
-
)
|
|
957
|
-
.all();
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
function moveJobsToRawByCountry(scope, country) {
|
|
961
|
-
if (!db) {
|
|
962
|
-
return { moved: 0, scope, country, error: "db not ready" };
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
const normalizedScope = String(scope || "").trim();
|
|
966
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
967
|
-
if (!normalizedCountry) {
|
|
968
|
-
return {
|
|
969
|
-
moved: 0,
|
|
970
|
-
scope: normalizedScope,
|
|
971
|
-
country: normalizedCountry,
|
|
972
|
-
error: "country is required",
|
|
973
|
-
};
|
|
974
|
-
}
|
|
975
|
-
|
|
976
|
-
// pending 操作 jobs 表(与 getPendingByCountryFromDb 数据源一致)
|
|
977
|
-
// userUpdate 操作 jobs_base 表(与 getUserUpdateByCountryFromDb 数据源一致)
|
|
978
|
-
let sourceTable = "";
|
|
979
|
-
let scopeWhere = "";
|
|
980
|
-
let columns = "";
|
|
981
|
-
|
|
982
|
-
if (normalizedScope === "pending") {
|
|
983
|
-
sourceTable = "jobs";
|
|
984
|
-
scopeWhere = `status = 'pending'`;
|
|
985
|
-
columns = `
|
|
986
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
987
|
-
error, pinned, no_video, restricted, user_update_count,
|
|
988
|
-
tt_seller, verified, video_count, comment_count,
|
|
989
|
-
guessed_location, location_created, follower_count,
|
|
990
|
-
following_count, heart_count, refresh_time, processed,
|
|
991
|
-
processed_at, created_at, updated_at, region, signature,
|
|
992
|
-
sec_uid, latest_video_time, user_create_time
|
|
993
|
-
`;
|
|
994
|
-
} else if (normalizedScope === "userUpdate") {
|
|
995
|
-
sourceTable = "jobs_base";
|
|
996
|
-
scopeWhere = `tt_seller IS NULL AND COALESCE(user_update_count, 0) <= 0`;
|
|
997
|
-
columns = `
|
|
998
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
999
|
-
error, pinned, no_video, restricted, user_update_count,
|
|
1000
|
-
tt_seller, verified, video_count, comment_count,
|
|
1001
|
-
guessed_location, location_created, follower_count,
|
|
1002
|
-
following_count, heart_count, refresh_time, processed,
|
|
1003
|
-
processed_at, created_at, updated_at, region, signature,
|
|
1004
|
-
sec_uid, latest_video_time, user_create_time
|
|
1005
|
-
`;
|
|
1006
|
-
} else {
|
|
1007
|
-
return {
|
|
1008
|
-
moved: 0,
|
|
1009
|
-
scope: normalizedScope,
|
|
1010
|
-
country: normalizedCountry,
|
|
1011
|
-
error: "unsupported scope",
|
|
1012
|
-
};
|
|
1013
|
-
}
|
|
1014
|
-
|
|
1015
|
-
const whereSql = `
|
|
1016
|
-
${scopeWhere}
|
|
1017
|
-
AND COALESCE(guessed_location, '未知') = ?
|
|
1018
|
-
`;
|
|
1019
|
-
const count =
|
|
1020
|
-
db
|
|
1021
|
-
.prepare(
|
|
1022
|
-
`
|
|
1023
|
-
SELECT COUNT(*) as c
|
|
1024
|
-
FROM ${sourceTable}
|
|
1025
|
-
WHERE ${whereSql}
|
|
1026
|
-
`,
|
|
1027
|
-
)
|
|
1028
|
-
.get(normalizedCountry)?.c || 0;
|
|
1029
|
-
|
|
1030
|
-
if (!count) {
|
|
1031
|
-
return { moved: 0, scope: normalizedScope, country: normalizedCountry };
|
|
1032
|
-
}
|
|
1033
|
-
|
|
1034
|
-
const moveTxn = db.transaction((targetCountry) => {
|
|
1035
|
-
db.prepare(
|
|
1036
|
-
`
|
|
1037
|
-
INSERT OR REPLACE INTO raw_jobs (
|
|
1038
|
-
${columns}
|
|
1039
|
-
)
|
|
1040
|
-
SELECT
|
|
1041
|
-
${columns}
|
|
1042
|
-
FROM ${sourceTable}
|
|
1043
|
-
WHERE ${whereSql}
|
|
1044
|
-
`,
|
|
1045
|
-
).run(targetCountry);
|
|
1046
|
-
|
|
1047
|
-
db.prepare(
|
|
1048
|
-
`
|
|
1049
|
-
DELETE FROM ${sourceTable}
|
|
1050
|
-
WHERE ${whereSql}
|
|
1051
|
-
`,
|
|
1052
|
-
).run(targetCountry);
|
|
1053
|
-
});
|
|
1054
|
-
|
|
1055
|
-
moveTxn(normalizedCountry);
|
|
1056
|
-
return { moved: count, scope: normalizedScope, country: normalizedCountry };
|
|
1057
|
-
}
|
|
1058
|
-
|
|
1059
|
-
function restoreRawJobsByCountry(country) {
|
|
1060
|
-
if (!db) {
|
|
1061
|
-
return { restored: 0, country, error: "db not ready" };
|
|
1062
|
-
}
|
|
1063
|
-
|
|
1064
|
-
const normalizedCountry = String(country == null ? "未知" : country).trim();
|
|
1065
|
-
if (!normalizedCountry) {
|
|
1066
|
-
return {
|
|
1067
|
-
restored: 0,
|
|
1068
|
-
country: normalizedCountry,
|
|
1069
|
-
error: "country is required",
|
|
1070
|
-
};
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
const whereSql = `COALESCE(guessed_location, '未知') = ?`;
|
|
1074
|
-
const count =
|
|
1075
|
-
db
|
|
1076
|
-
.prepare(
|
|
1077
|
-
`
|
|
1078
|
-
SELECT COUNT(*) as c
|
|
1079
|
-
FROM raw_jobs
|
|
1080
|
-
WHERE ${whereSql}
|
|
1081
|
-
`,
|
|
1082
|
-
)
|
|
1083
|
-
.get(normalizedCountry)?.c || 0;
|
|
1084
|
-
|
|
1085
|
-
if (!count) {
|
|
1086
|
-
return { restored: 0, country: normalizedCountry };
|
|
1087
|
-
}
|
|
1088
|
-
|
|
1089
|
-
const restoreTxn = db.transaction((targetCountry) => {
|
|
1090
|
-
db.prepare(
|
|
1091
|
-
`
|
|
1092
|
-
INSERT OR REPLACE INTO jobs (
|
|
1093
|
-
unique_id,
|
|
1094
|
-
nickname,
|
|
1095
|
-
status,
|
|
1096
|
-
sources,
|
|
1097
|
-
claimed_by,
|
|
1098
|
-
claimed_at,
|
|
1099
|
-
error,
|
|
1100
|
-
pinned,
|
|
1101
|
-
no_video,
|
|
1102
|
-
restricted,
|
|
1103
|
-
user_update_count,
|
|
1104
|
-
tt_seller,
|
|
1105
|
-
verified,
|
|
1106
|
-
video_count,
|
|
1107
|
-
comment_count,
|
|
1108
|
-
guessed_location,
|
|
1109
|
-
location_created,
|
|
1110
|
-
follower_count,
|
|
1111
|
-
following_count,
|
|
1112
|
-
heart_count,
|
|
1113
|
-
refresh_time,
|
|
1114
|
-
processed,
|
|
1115
|
-
processed_at,
|
|
1116
|
-
created_at,
|
|
1117
|
-
updated_at,
|
|
1118
|
-
region,
|
|
1119
|
-
signature,
|
|
1120
|
-
sec_uid
|
|
1121
|
-
)
|
|
1122
|
-
SELECT
|
|
1123
|
-
unique_id,
|
|
1124
|
-
nickname,
|
|
1125
|
-
status,
|
|
1126
|
-
sources,
|
|
1127
|
-
claimed_by,
|
|
1128
|
-
claimed_at,
|
|
1129
|
-
error,
|
|
1130
|
-
pinned,
|
|
1131
|
-
no_video,
|
|
1132
|
-
restricted,
|
|
1133
|
-
user_update_count,
|
|
1134
|
-
tt_seller,
|
|
1135
|
-
verified,
|
|
1136
|
-
video_count,
|
|
1137
|
-
comment_count,
|
|
1138
|
-
guessed_location,
|
|
1139
|
-
location_created,
|
|
1140
|
-
follower_count,
|
|
1141
|
-
following_count,
|
|
1142
|
-
heart_count,
|
|
1143
|
-
refresh_time,
|
|
1144
|
-
processed,
|
|
1145
|
-
processed_at,
|
|
1146
|
-
created_at,
|
|
1147
|
-
updated_at,
|
|
1148
|
-
region,
|
|
1149
|
-
signature,
|
|
1150
|
-
sec_uid
|
|
1151
|
-
FROM raw_jobs
|
|
1152
|
-
WHERE ${whereSql}
|
|
1153
|
-
`,
|
|
1154
|
-
).run(targetCountry);
|
|
1155
|
-
|
|
1156
|
-
db.prepare(
|
|
1157
|
-
`
|
|
1158
|
-
DELETE FROM raw_jobs
|
|
1159
|
-
WHERE ${whereSql}
|
|
1160
|
-
`,
|
|
1161
|
-
).run(targetCountry);
|
|
1162
|
-
});
|
|
1163
|
-
|
|
1164
|
-
restoreTxn(normalizedCountry);
|
|
1165
|
-
return { restored: count, country: normalizedCountry };
|
|
1166
|
-
}
|
|
1167
|
-
|
|
1168
|
-
function restoreRawJobById(uniqueId) {
|
|
1169
|
-
if (!db) {
|
|
1170
|
-
return { restored: 0, uniqueId, error: "db not ready" };
|
|
1171
|
-
}
|
|
1172
|
-
|
|
1173
|
-
const safeId = String(uniqueId).trim();
|
|
1174
|
-
if (!safeId) {
|
|
1175
|
-
return { restored: 0, uniqueId: safeId, error: "uniqueId is required" };
|
|
1176
|
-
}
|
|
1177
|
-
|
|
1178
|
-
const exists =
|
|
1179
|
-
db
|
|
1180
|
-
.prepare("SELECT COUNT(*) as c FROM raw_jobs WHERE unique_id = ?")
|
|
1181
|
-
.get(safeId)?.c || 0;
|
|
1182
|
-
|
|
1183
|
-
if (!exists) {
|
|
1184
|
-
return { restored: 0, uniqueId: safeId };
|
|
1185
|
-
}
|
|
1186
|
-
|
|
1187
|
-
const restoreTxn = db.transaction(() => {
|
|
1188
|
-
db.prepare(
|
|
1189
|
-
`
|
|
1190
|
-
INSERT OR REPLACE INTO jobs (
|
|
1191
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1192
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1193
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1194
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1195
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1196
|
-
)
|
|
1197
|
-
SELECT
|
|
1198
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1199
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1200
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1201
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1202
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1203
|
-
FROM raw_jobs WHERE unique_id = ?
|
|
1204
|
-
`,
|
|
1205
|
-
).run(safeId);
|
|
1206
|
-
|
|
1207
|
-
db.prepare("DELETE FROM raw_jobs WHERE unique_id = ?").run(safeId);
|
|
1208
|
-
});
|
|
1209
|
-
|
|
1210
|
-
restoreTxn();
|
|
1211
|
-
return { restored: 1, uniqueId: safeId };
|
|
1212
|
-
}
|
|
1213
|
-
|
|
1214
|
-
function restoreRawJobsByFilter({ search, location, hasVideo, hasFollower }) {
|
|
1215
|
-
if (!db) {
|
|
1216
|
-
return { restored: 0, error: "db not ready" };
|
|
1217
|
-
}
|
|
1218
|
-
|
|
1219
|
-
const where = [];
|
|
1220
|
-
const args = [];
|
|
1221
|
-
|
|
1222
|
-
if (search) {
|
|
1223
|
-
where.push(
|
|
1224
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1225
|
-
);
|
|
1226
|
-
const likeVal = `%${search.toLowerCase()}%`;
|
|
1227
|
-
args.push(likeVal, likeVal);
|
|
1228
|
-
}
|
|
1229
|
-
|
|
1230
|
-
if (location) {
|
|
1231
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1232
|
-
args.push(location);
|
|
1233
|
-
}
|
|
1234
|
-
|
|
1235
|
-
if (hasVideo) {
|
|
1236
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1237
|
-
}
|
|
1238
|
-
|
|
1239
|
-
if (hasFollower) {
|
|
1240
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1241
|
-
}
|
|
1242
|
-
|
|
1243
|
-
if (where.length === 0) {
|
|
1244
|
-
return { restored: 0, error: "at least one filter is required" };
|
|
1245
|
-
}
|
|
1246
|
-
|
|
1247
|
-
const whereSql = where.join(" AND ");
|
|
1248
|
-
|
|
1249
|
-
const count =
|
|
1250
|
-
db
|
|
1251
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
1252
|
-
.get(...args)?.c || 0;
|
|
1253
|
-
|
|
1254
|
-
if (!count) {
|
|
1255
|
-
return { restored: 0 };
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
const restoreTxn = db.transaction(() => {
|
|
1259
|
-
db.prepare(
|
|
1260
|
-
`
|
|
1261
|
-
INSERT OR REPLACE INTO jobs (
|
|
1262
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1263
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1264
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1265
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1266
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1267
|
-
)
|
|
1268
|
-
SELECT
|
|
1269
|
-
unique_id, nickname, status, sources, claimed_by, claimed_at, error,
|
|
1270
|
-
pinned, no_video, restricted, user_update_count, tt_seller, verified,
|
|
1271
|
-
video_count, comment_count, guessed_location, location_created,
|
|
1272
|
-
follower_count, following_count, heart_count, refresh_time,
|
|
1273
|
-
processed, processed_at, created_at, updated_at, region, signature, bio_link, sec_uid
|
|
1274
|
-
FROM raw_jobs WHERE ${whereSql}
|
|
1275
|
-
`,
|
|
1276
|
-
).run(...args);
|
|
1277
|
-
|
|
1278
|
-
db.prepare(`DELETE FROM raw_jobs WHERE ${whereSql}`).run(...args);
|
|
1279
|
-
});
|
|
1280
|
-
|
|
1281
|
-
restoreTxn();
|
|
1282
|
-
return { restored: count };
|
|
1283
|
-
}
|
|
1284
|
-
|
|
1285
|
-
function getRawJobsPageFromDb({
|
|
1286
|
-
search,
|
|
1287
|
-
location,
|
|
1288
|
-
limit,
|
|
1289
|
-
offset,
|
|
1290
|
-
hasVideo,
|
|
1291
|
-
hasFollower,
|
|
1292
|
-
}) {
|
|
1293
|
-
if (!db) return null;
|
|
1294
|
-
|
|
1295
|
-
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1296
|
-
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
1297
|
-
const where = [];
|
|
1298
|
-
const args = [];
|
|
1299
|
-
|
|
1300
|
-
if (search) {
|
|
1301
|
-
where.push(
|
|
1302
|
-
"(LOWER(unique_id) LIKE ? OR LOWER(COALESCE(nickname, '')) LIKE ?)",
|
|
1303
|
-
);
|
|
1304
|
-
const pattern = `%${String(search).toLowerCase()}%`;
|
|
1305
|
-
args.push(pattern, pattern);
|
|
1306
|
-
}
|
|
1307
|
-
if (location) {
|
|
1308
|
-
where.push("COALESCE(guessed_location, '未知') = ?");
|
|
1309
|
-
args.push(location);
|
|
1310
|
-
}
|
|
1311
|
-
if (hasVideo) {
|
|
1312
|
-
where.push("COALESCE(video_count, 0) > 0");
|
|
1313
|
-
}
|
|
1314
|
-
if (hasFollower) {
|
|
1315
|
-
where.push("COALESCE(follower_count, 0) > 0");
|
|
1316
|
-
}
|
|
1317
|
-
|
|
1318
|
-
const whereSql = where.length ? `WHERE ${where.join(" AND ")}` : "";
|
|
1319
|
-
const total = db
|
|
1320
|
-
.prepare(`SELECT COUNT(*) as c FROM raw_jobs ${whereSql}`)
|
|
1321
|
-
.get(...args).c;
|
|
1322
|
-
|
|
1323
|
-
const rows = db
|
|
1324
|
-
.prepare(
|
|
1325
|
-
`
|
|
1326
|
-
SELECT *
|
|
1327
|
-
FROM raw_jobs
|
|
1328
|
-
${whereSql}
|
|
1329
|
-
ORDER BY created_at DESC, unique_id ASC
|
|
1330
|
-
LIMIT ? OFFSET ?
|
|
1331
|
-
`,
|
|
1332
|
-
)
|
|
1333
|
-
.all(...args, safeLimit, safeOffset);
|
|
1334
|
-
|
|
1335
|
-
return {
|
|
1336
|
-
total,
|
|
1337
|
-
limit: safeLimit,
|
|
1338
|
-
offset: safeOffset,
|
|
1339
|
-
users: rows.map(mapJobRow),
|
|
1340
|
-
};
|
|
1341
|
-
}
|
|
1342
|
-
|
|
1343
|
-
// ====== Tag 发现与打分 CRUD ======
|
|
1344
|
-
|
|
1345
|
-
function insertTag(tag, countries, source = "llm") {
|
|
1346
|
-
if (!db) return { inserted: false, error: "db not ready" };
|
|
1347
|
-
// 防止存入带 # 前缀的 tag
|
|
1348
|
-
const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1349
|
-
if (!normalized || normalized.length < 2) {
|
|
1350
|
-
return { inserted: false, error: "invalid tag" };
|
|
1351
|
-
}
|
|
1352
|
-
try {
|
|
1353
|
-
const result = db
|
|
1354
|
-
.prepare(
|
|
1355
|
-
`
|
|
1356
|
-
INSERT OR IGNORE INTO tags (tag, countries, source)
|
|
1357
|
-
VALUES (?, ?, ?)
|
|
1358
|
-
`,
|
|
1359
|
-
)
|
|
1360
|
-
.run(normalized, JSON.stringify(countries), source);
|
|
1361
|
-
return { inserted: result.changes > 0, tag: normalized };
|
|
1362
|
-
} catch (e) {
|
|
1363
|
-
return { inserted: false, error: e.message };
|
|
1364
|
-
}
|
|
1365
|
-
}
|
|
1366
|
-
|
|
1367
|
-
function getTagsByStatus(status, limit = 100) {
|
|
1368
|
-
if (!db) return [];
|
|
1369
|
-
const rows = db
|
|
1370
|
-
.prepare(
|
|
1371
|
-
`
|
|
1372
|
-
SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?
|
|
1373
|
-
`,
|
|
1374
|
-
)
|
|
1375
|
-
.all(status, limit);
|
|
1376
|
-
return rows.map((r) => ({
|
|
1377
|
-
...r,
|
|
1378
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1379
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1380
|
-
}));
|
|
1381
|
-
}
|
|
1382
|
-
|
|
1383
|
-
function getTagsByCountry(country, minScore = 0) {
|
|
1384
|
-
if (!db) return [];
|
|
1385
|
-
const rows = db
|
|
1386
|
-
.prepare(
|
|
1387
|
-
`
|
|
1388
|
-
SELECT * FROM tags WHERE status != 'dead'
|
|
1389
|
-
ORDER BY score DESC
|
|
1390
|
-
`,
|
|
1391
|
-
)
|
|
1392
|
-
.all();
|
|
1393
|
-
// Filter in JS since countries is JSON
|
|
1394
|
-
return rows
|
|
1395
|
-
.map((r) => ({
|
|
1396
|
-
...r,
|
|
1397
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1398
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1399
|
-
}))
|
|
1400
|
-
.filter((r) => r.countries.includes(country) && r.score >= minScore);
|
|
1401
|
-
}
|
|
1402
|
-
|
|
1403
|
-
function getDeadTags(country) {
|
|
1404
|
-
if (!db) return [];
|
|
1405
|
-
const rows = db
|
|
1406
|
-
.prepare(
|
|
1407
|
-
`
|
|
1408
|
-
SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC
|
|
1409
|
-
`,
|
|
1410
|
-
)
|
|
1411
|
-
.all();
|
|
1412
|
-
return rows
|
|
1413
|
-
.map((r) => ({
|
|
1414
|
-
...r,
|
|
1415
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1416
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1417
|
-
}))
|
|
1418
|
-
.filter((r) => r.countries.includes(country));
|
|
1419
|
-
}
|
|
1420
|
-
|
|
1421
|
-
function claimTag(tag) {
|
|
1422
|
-
if (!db) return { ok: false, error: "db not ready" };
|
|
1423
|
-
// 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
|
|
1424
|
-
const result = db
|
|
1425
|
-
.prepare(
|
|
1426
|
-
"UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
|
|
1427
|
-
)
|
|
1428
|
-
.run(tag);
|
|
1429
|
-
if (result.changes === 0) {
|
|
1430
|
-
// 检查是否不存在 vs 已被别人锁定
|
|
1431
|
-
const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
|
|
1432
|
-
if (!row) return { ok: false, error: "tag not found" };
|
|
1433
|
-
return { ok: false, error: `tag status is ${row.status}, already claimed` };
|
|
1434
|
-
}
|
|
1435
|
-
return { ok: true, tag };
|
|
1436
|
-
}
|
|
1437
|
-
|
|
1438
|
-
function reportTagScore(tag, fields) {
|
|
1439
|
-
if (!db) return { ok: false, error: "db not ready" };
|
|
1440
|
-
const {
|
|
1441
|
-
score,
|
|
1442
|
-
status,
|
|
1443
|
-
totalPosts,
|
|
1444
|
-
authorCount,
|
|
1445
|
-
matchedAuthors,
|
|
1446
|
-
matchedCountries,
|
|
1447
|
-
pushedUsers,
|
|
1448
|
-
error,
|
|
1449
|
-
} = fields;
|
|
1450
|
-
const matchedCountriesJson = matchedCountries
|
|
1451
|
-
? JSON.stringify(matchedCountries)
|
|
1452
|
-
: null;
|
|
1453
|
-
const now = new Date().toISOString();
|
|
1454
|
-
|
|
1455
|
-
try {
|
|
1456
|
-
const result = db
|
|
1457
|
-
.prepare(
|
|
1458
|
-
`
|
|
1459
|
-
UPDATE tags SET
|
|
1460
|
-
score = COALESCE(?, score),
|
|
1461
|
-
status = COALESCE(?, status),
|
|
1462
|
-
total_posts = COALESCE(?, total_posts),
|
|
1463
|
-
author_count = COALESCE(?, author_count),
|
|
1464
|
-
matched_authors = COALESCE(?, matched_authors),
|
|
1465
|
-
matched_countries = COALESCE(?, matched_countries),
|
|
1466
|
-
pushed_users = COALESCE(?, pushed_users),
|
|
1467
|
-
last_error = COALESCE(?, last_error),
|
|
1468
|
-
scored_at = ?,
|
|
1469
|
-
score_count = score_count + 1
|
|
1470
|
-
WHERE tag = ?
|
|
1471
|
-
`,
|
|
1472
|
-
)
|
|
1473
|
-
.run(
|
|
1474
|
-
score ?? null,
|
|
1475
|
-
status ?? null,
|
|
1476
|
-
totalPosts ?? null,
|
|
1477
|
-
authorCount ?? null,
|
|
1478
|
-
matchedAuthors ?? null,
|
|
1479
|
-
matchedCountriesJson,
|
|
1480
|
-
pushedUsers ?? null,
|
|
1481
|
-
error ?? null,
|
|
1482
|
-
now,
|
|
1483
|
-
tag,
|
|
1484
|
-
);
|
|
1485
|
-
return { ok: result.changes > 0, tag };
|
|
1486
|
-
} catch (e) {
|
|
1487
|
-
return { ok: false, error: e.message };
|
|
1488
|
-
}
|
|
1489
|
-
}
|
|
1490
|
-
|
|
1491
|
-
function getAllTags(limit = 200) {
|
|
1492
|
-
if (!db) return [];
|
|
1493
|
-
const rows = db
|
|
1494
|
-
.prepare(
|
|
1495
|
-
`
|
|
1496
|
-
SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?
|
|
1497
|
-
`,
|
|
1498
|
-
)
|
|
1499
|
-
.all(limit);
|
|
1500
|
-
return rows.map((r) => ({
|
|
1501
|
-
...r,
|
|
1502
|
-
countries: JSON.parse(r.countries || "[]"),
|
|
1503
|
-
matched_countries: JSON.parse(r.matched_countries || "[]"),
|
|
1504
|
-
}));
|
|
1505
|
-
}
|
|
1506
|
-
|
|
1507
|
-
// 调试接口:直接执行 SQL 查询,返回原始数据
|
|
1508
|
-
function rawQuery(sql, params = []) {
|
|
1509
|
-
if (!db) return { error: "db not ready" };
|
|
1510
|
-
try {
|
|
1511
|
-
const rows = db.prepare(sql).all(...params);
|
|
1512
|
-
return { rows };
|
|
1513
|
-
} catch (e) {
|
|
1514
|
-
return { error: e.message };
|
|
1515
|
-
}
|
|
1516
|
-
}
|
|
1517
|
-
|
|
1518
|
-
// 清理 tags 表中以 # 开头的脏数据
|
|
1519
|
-
function normalizeTags() {
|
|
1520
|
-
if (!db) return { ok: false, error: "db not ready" };
|
|
1521
|
-
const dirtyRows = db
|
|
1522
|
-
.prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
|
|
1523
|
-
.all();
|
|
1524
|
-
const fixed = [];
|
|
1525
|
-
const merged = [];
|
|
1526
|
-
const skipped = [];
|
|
1527
|
-
|
|
1528
|
-
for (const row of dirtyRows) {
|
|
1529
|
-
const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
|
|
1530
|
-
if (!cleanTag || cleanTag.length < 2) {
|
|
1531
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1532
|
-
skipped.push({
|
|
1533
|
-
dirty: row.tag,
|
|
1534
|
-
reason: "empty after normalize, deleted",
|
|
1535
|
-
});
|
|
1536
|
-
continue;
|
|
1537
|
-
}
|
|
1538
|
-
|
|
1539
|
-
// 检查 cleanTag 是否已存在
|
|
1540
|
-
const existing = db
|
|
1541
|
-
.prepare("SELECT * FROM tags WHERE tag = ?")
|
|
1542
|
-
.get(cleanTag);
|
|
1543
|
-
if (existing) {
|
|
1544
|
-
// 合并:保留已有 clean 版本,合并 countries
|
|
1545
|
-
const oldCountries = JSON.parse(row.countries || "[]");
|
|
1546
|
-
const existCountries = JSON.parse(existing.countries || "[]");
|
|
1547
|
-
const mergedCountries = [
|
|
1548
|
-
...new Set([...existCountries, ...oldCountries]),
|
|
1549
|
-
];
|
|
1550
|
-
db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
|
|
1551
|
-
JSON.stringify(mergedCountries),
|
|
1552
|
-
cleanTag,
|
|
1553
|
-
);
|
|
1554
|
-
// 删除脏数据
|
|
1555
|
-
db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
|
|
1556
|
-
merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1557
|
-
} else {
|
|
1558
|
-
// 直接重命名
|
|
1559
|
-
db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
|
|
1560
|
-
fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
|
|
1561
|
-
}
|
|
1562
|
-
}
|
|
1563
|
-
|
|
1564
|
-
return {
|
|
1565
|
-
ok: true,
|
|
1566
|
-
fixed: fixed.length,
|
|
1567
|
-
merged: merged.length,
|
|
1568
|
-
skipped: skipped.length,
|
|
1569
|
-
details: { fixed, merged, skipped },
|
|
1570
|
-
};
|
|
1571
|
-
}
|
|
1572
|
-
|
|
1573
|
-
function clearTags() {
|
|
1574
|
-
if (!db) return { ok: false, error: "db not ready" };
|
|
1575
|
-
const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
|
|
1576
|
-
db.exec("DELETE FROM tags");
|
|
1577
|
-
return { ok: true, deleted: count };
|
|
1578
|
-
}
|
|
1579
|
-
|
|
1580
190
|
function getUsersPageFromDb({
|
|
1581
191
|
status,
|
|
1582
192
|
search,
|
|
@@ -1587,7 +197,7 @@ function getUsersPageFromDb({
|
|
|
1587
197
|
offset,
|
|
1588
198
|
targetLocations = [],
|
|
1589
199
|
}) {
|
|
1590
|
-
if (!
|
|
200
|
+
if (!getDb()) return null;
|
|
1591
201
|
|
|
1592
202
|
const safeLimit = Math.max(1, Math.min(200, parseInt(limit) || 50));
|
|
1593
203
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
@@ -1636,7 +246,7 @@ function getUsersPageFromDb({
|
|
|
1636
246
|
if (cachedCount && Date.now() - cachedCount.time < 5000) {
|
|
1637
247
|
total = cachedCount.c;
|
|
1638
248
|
} else {
|
|
1639
|
-
total =
|
|
249
|
+
total = getDb()
|
|
1640
250
|
.prepare(`SELECT COUNT(*) as c FROM jobs ${whereSql}`)
|
|
1641
251
|
.get(...args).c;
|
|
1642
252
|
getUsersPageFromDb._countCache.set(cacheKey, {
|
|
@@ -1646,7 +256,7 @@ function getUsersPageFromDb({
|
|
|
1646
256
|
}
|
|
1647
257
|
|
|
1648
258
|
// 只查询前端需要的列,避免 SELECT * 带来的大字段传输和 mapJobRow 开销
|
|
1649
|
-
const rows =
|
|
259
|
+
const rows = getDb()
|
|
1650
260
|
.prepare(
|
|
1651
261
|
`
|
|
1652
262
|
SELECT
|
|
@@ -1688,13 +298,13 @@ function getUsersPageFromDb({
|
|
|
1688
298
|
}
|
|
1689
299
|
|
|
1690
300
|
function getTargetUsersFromDb(targetLocations = []) {
|
|
1691
|
-
if (!
|
|
301
|
+
if (!getDb()) return null;
|
|
1692
302
|
if (!targetLocations.length) {
|
|
1693
303
|
return { total: 0, users: [] };
|
|
1694
304
|
}
|
|
1695
305
|
|
|
1696
306
|
const placeholders = targetLocations.map(() => "?").join(", ");
|
|
1697
|
-
const rows =
|
|
307
|
+
const rows = getDb()
|
|
1698
308
|
.prepare(
|
|
1699
309
|
`
|
|
1700
310
|
SELECT
|
|
@@ -1703,6 +313,8 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1703
313
|
location_created, latest_video_time, refresh_time,
|
|
1704
314
|
guessed_location, pinned, processed_at, video_count,
|
|
1705
315
|
no_video, claimed_by, claimed_at, created_at, updated_at
|
|
316
|
+
FROM jobs
|
|
317
|
+
WHERE tt_seller = 1
|
|
1706
318
|
AND verified = 0
|
|
1707
319
|
AND location_created IN (${placeholders})
|
|
1708
320
|
ORDER BY COALESCE(follower_count, 0) DESC, unique_id ASC
|
|
@@ -1718,7 +330,7 @@ function getTargetUsersFromDb(targetLocations = []) {
|
|
|
1718
330
|
}
|
|
1719
331
|
|
|
1720
332
|
function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
1721
|
-
if (!
|
|
333
|
+
if (!getDb()) return null;
|
|
1722
334
|
if (!targetLocations.length) {
|
|
1723
335
|
return { countries: [] };
|
|
1724
336
|
}
|
|
@@ -1735,7 +347,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1735
347
|
|
|
1736
348
|
// 摘要模式:只返回各国统计数,不返回用户数据
|
|
1737
349
|
if (summaryOnly) {
|
|
1738
|
-
const statsRows =
|
|
350
|
+
const statsRows = getDb()
|
|
1739
351
|
.prepare(
|
|
1740
352
|
`
|
|
1741
353
|
SELECT location_created as country, COUNT(*) as count
|
|
@@ -1803,13 +415,16 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1803
415
|
/SELECT[^FROM]*FROM/,
|
|
1804
416
|
"SELECT COUNT(*) as cnt FROM",
|
|
1805
417
|
);
|
|
1806
|
-
const total =
|
|
418
|
+
const total =
|
|
419
|
+
getDb()
|
|
420
|
+
.prepare(countSql)
|
|
421
|
+
.get(...params)?.cnt || 0;
|
|
1807
422
|
|
|
1808
423
|
sql += ` LIMIT ? OFFSET ?`;
|
|
1809
424
|
const safeLimit = Math.min(Math.floor(limit), 10000);
|
|
1810
425
|
const safeOffset = Math.max(Math.floor(offset), 0);
|
|
1811
426
|
|
|
1812
|
-
const rows =
|
|
427
|
+
const rows = getDb()
|
|
1813
428
|
.prepare(sql)
|
|
1814
429
|
.all(...params, safeLimit, safeOffset)
|
|
1815
430
|
.map(mapJobRow);
|
|
@@ -1822,7 +437,7 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1822
437
|
};
|
|
1823
438
|
}
|
|
1824
439
|
|
|
1825
|
-
const rows =
|
|
440
|
+
const rows = getDb()
|
|
1826
441
|
.prepare(
|
|
1827
442
|
`
|
|
1828
443
|
SELECT
|
|
@@ -1873,327 +488,6 @@ function getTargetUsersByCountryFromDb(targetLocations = [], options = {}) {
|
|
|
1873
488
|
};
|
|
1874
489
|
}
|
|
1875
490
|
|
|
1876
|
-
function snakeToCamel(key) {
|
|
1877
|
-
return key.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
|
|
1878
|
-
}
|
|
1879
|
-
|
|
1880
|
-
function camelToSnake(key) {
|
|
1881
|
-
return key.replace(/[A-Z]/g, (ch) => `_${ch.toLowerCase()}`);
|
|
1882
|
-
}
|
|
1883
|
-
|
|
1884
|
-
const jobBooleanColumns = new Set([
|
|
1885
|
-
"pinned",
|
|
1886
|
-
"no_video",
|
|
1887
|
-
"restricted",
|
|
1888
|
-
"processed",
|
|
1889
|
-
"tt_seller",
|
|
1890
|
-
"verified",
|
|
1891
|
-
"error",
|
|
1892
|
-
]);
|
|
1893
|
-
|
|
1894
|
-
const videoBooleanColumns = new Set(["tt_seller"]);
|
|
1895
|
-
|
|
1896
|
-
const writableJobColumns = new Set([
|
|
1897
|
-
"nickname",
|
|
1898
|
-
"status",
|
|
1899
|
-
"sources",
|
|
1900
|
-
"claimed_by",
|
|
1901
|
-
"claimed_at",
|
|
1902
|
-
"error",
|
|
1903
|
-
"pinned",
|
|
1904
|
-
"no_video",
|
|
1905
|
-
"restricted",
|
|
1906
|
-
"user_update_count",
|
|
1907
|
-
"tt_seller",
|
|
1908
|
-
"verified",
|
|
1909
|
-
"video_count",
|
|
1910
|
-
"comment_count",
|
|
1911
|
-
"guessed_location",
|
|
1912
|
-
"location_created",
|
|
1913
|
-
"confirmed_location",
|
|
1914
|
-
"modified_at",
|
|
1915
|
-
"follower_count",
|
|
1916
|
-
"following_count",
|
|
1917
|
-
"heart_count",
|
|
1918
|
-
"refresh_time",
|
|
1919
|
-
"processed",
|
|
1920
|
-
"processed_at",
|
|
1921
|
-
"updated_at",
|
|
1922
|
-
"region",
|
|
1923
|
-
"signature",
|
|
1924
|
-
"bio_link",
|
|
1925
|
-
"sec_uid",
|
|
1926
|
-
"status_code",
|
|
1927
|
-
"latest_video_time",
|
|
1928
|
-
"top_video_play_count",
|
|
1929
|
-
"top_video_href",
|
|
1930
|
-
"user_create_time",
|
|
1931
|
-
]);
|
|
1932
|
-
|
|
1933
|
-
function normalizeJobValue(column, value) {
|
|
1934
|
-
if (value === undefined || value === null) return null;
|
|
1935
|
-
if (column === "sources") {
|
|
1936
|
-
if (!Array.isArray(value)) return JSON.stringify([]);
|
|
1937
|
-
return JSON.stringify([...new Set(value)]);
|
|
1938
|
-
}
|
|
1939
|
-
if (jobBooleanColumns.has(column)) {
|
|
1940
|
-
return value ? 1 : 0;
|
|
1941
|
-
}
|
|
1942
|
-
// 防御:如果值是对象或数组,转为 JSON 字符串
|
|
1943
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1944
|
-
return value;
|
|
1945
|
-
}
|
|
1946
|
-
|
|
1947
|
-
function mapJobRow(row) {
|
|
1948
|
-
if (!row) return undefined;
|
|
1949
|
-
const mapped = {};
|
|
1950
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1951
|
-
const camelKey = snakeToCamel(key);
|
|
1952
|
-
if (key === "sources") {
|
|
1953
|
-
try {
|
|
1954
|
-
mapped[camelKey] = value ? JSON.parse(value) : [];
|
|
1955
|
-
} catch {
|
|
1956
|
-
mapped[camelKey] = [];
|
|
1957
|
-
}
|
|
1958
|
-
continue;
|
|
1959
|
-
}
|
|
1960
|
-
if (jobBooleanColumns.has(key)) {
|
|
1961
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1962
|
-
continue;
|
|
1963
|
-
}
|
|
1964
|
-
mapped[camelKey] = value;
|
|
1965
|
-
}
|
|
1966
|
-
return mapped;
|
|
1967
|
-
}
|
|
1968
|
-
|
|
1969
|
-
function getJobRow(uniqueId) {
|
|
1970
|
-
if (!db) return null;
|
|
1971
|
-
return db.prepare("SELECT * FROM jobs WHERE unique_id = ?").get(uniqueId);
|
|
1972
|
-
}
|
|
1973
|
-
|
|
1974
|
-
function getJobBaseRow(uniqueId) {
|
|
1975
|
-
if (!db) return null;
|
|
1976
|
-
return db
|
|
1977
|
-
.prepare("SELECT * FROM jobs_base WHERE unique_id = ?")
|
|
1978
|
-
.get(uniqueId);
|
|
1979
|
-
}
|
|
1980
|
-
|
|
1981
|
-
function getJob(uniqueId) {
|
|
1982
|
-
return mapJobRow(getJobRow(uniqueId));
|
|
1983
|
-
}
|
|
1984
|
-
|
|
1985
|
-
function getAllJobs() {
|
|
1986
|
-
if (!db) return [];
|
|
1987
|
-
return db.prepare("SELECT * FROM jobs").all().map(mapJobRow);
|
|
1988
|
-
}
|
|
1989
|
-
|
|
1990
|
-
function mapVideoRow(row) {
|
|
1991
|
-
if (!row) return undefined;
|
|
1992
|
-
const mapped = {};
|
|
1993
|
-
for (const [key, value] of Object.entries(row)) {
|
|
1994
|
-
const camelKey = snakeToCamel(key);
|
|
1995
|
-
if (videoBooleanColumns.has(key)) {
|
|
1996
|
-
mapped[camelKey] = value === null || value === undefined ? null : !!value;
|
|
1997
|
-
continue;
|
|
1998
|
-
}
|
|
1999
|
-
mapped[camelKey] = value;
|
|
2000
|
-
}
|
|
2001
|
-
return mapped;
|
|
2002
|
-
}
|
|
2003
|
-
|
|
2004
|
-
function getVideoRow(videoId) {
|
|
2005
|
-
if (!db) return null;
|
|
2006
|
-
return db.prepare("SELECT * FROM videos WHERE id = ?").get(videoId);
|
|
2007
|
-
}
|
|
2008
|
-
|
|
2009
|
-
function getAllVideoRows() {
|
|
2010
|
-
if (!db) return [];
|
|
2011
|
-
return db.prepare("SELECT * FROM videos").all();
|
|
2012
|
-
}
|
|
2013
|
-
|
|
2014
|
-
function updateJobInfo(uniqueId, info, incrementCount = true) {
|
|
2015
|
-
if (!db) return { error: "db not initialized" };
|
|
2016
|
-
const existing = getJobRow(uniqueId);
|
|
2017
|
-
if (!existing) return { error: "user not found" };
|
|
2018
|
-
|
|
2019
|
-
const nextValues = {};
|
|
2020
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2021
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2022
|
-
if (value === undefined || value === "") continue;
|
|
2023
|
-
let column = camelToSnake(key);
|
|
2024
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2025
|
-
if (column === "bio") column = "signature";
|
|
2026
|
-
if (column === "create_time") column = "user_create_time";
|
|
2027
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2028
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
nextValues.updated_at = Date.now();
|
|
2032
|
-
if (incrementCount) {
|
|
2033
|
-
nextValues.user_update_count = (existing.user_update_count || 0) + 1;
|
|
2034
|
-
}
|
|
2035
|
-
|
|
2036
|
-
const columns = Object.keys(nextValues);
|
|
2037
|
-
if (columns.length > 0) {
|
|
2038
|
-
const sql = `UPDATE jobs SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
|
|
2039
|
-
db.prepare(sql).run(
|
|
2040
|
-
...columns.map((column) => nextValues[column]),
|
|
2041
|
-
uniqueId,
|
|
2042
|
-
);
|
|
2043
|
-
}
|
|
2044
|
-
|
|
2045
|
-
return {
|
|
2046
|
-
ok: true,
|
|
2047
|
-
userUpdateCount:
|
|
2048
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
2049
|
-
};
|
|
2050
|
-
}
|
|
2051
|
-
|
|
2052
|
-
function inferStatus(u) {
|
|
2053
|
-
if (u.restricted) return "restricted";
|
|
2054
|
-
if (u.error) return "error";
|
|
2055
|
-
if (u.processed) return "done";
|
|
2056
|
-
return "pending";
|
|
2057
|
-
}
|
|
2058
|
-
|
|
2059
|
-
function updateJobBaseInfo(uniqueId, info, incrementCount = true) {
|
|
2060
|
-
if (!db) return { error: "db not initialized" };
|
|
2061
|
-
const existing = getJobBaseRow(uniqueId);
|
|
2062
|
-
if (!existing) return { error: "user not found" };
|
|
2063
|
-
|
|
2064
|
-
const nextValues = {};
|
|
2065
|
-
for (const [key, value] of Object.entries(info || {})) {
|
|
2066
|
-
if (key === "uniqueId" || key === "unique_id") continue;
|
|
2067
|
-
if (value === undefined || value === "") continue;
|
|
2068
|
-
let column = camelToSnake(key);
|
|
2069
|
-
// 字段别名:bio → signature, createTime → user_create_time
|
|
2070
|
-
if (column === "bio") column = "signature";
|
|
2071
|
-
if (column === "create_time") column = "user_create_time";
|
|
2072
|
-
if (!writableJobColumns.has(column)) continue;
|
|
2073
|
-
nextValues[column] = normalizeJobValue(column, value);
|
|
2074
|
-
}
|
|
2075
|
-
|
|
2076
|
-
nextValues.updated_at = Date.now();
|
|
2077
|
-
if (incrementCount) {
|
|
2078
|
-
nextValues.user_update_count = (existing.user_update_count || 0) + 1;
|
|
2079
|
-
}
|
|
2080
|
-
|
|
2081
|
-
const columns = Object.keys(nextValues);
|
|
2082
|
-
if (columns.length > 0) {
|
|
2083
|
-
const sql = `UPDATE jobs_base SET ${columns.map((column) => `${column} = ?`).join(", ")} WHERE unique_id = ?`;
|
|
2084
|
-
db.prepare(sql).run(
|
|
2085
|
-
...columns.map((column) => nextValues[column]),
|
|
2086
|
-
uniqueId,
|
|
2087
|
-
);
|
|
2088
|
-
}
|
|
2089
|
-
|
|
2090
|
-
return {
|
|
2091
|
-
ok: true,
|
|
2092
|
-
userUpdateCount:
|
|
2093
|
-
nextValues.user_update_count ?? existing.user_update_count ?? 0,
|
|
2094
|
-
};
|
|
2095
|
-
}
|
|
2096
|
-
|
|
2097
|
-
function addJobBaseToDb(user) {
|
|
2098
|
-
if (!db) return;
|
|
2099
|
-
const now = Date.now();
|
|
2100
|
-
db.prepare(
|
|
2101
|
-
`
|
|
2102
|
-
INSERT OR IGNORE INTO jobs_base (
|
|
2103
|
-
unique_id,
|
|
2104
|
-
nickname,
|
|
2105
|
-
status,
|
|
2106
|
-
sources,
|
|
2107
|
-
claimed_by,
|
|
2108
|
-
claimed_at,
|
|
2109
|
-
error,
|
|
2110
|
-
pinned,
|
|
2111
|
-
no_video,
|
|
2112
|
-
restricted,
|
|
2113
|
-
user_update_count,
|
|
2114
|
-
tt_seller,
|
|
2115
|
-
verified,
|
|
2116
|
-
video_count,
|
|
2117
|
-
comment_count,
|
|
2118
|
-
guessed_location,
|
|
2119
|
-
location_created,
|
|
2120
|
-
follower_count,
|
|
2121
|
-
following_count,
|
|
2122
|
-
heart_count,
|
|
2123
|
-
refresh_time,
|
|
2124
|
-
processed,
|
|
2125
|
-
processed_at,
|
|
2126
|
-
created_at,
|
|
2127
|
-
updated_at,
|
|
2128
|
-
region,
|
|
2129
|
-
signature,
|
|
2130
|
-
bio_link,
|
|
2131
|
-
sec_uid
|
|
2132
|
-
)
|
|
2133
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2134
|
-
`,
|
|
2135
|
-
).run(
|
|
2136
|
-
user.uniqueId,
|
|
2137
|
-
user.nickname || null,
|
|
2138
|
-
user.status || inferStatus(user),
|
|
2139
|
-
JSON.stringify(
|
|
2140
|
-
Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
|
|
2141
|
-
),
|
|
2142
|
-
user.claimedBy || null,
|
|
2143
|
-
user.claimedAt || null,
|
|
2144
|
-
user.error || null,
|
|
2145
|
-
user.pinned ? 1 : 0,
|
|
2146
|
-
user.noVideo ? 1 : 0,
|
|
2147
|
-
user.restricted ? 1 : 0,
|
|
2148
|
-
user.userUpdateCount || 0,
|
|
2149
|
-
user.ttSeller === undefined ||
|
|
2150
|
-
user.ttSeller === null ||
|
|
2151
|
-
user.ttSeller === ""
|
|
2152
|
-
? null
|
|
2153
|
-
: user.ttSeller
|
|
2154
|
-
? 1
|
|
2155
|
-
: 0,
|
|
2156
|
-
user.verified === undefined ||
|
|
2157
|
-
user.verified === null ||
|
|
2158
|
-
user.verified === ""
|
|
2159
|
-
? null
|
|
2160
|
-
: user.verified
|
|
2161
|
-
? 1
|
|
2162
|
-
: 0,
|
|
2163
|
-
user.videoCount || 0,
|
|
2164
|
-
user.commentCount || 0,
|
|
2165
|
-
user.guessedLocation || null,
|
|
2166
|
-
user.locationCreated || null,
|
|
2167
|
-
user.followerCount || 0,
|
|
2168
|
-
user.followingCount || 0,
|
|
2169
|
-
user.heartCount || 0,
|
|
2170
|
-
user.refreshTime || null,
|
|
2171
|
-
user.processed ? 1 : 0,
|
|
2172
|
-
user.processedAt || null,
|
|
2173
|
-
user.createdAt || now,
|
|
2174
|
-
user.updatedAt || now,
|
|
2175
|
-
user.region || null,
|
|
2176
|
-
user.signature || null,
|
|
2177
|
-
user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
|
|
2178
|
-
user.secUid || null,
|
|
2179
|
-
);
|
|
2180
|
-
}
|
|
2181
|
-
|
|
2182
|
-
function addJob(user) {
|
|
2183
|
-
if (!db) {
|
|
2184
|
-
addUserToDb(user);
|
|
2185
|
-
return;
|
|
2186
|
-
}
|
|
2187
|
-
if (!user.status) user.status = inferStatus(user);
|
|
2188
|
-
if (!user.createdAt) user.createdAt = Date.now();
|
|
2189
|
-
if (!user.updatedAt) user.updatedAt = user.createdAt;
|
|
2190
|
-
const writeTxn = db.transaction((job) => {
|
|
2191
|
-
addUserToDb(job);
|
|
2192
|
-
addJobToDb(job);
|
|
2193
|
-
});
|
|
2194
|
-
writeTxn(user);
|
|
2195
|
-
}
|
|
2196
|
-
|
|
2197
491
|
export function createStore(filePath, options = {}) {
|
|
2198
492
|
if (!filePath) {
|
|
2199
493
|
throw new Error("createStore requires an explicit .db path");
|
|
@@ -2219,62 +513,12 @@ export function createStore(filePath, options = {}) {
|
|
|
2219
513
|
let refillLock = null; // Promise | null
|
|
2220
514
|
// LLM 采样偏移量记忆:按猜测国家记录上次查询位置,避免重复采样
|
|
2221
515
|
// 格式: { "ES": 300, "PL": 500, "NL": 400 }
|
|
2222
|
-
|
|
516
|
+
const offsetStore = createLlmOffsetStore();
|
|
2223
517
|
if (filePath) {
|
|
2224
518
|
// 初始化 SQLite 用户表(用于判重)
|
|
2225
|
-
|
|
519
|
+
initDb(filePath);
|
|
2226
520
|
// 从数据库恢复偏移量
|
|
2227
|
-
|
|
2228
|
-
}
|
|
2229
|
-
|
|
2230
|
-
/**
|
|
2231
|
-
* 从数据库加载 LLM 采样偏移量
|
|
2232
|
-
*/
|
|
2233
|
-
function loadLlmSampleOffsets() {
|
|
2234
|
-
try {
|
|
2235
|
-
const row = db
|
|
2236
|
-
.prepare(`SELECT offsets FROM _llm_sample_offsets LIMIT 1`)
|
|
2237
|
-
.get();
|
|
2238
|
-
if (row && row.offsets) {
|
|
2239
|
-
const parsed = JSON.parse(row.offsets);
|
|
2240
|
-
if (parsed && typeof parsed === "object") {
|
|
2241
|
-
Object.entries(parsed).forEach(([k, v]) => {
|
|
2242
|
-
llmSampleOffsets.set(k, v);
|
|
2243
|
-
});
|
|
2244
|
-
console.error(
|
|
2245
|
-
`[data-store] 已恢复 LLM 采样偏移量: ${Array.from(
|
|
2246
|
-
llmSampleOffsets.entries(),
|
|
2247
|
-
)
|
|
2248
|
-
.map(([k, v]) => `${k}:${v}`)
|
|
2249
|
-
.join(", ")}`,
|
|
2250
|
-
);
|
|
2251
|
-
}
|
|
2252
|
-
}
|
|
2253
|
-
} catch (e) {
|
|
2254
|
-
// 表不存在或解析失败,使用空偏移量
|
|
2255
|
-
console.error(
|
|
2256
|
-
`[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
|
|
2257
|
-
);
|
|
2258
|
-
}
|
|
2259
|
-
}
|
|
2260
|
-
|
|
2261
|
-
/**
|
|
2262
|
-
* 将 LLM 采样偏移量持久化到数据库
|
|
2263
|
-
*/
|
|
2264
|
-
function saveLlmSampleOffsets() {
|
|
2265
|
-
try {
|
|
2266
|
-
const offsetsJson = JSON.stringify(Object.fromEntries(llmSampleOffsets));
|
|
2267
|
-
// 表不存在则创建
|
|
2268
|
-
db.prepare(
|
|
2269
|
-
`CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
|
|
2270
|
-
).run();
|
|
2271
|
-
// 插入或更新
|
|
2272
|
-
db.prepare(
|
|
2273
|
-
`INSERT OR REPLACE INTO _llm_sample_offsets (id, offsets) VALUES (1, ?)`,
|
|
2274
|
-
).run(offsetsJson);
|
|
2275
|
-
} catch (e) {
|
|
2276
|
-
console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
|
|
2277
|
-
}
|
|
521
|
+
offsetStore.load();
|
|
2278
522
|
}
|
|
2279
523
|
|
|
2280
524
|
// stats 缓存
|
|
@@ -2287,7 +531,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2287
531
|
}
|
|
2288
532
|
|
|
2289
533
|
function computeStatsInternal() {
|
|
2290
|
-
if (
|
|
534
|
+
if (getDb()) {
|
|
2291
535
|
const total = getJobsCount();
|
|
2292
536
|
const statusCounts = {
|
|
2293
537
|
pending: 0,
|
|
@@ -2296,7 +540,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2296
540
|
error: 0,
|
|
2297
541
|
restricted: 0,
|
|
2298
542
|
};
|
|
2299
|
-
const rows =
|
|
543
|
+
const rows = getDb()
|
|
2300
544
|
.prepare(
|
|
2301
545
|
`
|
|
2302
546
|
SELECT status, COUNT(*) as count
|
|
@@ -2372,7 +616,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2372
616
|
}
|
|
2373
617
|
|
|
2374
618
|
function rebuildStatusGroups() {
|
|
2375
|
-
if (
|
|
619
|
+
if (getDb()) {
|
|
2376
620
|
statusGroups = {
|
|
2377
621
|
pending: [],
|
|
2378
622
|
processing: [],
|
|
@@ -2436,9 +680,9 @@ export function createStore(filePath, options = {}) {
|
|
|
2436
680
|
|
|
2437
681
|
function flushSave() {
|
|
2438
682
|
// 数据库模式:先保存 LLM 偏移量,再备份数据库
|
|
2439
|
-
if (
|
|
683
|
+
if (getDb() && getDbPath()) {
|
|
2440
684
|
try {
|
|
2441
|
-
|
|
685
|
+
offsetStore.save();
|
|
2442
686
|
} catch (e) {
|
|
2443
687
|
console.error(`[data-store] 保存 LLM 偏移量失败: ${e.message}`);
|
|
2444
688
|
}
|
|
@@ -2452,7 +696,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2452
696
|
* @returns {string|null} 备份文件路径,失败返回 null
|
|
2453
697
|
*/
|
|
2454
698
|
function backupDatabase(maxBackups = 3) {
|
|
2455
|
-
if (!
|
|
699
|
+
if (!getDb() || !getDbPath()) {
|
|
2456
700
|
console.error("[data-store] 数据库未初始化,跳过备份");
|
|
2457
701
|
return null;
|
|
2458
702
|
}
|
|
@@ -2464,16 +708,16 @@ export function createStore(filePath, options = {}) {
|
|
|
2464
708
|
.toISOString()
|
|
2465
709
|
.replace(/[-:T.]/g, "")
|
|
2466
710
|
.slice(0, 15); // YYYYMMDDHHmmss
|
|
2467
|
-
const baseName = path.basename(
|
|
711
|
+
const baseName = path.basename(getDbPath(), ".db");
|
|
2468
712
|
const backupName = `${baseName}-${timestamp}.db`;
|
|
2469
|
-
const backupDir = path.dirname(
|
|
713
|
+
const backupDir = path.dirname(getDbPath());
|
|
2470
714
|
const backupPath = path.join(backupDir, backupName);
|
|
2471
715
|
|
|
2472
716
|
console.error(`[data-store] 正在备份数据库: ${backupName}`);
|
|
2473
717
|
|
|
2474
718
|
// 使用 better-sqlite3 的 backup API(原子性备份,安全可靠)
|
|
2475
719
|
const backupDb = new Database(backupPath);
|
|
2476
|
-
|
|
720
|
+
getDb().backup("main", backupDb, "main");
|
|
2477
721
|
backupDb.close();
|
|
2478
722
|
|
|
2479
723
|
// 验证备份文件大小
|
|
@@ -2523,7 +767,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2523
767
|
|
|
2524
768
|
function stopBackup() {
|
|
2525
769
|
// 退出时执行备份
|
|
2526
|
-
if (
|
|
770
|
+
if (getDb() && getDbPath()) {
|
|
2527
771
|
backupDatabase();
|
|
2528
772
|
}
|
|
2529
773
|
}
|
|
@@ -2531,7 +775,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2531
775
|
function getUser(uid) {
|
|
2532
776
|
const idx = uidIndex.get(uid);
|
|
2533
777
|
if (idx !== undefined) return data[idx];
|
|
2534
|
-
if (
|
|
778
|
+
if (getDb()) return getJob(uid);
|
|
2535
779
|
return undefined;
|
|
2536
780
|
}
|
|
2537
781
|
|
|
@@ -2549,12 +793,25 @@ export function createStore(filePath, options = {}) {
|
|
|
2549
793
|
|
|
2550
794
|
function addUser(user, append) {
|
|
2551
795
|
const memoryIdx = uidIndex.get(user.uniqueId);
|
|
2552
|
-
if (
|
|
796
|
+
if (getDb() && memoryIdx === undefined) {
|
|
2553
797
|
// 用 users 表判重(所有发现过的用户合集),而不是 jobs 表
|
|
2554
798
|
if (hasUserInDb(user.uniqueId)) {
|
|
2555
799
|
return;
|
|
2556
800
|
}
|
|
2557
|
-
|
|
801
|
+
const now = Date.now();
|
|
802
|
+
const writeTxn = getDb().transaction((job) => {
|
|
803
|
+
addUserToDb({
|
|
804
|
+
...job,
|
|
805
|
+
createdAt: job.createdAt || now,
|
|
806
|
+
updatedAt: job.updatedAt || now,
|
|
807
|
+
});
|
|
808
|
+
addJobBaseToDb({
|
|
809
|
+
...job,
|
|
810
|
+
createdAt: job.createdAt || now,
|
|
811
|
+
updatedAt: job.updatedAt || now,
|
|
812
|
+
});
|
|
813
|
+
});
|
|
814
|
+
writeTxn(user);
|
|
2558
815
|
return;
|
|
2559
816
|
}
|
|
2560
817
|
|
|
@@ -2616,7 +873,7 @@ export function createStore(filePath, options = {}) {
|
|
|
2616
873
|
createdAt: now,
|
|
2617
874
|
updatedAt: now,
|
|
2618
875
|
};
|
|
2619
|
-
const writeTxn =
|
|
876
|
+
const writeTxn = getDb().transaction((job) => {
|
|
2620
877
|
addUserToDb(job);
|
|
2621
878
|
addJobBaseToDb(job);
|
|
2622
879
|
});
|
|
@@ -2628,195 +885,26 @@ export function createStore(filePath, options = {}) {
|
|
|
2628
885
|
}
|
|
2629
886
|
|
|
2630
887
|
function getPendingUsers() {
|
|
2631
|
-
if (
|
|
888
|
+
if (getDb()) {
|
|
2632
889
|
return getAllJobs().filter((u) => u.status === "pending");
|
|
2633
890
|
}
|
|
2634
891
|
return data.filter((u) => u.status === "pending");
|
|
2635
892
|
}
|
|
2636
893
|
|
|
2637
894
|
function getProcessedUsers() {
|
|
2638
|
-
if (
|
|
895
|
+
if (getDb()) {
|
|
2639
896
|
return getAllJobs().filter((u) => u.status === "done");
|
|
2640
897
|
}
|
|
2641
898
|
return data.filter((u) => u.status === "done");
|
|
2642
899
|
}
|
|
2643
900
|
|
|
2644
901
|
function getAllUsers() {
|
|
2645
|
-
if (
|
|
902
|
+
if (getDb()) {
|
|
2646
903
|
return getAllJobs();
|
|
2647
904
|
}
|
|
2648
905
|
return data;
|
|
2649
906
|
}
|
|
2650
907
|
|
|
2651
|
-
/**
|
|
2652
|
-
* 使用 LLM 对单个 job 的国家匹配度打分(0-100)
|
|
2653
|
-
* @param {Object} job - raw_jobs 中的一条记录
|
|
2654
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2655
|
-
* @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
|
|
2656
|
-
*/
|
|
2657
|
-
async function scoreJobLocation(job, targetLocations) {
|
|
2658
|
-
const { fetch: undiciFetch } = await import("undici");
|
|
2659
|
-
|
|
2660
|
-
const prompt = `
|
|
2661
|
-
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。
|
|
2662
|
-
|
|
2663
|
-
目标国家列表: ${targetLocations.join(", ")}
|
|
2664
|
-
|
|
2665
|
-
重要:
|
|
2666
|
-
- 用户只要来自上述**任意一个**国家就算匹配。
|
|
2667
|
-
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
|
|
2668
|
-
- 请综合用户名、昵称、签名、位置等信息做判断。
|
|
2669
|
-
|
|
2670
|
-
用户信息:
|
|
2671
|
-
- 用户名: ${job.unique_id || "未知"}
|
|
2672
|
-
- 昵称: ${job.nickname || "未知"}
|
|
2673
|
-
- 签名: ${job.signature || "未知"}
|
|
2674
|
-
- 地区: ${job.region || "未知"}
|
|
2675
|
-
- 猜测国家(参考): ${job.guessed_location || "未知"}
|
|
2676
|
-
- 位置信息: ${job.location_created || "未知"}
|
|
2677
|
-
- 主页链接: ${job.bio_link || "未知"}
|
|
2678
|
-
|
|
2679
|
-
返回 JSON(仅返回 JSON,无其他内容):
|
|
2680
|
-
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}
|
|
2681
|
-
|
|
2682
|
-
Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
|
|
2683
|
-
`;
|
|
2684
|
-
|
|
2685
|
-
try {
|
|
2686
|
-
const apiKey = process.env.APIKEY || "";
|
|
2687
|
-
const response = await undiciFetch(
|
|
2688
|
-
"http://82.156.52.214:18000/v1/chat/completions",
|
|
2689
|
-
{
|
|
2690
|
-
method: "POST",
|
|
2691
|
-
headers: {
|
|
2692
|
-
"Content-Type": "application/json",
|
|
2693
|
-
Authorization: `Bearer ${apiKey}`,
|
|
2694
|
-
},
|
|
2695
|
-
body: JSON.stringify({
|
|
2696
|
-
model: "zc-fast",
|
|
2697
|
-
messages: [{ role: "user", content: prompt }],
|
|
2698
|
-
max_tokens: 512,
|
|
2699
|
-
temperature: 0.1,
|
|
2700
|
-
}),
|
|
2701
|
-
},
|
|
2702
|
-
);
|
|
2703
|
-
|
|
2704
|
-
const result = await response.json();
|
|
2705
|
-
const content = result.choices?.[0]?.message?.content || "";
|
|
2706
|
-
|
|
2707
|
-
// 解析 JSON 响应(多层容错)
|
|
2708
|
-
let parsed = null;
|
|
2709
|
-
|
|
2710
|
-
// 尝试 1: 直接解析
|
|
2711
|
-
try {
|
|
2712
|
-
parsed = JSON.parse(content);
|
|
2713
|
-
} catch {
|
|
2714
|
-
// 尝试 2: 提取 {} 包裹的内容
|
|
2715
|
-
const match = content.match(/\{[\s\S]*\}/);
|
|
2716
|
-
if (match) {
|
|
2717
|
-
try {
|
|
2718
|
-
parsed = JSON.parse(match[0]);
|
|
2719
|
-
} catch {
|
|
2720
|
-
// 尝试 3: 清理常见问题后解析
|
|
2721
|
-
const cleaned = match[0]
|
|
2722
|
-
.replace(/"/g, '"') // 弯引号 → 直引号
|
|
2723
|
-
.replace(/\s+/g, " ") // 多余空白
|
|
2724
|
-
.trim();
|
|
2725
|
-
try {
|
|
2726
|
-
parsed = JSON.parse(cleaned);
|
|
2727
|
-
} catch {
|
|
2728
|
-
// 尝试 4: 从文本中提取 score 和 reason(reason 可能包含引号等特殊字符)
|
|
2729
|
-
const scoreMatch = content.match(/"?score"?\s*:\s*(\d+)/i);
|
|
2730
|
-
if (scoreMatch) {
|
|
2731
|
-
let reason = "解析降级";
|
|
2732
|
-
// 找 "reason": 的位置,取到最后一个 } 前的内容
|
|
2733
|
-
const reasonKeyPos = content.search(/"?reason"?\s*:\s*"/i);
|
|
2734
|
-
if (reasonKeyPos !== -1) {
|
|
2735
|
-
const afterKey = content.substring(reasonKeyPos);
|
|
2736
|
-
const colonPos = afterKey.indexOf(":");
|
|
2737
|
-
const valueStart = afterKey.indexOf('"', colonPos + 1) + 1;
|
|
2738
|
-
const rawValue = afterKey.substring(valueStart);
|
|
2739
|
-
// 取到原始 content 最后一个 } 前
|
|
2740
|
-
const lastBrace = content.lastIndexOf("}");
|
|
2741
|
-
const reasonEnd = lastBrace - reasonKeyPos - valueStart;
|
|
2742
|
-
if (reasonEnd > 0) {
|
|
2743
|
-
reason = rawValue.substring(0, reasonEnd).trim();
|
|
2744
|
-
// 去掉首尾的引号
|
|
2745
|
-
if (reason.startsWith('"')) reason = reason.substring(1);
|
|
2746
|
-
if (reason.endsWith('"'))
|
|
2747
|
-
reason = reason.substring(0, reason.length - 1);
|
|
2748
|
-
}
|
|
2749
|
-
}
|
|
2750
|
-
parsed = {
|
|
2751
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2752
|
-
reason,
|
|
2753
|
-
};
|
|
2754
|
-
}
|
|
2755
|
-
}
|
|
2756
|
-
}
|
|
2757
|
-
}
|
|
2758
|
-
|
|
2759
|
-
// 尝试 5: 如果以上都失败,用更宽松的正则提取
|
|
2760
|
-
if (!parsed) {
|
|
2761
|
-
const scoreMatch = content.match(/"score"\s*:\s*(\d+)/);
|
|
2762
|
-
const reasonMatch = content.match(/"reason"\s*:\s*"([^"]*)"/);
|
|
2763
|
-
if (scoreMatch) {
|
|
2764
|
-
parsed = {
|
|
2765
|
-
score: parseInt(scoreMatch[1]) || 50,
|
|
2766
|
-
reason: reasonMatch ? reasonMatch[1] : "解析降级 - 宽松模式",
|
|
2767
|
-
};
|
|
2768
|
-
}
|
|
2769
|
-
}
|
|
2770
|
-
}
|
|
2771
|
-
|
|
2772
|
-
if (parsed && typeof parsed.score === "number") {
|
|
2773
|
-
return {
|
|
2774
|
-
uniqueId: job.unique_id,
|
|
2775
|
-
score: Math.max(0, Math.min(100, parsed.score)),
|
|
2776
|
-
reason: parsed.reason || "",
|
|
2777
|
-
};
|
|
2778
|
-
}
|
|
2779
|
-
|
|
2780
|
-
// 所有解析都失败,返回默认分
|
|
2781
|
-
console.error(
|
|
2782
|
-
`[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
|
|
2783
|
-
);
|
|
2784
|
-
return {
|
|
2785
|
-
uniqueId: job.unique_id,
|
|
2786
|
-
score: 50,
|
|
2787
|
-
reason: "LLM 响应解析失败,使用默认分",
|
|
2788
|
-
};
|
|
2789
|
-
} catch (e) {
|
|
2790
|
-
console.error(
|
|
2791
|
-
`[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
|
|
2792
|
-
);
|
|
2793
|
-
return {
|
|
2794
|
-
uniqueId: job.unique_id,
|
|
2795
|
-
score: 50,
|
|
2796
|
-
reason: `LLM 调用异常: ${e.message}`,
|
|
2797
|
-
};
|
|
2798
|
-
}
|
|
2799
|
-
}
|
|
2800
|
-
|
|
2801
|
-
/**
|
|
2802
|
-
* 批量对 jobs 进行 LLM 国家匹配度打分
|
|
2803
|
-
* @param {Object[]} jobs - raw_jobs 记录数组
|
|
2804
|
-
* @param {string[]} targetLocations - 目标国家列表
|
|
2805
|
-
* @param {number} batchSize - 每批处理数量(并发),默认 10
|
|
2806
|
-
* @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
|
|
2807
|
-
*/
|
|
2808
|
-
async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
|
|
2809
|
-
const results = [];
|
|
2810
|
-
for (let i = 0; i < jobs.length; i += batchSize) {
|
|
2811
|
-
const batch = jobs.slice(i, i + batchSize);
|
|
2812
|
-
const batchResults = await Promise.all(
|
|
2813
|
-
batch.map((job) => scoreJobLocation(job, targetLocations)),
|
|
2814
|
-
);
|
|
2815
|
-
results.push(...batchResults);
|
|
2816
|
-
}
|
|
2817
|
-
return results;
|
|
2818
|
-
}
|
|
2819
|
-
|
|
2820
908
|
/**
|
|
2821
909
|
* 从 raw_jobs 中移动一批符合条件的任务到 jobs 表
|
|
2822
910
|
* @param {string[]} locations - 目标国家列表(null 表示不限制)
|
|
@@ -2828,8 +916,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2828
916
|
* @returns {{ moved: number }} 实际移动的数量
|
|
2829
917
|
*/
|
|
2830
918
|
function refillJobsFromRaw(locations = null, limit = 500, options = {}) {
|
|
2831
|
-
if (!
|
|
2832
|
-
return { moved: 0, error: "
|
|
919
|
+
if (!getDb()) {
|
|
920
|
+
return { moved: 0, error: "getDb() not ready" };
|
|
2833
921
|
}
|
|
2834
922
|
|
|
2835
923
|
const safeLimit = Math.max(1, Math.min(2000, parseInt(limit) || 500));
|
|
@@ -2860,7 +948,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2860
948
|
|
|
2861
949
|
// 统计符合条件的数量
|
|
2862
950
|
const count =
|
|
2863
|
-
|
|
951
|
+
getDb()
|
|
2864
952
|
.prepare(`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql}`)
|
|
2865
953
|
.get(...args)?.c || 0;
|
|
2866
954
|
|
|
@@ -2868,149 +956,142 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
2868
956
|
return { moved: 0 };
|
|
2869
957
|
}
|
|
2870
958
|
|
|
2871
|
-
// 如果启用 LLM
|
|
959
|
+
// 如果启用 LLM 打分:先取 tag(一次性),再取非 tag 走 LLM 打分
|
|
2872
960
|
if (useLlm && normalizedLocations && normalizedLocations.length > 0) {
|
|
2873
|
-
const
|
|
2874
|
-
const
|
|
961
|
+
const llmTotal = options.llmTotal ?? 200; // 总条数
|
|
962
|
+
const llmTagRatio = options.llmTagRatio ?? 0.6; // tag 占比 60%
|
|
963
|
+
const llmTagLimit = Math.floor(llmTotal * llmTagRatio); // tag 上限 120
|
|
964
|
+
const llmNonTagTarget = llmTotal - llmTagLimit; // 非 tag 目标 80
|
|
965
|
+
const llmMinScore = options.llmMinScore ?? 60;
|
|
966
|
+
const llmSampleSize = options.llmSampleSize ?? 100;
|
|
967
|
+
const maxBatches = options.llmMaxBatches ?? 10;
|
|
2875
968
|
|
|
2876
|
-
// 打印当前偏移量状态
|
|
2877
|
-
const offsetSummary = Array.from(llmSampleOffsets.entries())
|
|
2878
|
-
.map(([k, v]) => `${k}:${v}`)
|
|
2879
|
-
.join(", ");
|
|
2880
969
|
console.error(
|
|
2881
|
-
`[data-store] LLM 打分开始:
|
|
970
|
+
`[data-store] LLM 打分开始: 总目标 ${llmTotal} 条,tag 最多 ${llmTagLimit} 条(一次性),非 tag 目标 ${llmNonTagTarget} 条(LLM 打分)`,
|
|
2882
971
|
);
|
|
2883
|
-
if (offsetSummary) {
|
|
2884
|
-
console.error(`[data-store] 偏移量记忆: ${offsetSummary}`);
|
|
2885
|
-
}
|
|
2886
972
|
|
|
2887
973
|
// 返回 Promise,调用方需要 await
|
|
2888
974
|
return (async () => {
|
|
2889
|
-
const
|
|
975
|
+
const allTagQualified = [];
|
|
976
|
+
const allNonTagQualified = [];
|
|
2890
977
|
const allScores = [];
|
|
2891
978
|
|
|
2892
|
-
//
|
|
2893
|
-
|
|
2894
|
-
|
|
979
|
+
// ===== 第一步:一次性取所有 tag(全局,最多 llmTagLimit 条)=====
|
|
980
|
+
let tagOffset = offsetStore.get("_tag") || 0;
|
|
981
|
+
const tagGlobalCount =
|
|
982
|
+
getDb()
|
|
983
|
+
.prepare(
|
|
984
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'`,
|
|
985
|
+
)
|
|
986
|
+
.get(...args)?.c || 0;
|
|
2895
987
|
|
|
2896
|
-
|
|
2897
|
-
|
|
2898
|
-
|
|
988
|
+
if (tagOffset >= tagGlobalCount) {
|
|
989
|
+
tagOffset = 0;
|
|
990
|
+
offsetStore.set("_tag", 0);
|
|
991
|
+
}
|
|
2899
992
|
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
const locationCount =
|
|
2904
|
-
db.prepare(locationCountSql).get(...locationArgs)?.c || 0;
|
|
993
|
+
console.error(
|
|
994
|
+
`[data-store] Tag 全局共 ${tagGlobalCount} 条,从偏移量 ${tagOffset} 开始`,
|
|
995
|
+
);
|
|
2905
996
|
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
997
|
+
while (
|
|
998
|
+
allTagQualified.length < llmTagLimit &&
|
|
999
|
+
tagOffset < tagGlobalCount
|
|
1000
|
+
) {
|
|
1001
|
+
const batch = getDb()
|
|
1002
|
+
.prepare(
|
|
1003
|
+
`
|
|
1004
|
+
SELECT * FROM raw_jobs WHERE ${whereSql} AND sources LIKE '%tag%'
|
|
1005
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
1006
|
+
LIMIT ? OFFSET ?
|
|
1007
|
+
`,
|
|
1008
|
+
)
|
|
1009
|
+
.all(
|
|
1010
|
+
...args,
|
|
1011
|
+
Math.min(llmSampleSize, llmTagLimit - allTagQualified.length),
|
|
1012
|
+
tagOffset,
|
|
2909
1013
|
);
|
|
1014
|
+
|
|
1015
|
+
if (!batch.length) break;
|
|
1016
|
+
|
|
1017
|
+
allTagQualified.push(...batch.map((s) => s.unique_id));
|
|
1018
|
+
tagOffset += batch.length;
|
|
1019
|
+
|
|
1020
|
+
console.error(
|
|
1021
|
+
`[data-store] Tag 本批 ${batch.length} 条,累计 ${allTagQualified.length}/${llmTagLimit}`,
|
|
1022
|
+
);
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
offsetStore.set("_tag", tagOffset);
|
|
1026
|
+
|
|
1027
|
+
// ===== 第二步:按国家取非 tag,走 LLM 打分,直到合格数达到 llmNonTagTarget =====
|
|
1028
|
+
for (const location of normalizedLocations) {
|
|
1029
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
1030
|
+
|
|
1031
|
+
const nonTagOffsetKey = `${location}:nonTag`;
|
|
1032
|
+
let offset = offsetStore.get(nonTagOffsetKey) || 0;
|
|
1033
|
+
|
|
1034
|
+
const locationArgs = [...args, location];
|
|
1035
|
+
const nonTagCount =
|
|
1036
|
+
getDb()
|
|
1037
|
+
.prepare(
|
|
1038
|
+
`SELECT COUNT(*) as c FROM raw_jobs WHERE ${whereSql} AND guessed_location = ? AND (sources NOT LIKE '%tag%' OR sources IS NULL)`,
|
|
1039
|
+
)
|
|
1040
|
+
.get(...locationArgs)?.c || 0;
|
|
1041
|
+
|
|
1042
|
+
if (nonTagCount === 0) {
|
|
1043
|
+
console.error(`[data-store] 国家 ${location}: 无非 tag 数据,跳过`);
|
|
2910
1044
|
continue;
|
|
2911
1045
|
}
|
|
2912
1046
|
|
|
2913
|
-
|
|
2914
|
-
if (offset >= locationCount) {
|
|
1047
|
+
if (offset >= nonTagCount) {
|
|
2915
1048
|
offset = 0;
|
|
2916
|
-
|
|
1049
|
+
offsetStore.set(nonTagOffsetKey, 0);
|
|
2917
1050
|
}
|
|
2918
1051
|
|
|
2919
1052
|
console.error(
|
|
2920
|
-
`[data-store] 国家 ${location}: 共 ${
|
|
1053
|
+
`[data-store] 国家 ${location}: 非 tag 共 ${nonTagCount} 条,从偏移量 ${offset} 开始`,
|
|
2921
1054
|
);
|
|
2922
1055
|
|
|
2923
1056
|
for (let batch = 0; batch < maxBatches; batch++) {
|
|
2924
|
-
|
|
2925
|
-
if (remaining <= 0) break;
|
|
1057
|
+
if (allNonTagQualified.length >= llmNonTagTarget) break;
|
|
2926
1058
|
|
|
2927
|
-
const
|
|
2928
|
-
const samples = db
|
|
1059
|
+
const samples = getDb()
|
|
2929
1060
|
.prepare(
|
|
2930
1061
|
`
|
|
2931
1062
|
SELECT * FROM raw_jobs WHERE ${whereSql} AND guessed_location = ?
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
COALESCE(video_count, 0) DESC, created_at DESC
|
|
1063
|
+
AND (sources NOT LIKE '%tag%' OR sources IS NULL)
|
|
1064
|
+
ORDER BY COALESCE(video_count, 0) DESC, created_at DESC
|
|
2935
1065
|
LIMIT ? OFFSET ?
|
|
2936
1066
|
`,
|
|
2937
1067
|
)
|
|
2938
|
-
.all(...locationArgs,
|
|
1068
|
+
.all(...locationArgs, llmSampleSize, offset);
|
|
2939
1069
|
|
|
2940
|
-
if (samples.length
|
|
1070
|
+
if (!samples.length) break;
|
|
2941
1071
|
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
1072
|
+
const scores = await scoreJobsBatch(
|
|
1073
|
+
samples,
|
|
1074
|
+
DEFAULT_TARGET_LOCATIONS,
|
|
2945
1075
|
);
|
|
2946
|
-
const
|
|
2947
|
-
|
|
2948
|
-
);
|
|
2949
|
-
|
|
2950
|
-
// tag 来源直接加入合格列表
|
|
2951
|
-
if (tagSamples.length > 0) {
|
|
2952
|
-
allQualified.push(...tagSamples.map((s) => s.unique_id));
|
|
2953
|
-
console.error(
|
|
2954
|
-
`[data-store] ${location}: 本批 ${tagSamples.length} 条 tag 来源任务跳过 LLM 打分直接合格`,
|
|
2955
|
-
);
|
|
2956
|
-
}
|
|
2957
|
-
|
|
2958
|
-
// 非 tag 来源走 LLM 打分
|
|
2959
|
-
let batchQualified = [];
|
|
2960
|
-
let scores = [];
|
|
2961
|
-
if (nonTagSamples.length > 0) {
|
|
2962
|
-
scores = await scoreJobsBatch(
|
|
2963
|
-
nonTagSamples,
|
|
2964
|
-
DEFAULT_TARGET_LOCATIONS,
|
|
2965
|
-
);
|
|
2966
|
-
batchQualified = scores.filter((s) => s.score >= llmMinScore);
|
|
2967
|
-
}
|
|
2968
|
-
|
|
1076
|
+
const qualified = scores.filter((s) => s.score >= llmMinScore);
|
|
1077
|
+
allNonTagQualified.push(...qualified.map((s) => s.uniqueId));
|
|
2969
1078
|
allScores.push(...scores);
|
|
2970
|
-
allQualified.push(...batchQualified.map((s) => s.uniqueId));
|
|
2971
1079
|
|
|
2972
|
-
totalBatches++;
|
|
2973
|
-
console.error(
|
|
2974
|
-
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${batchQualified.length} 条,累计合格 ${allQualified.length} 条`,
|
|
2975
|
-
);
|
|
2976
|
-
|
|
2977
|
-
// 更新偏移量记忆
|
|
2978
1080
|
offset += samples.length;
|
|
2979
|
-
|
|
1081
|
+
offsetStore.set(nonTagOffsetKey, offset);
|
|
2980
1082
|
|
|
2981
|
-
|
|
2982
|
-
|
|
1083
|
+
console.error(
|
|
1084
|
+
`[data-store] ${location} 第 ${batch + 1} 批: 采样 ${samples.length} 条,本批合格 ${qualified.length} 条,非 tag 累计 ${allNonTagQualified.length}/${llmNonTagTarget}`,
|
|
1085
|
+
);
|
|
2983
1086
|
}
|
|
2984
|
-
|
|
2985
|
-
// 合格数已达到最小返回阈值,停止所有国家的采样
|
|
2986
|
-
if (allQualified.length >= llmMinReturn) break;
|
|
2987
1087
|
}
|
|
2988
1088
|
|
|
2989
|
-
//
|
|
2990
|
-
|
|
2991
|
-
const tagQualified = allQualified.filter(
|
|
2992
|
-
(uid) => !allScores.find((s) => s.uniqueId === uid),
|
|
2993
|
-
);
|
|
2994
|
-
const nonTagQualifiedScores = allScores
|
|
2995
|
-
.filter((s) => s.score >= llmMinScore)
|
|
2996
|
-
.sort((a, b) => b.score - a.score);
|
|
2997
|
-
const nonTagQualified = nonTagQualifiedScores.map((s) => s.uniqueId);
|
|
2998
|
-
|
|
2999
|
-
// 限制 tag 占比:最多占 safeLimit 的 70%,留 30% 给非 tag
|
|
3000
|
-
const tagMaxCount = Math.floor(safeLimit * 0.7);
|
|
3001
|
-
const tagCount = Math.min(tagQualified.length, tagMaxCount);
|
|
3002
|
-
const nonTagMaxCount = safeLimit - tagCount;
|
|
3003
|
-
const finalNonTagQualified = nonTagQualified.slice(0, nonTagMaxCount);
|
|
3004
|
-
|
|
3005
|
-
// 最终合格列表:tag 优先 + 非 tag 按分数排序
|
|
3006
|
-
const qualified = [
|
|
3007
|
-
...tagQualified.slice(0, tagCount),
|
|
3008
|
-
...finalNonTagQualified,
|
|
3009
|
-
];
|
|
1089
|
+
// ===== 最终结果 =====
|
|
1090
|
+
const qualified = [...allTagQualified, ...allNonTagQualified];
|
|
3010
1091
|
|
|
3011
1092
|
if (!qualified.length) {
|
|
3012
1093
|
console.error(
|
|
3013
|
-
`[data-store] LLM
|
|
1094
|
+
`[data-store] LLM 打分后无符合条件的任务(tag: ${allTagQualified.length}, 非 tag: ${allNonTagQualified.length})`,
|
|
3014
1095
|
);
|
|
3015
1096
|
return {
|
|
3016
1097
|
moved: 0,
|
|
@@ -3020,11 +1101,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3020
1101
|
};
|
|
3021
1102
|
}
|
|
3022
1103
|
|
|
1104
|
+
console.error(
|
|
1105
|
+
`[data-store] LLM 打分完成: tag ${allTagQualified.length} 条 + 非 tag ${allNonTagQualified.length} 条 = 共 ${qualified.length} 条`,
|
|
1106
|
+
);
|
|
1107
|
+
|
|
3023
1108
|
// 移动符合条件的记录
|
|
3024
1109
|
const placeholders = qualified.map(() => "?").join(", ");
|
|
3025
|
-
const moveTxn =
|
|
3026
|
-
|
|
3027
|
-
|
|
1110
|
+
const moveTxn = getDb().transaction(() => {
|
|
1111
|
+
getDb()
|
|
1112
|
+
.prepare(
|
|
1113
|
+
`
|
|
3028
1114
|
INSERT OR IGNORE INTO jobs (
|
|
3029
1115
|
unique_id, nickname, status, sources, pinned,
|
|
3030
1116
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3043,41 +1129,41 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3043
1129
|
FROM raw_jobs
|
|
3044
1130
|
WHERE unique_id IN (${placeholders})
|
|
3045
1131
|
`,
|
|
3046
|
-
|
|
1132
|
+
)
|
|
1133
|
+
.run(...qualified);
|
|
3047
1134
|
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
|
|
1135
|
+
getDb()
|
|
1136
|
+
.prepare(
|
|
1137
|
+
`DELETE FROM raw_jobs WHERE unique_id IN (${placeholders})`,
|
|
1138
|
+
)
|
|
1139
|
+
.run(...qualified);
|
|
3051
1140
|
});
|
|
3052
|
-
|
|
3053
1141
|
moveTxn();
|
|
3054
1142
|
markStatsDirty();
|
|
3055
1143
|
|
|
3056
1144
|
// 持久化偏移量到数据库
|
|
3057
|
-
|
|
1145
|
+
offsetStore.save();
|
|
3058
1146
|
|
|
3059
1147
|
// 打印最终偏移量状态
|
|
3060
|
-
const finalOffsetSummary = Array.from(
|
|
1148
|
+
const finalOffsetSummary = Array.from(offsetStore.entries())
|
|
3061
1149
|
.map(([k, v]) => `${k}:${v}`)
|
|
3062
1150
|
.join(", ");
|
|
3063
|
-
console.error(
|
|
3064
|
-
`[data-store] LLM 打分完成: 共采样 ${allScores.length} 条,合格 ${qualified.length} 条,已移动到 jobs`,
|
|
3065
|
-
);
|
|
3066
1151
|
console.error(`[data-store] 偏移量记忆更新: ${finalOffsetSummary}`);
|
|
3067
|
-
|
|
1152
|
+
|
|
3068
1153
|
return {
|
|
3069
1154
|
moved: qualified.length,
|
|
3070
1155
|
scored: allScores.length,
|
|
3071
1156
|
qualified: qualified.length,
|
|
3072
|
-
scores:
|
|
1157
|
+
scores: allScores,
|
|
3073
1158
|
};
|
|
3074
1159
|
})();
|
|
3075
1160
|
}
|
|
3076
1161
|
|
|
3077
1162
|
// 常规移动:INSERT + DELETE 事务
|
|
3078
|
-
const moveTxn =
|
|
3079
|
-
|
|
3080
|
-
|
|
1163
|
+
const moveTxn = getDb().transaction(() => {
|
|
1164
|
+
getDb()
|
|
1165
|
+
.prepare(
|
|
1166
|
+
`
|
|
3081
1167
|
INSERT OR IGNORE INTO jobs (
|
|
3082
1168
|
unique_id, nickname, status, sources, pinned,
|
|
3083
1169
|
tt_seller, verified, video_count, comment_count,
|
|
@@ -3100,11 +1186,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3100
1186
|
COALESCE(video_count, 0) DESC, created_at DESC
|
|
3101
1187
|
LIMIT ?
|
|
3102
1188
|
`,
|
|
3103
|
-
|
|
1189
|
+
)
|
|
1190
|
+
.run(...args, safeLimit);
|
|
3104
1191
|
|
|
3105
1192
|
// 删除已移动的记录:用子查询匹配刚 INSERT 的 unique_id
|
|
3106
|
-
|
|
3107
|
-
|
|
1193
|
+
getDb()
|
|
1194
|
+
.prepare(
|
|
1195
|
+
`
|
|
3108
1196
|
DELETE FROM raw_jobs
|
|
3109
1197
|
WHERE unique_id IN (
|
|
3110
1198
|
SELECT unique_id FROM raw_jobs
|
|
@@ -3115,7 +1203,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3115
1203
|
LIMIT ?
|
|
3116
1204
|
)
|
|
3117
1205
|
`,
|
|
3118
|
-
|
|
1206
|
+
)
|
|
1207
|
+
.run(...args, safeLimit);
|
|
3119
1208
|
});
|
|
3120
1209
|
|
|
3121
1210
|
moveTxn();
|
|
@@ -3133,9 +1222,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3133
1222
|
) {
|
|
3134
1223
|
// 记录客户端登录状态
|
|
3135
1224
|
clientLoginStatus.set(userId, !!loggedIn);
|
|
3136
|
-
if (
|
|
1225
|
+
if (getDb()) {
|
|
3137
1226
|
const now = Date.now();
|
|
3138
|
-
const ongoingRow =
|
|
1227
|
+
const ongoingRow = getDb()
|
|
3139
1228
|
.prepare(
|
|
3140
1229
|
`
|
|
3141
1230
|
SELECT *
|
|
@@ -3150,10 +1239,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3150
1239
|
)
|
|
3151
1240
|
.get(userId, now, expireMs);
|
|
3152
1241
|
if (ongoingRow) {
|
|
3153
|
-
|
|
3154
|
-
|
|
3155
|
-
ongoingRow.unique_id
|
|
3156
|
-
);
|
|
1242
|
+
getDb()
|
|
1243
|
+
.prepare("UPDATE jobs SET claimed_at = ? WHERE unique_id = ?")
|
|
1244
|
+
.run(now, ongoingRow.unique_id);
|
|
3157
1245
|
return {
|
|
3158
1246
|
uniqueId: ongoingRow.unique_id,
|
|
3159
1247
|
nickname: ongoingRow.nickname,
|
|
@@ -3235,7 +1323,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3235
1323
|
for (const filter of filters) {
|
|
3236
1324
|
where.push(filter);
|
|
3237
1325
|
}
|
|
3238
|
-
return
|
|
1326
|
+
return getDb()
|
|
3239
1327
|
.prepare(
|
|
3240
1328
|
`
|
|
3241
1329
|
SELECT *
|
|
@@ -3278,7 +1366,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3278
1366
|
);
|
|
3279
1367
|
args.push(...normalizedLocations);
|
|
3280
1368
|
}
|
|
3281
|
-
return
|
|
1369
|
+
return getDb()
|
|
3282
1370
|
.prepare(
|
|
3283
1371
|
`
|
|
3284
1372
|
SELECT *
|
|
@@ -3348,9 +1436,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3348
1436
|
|
|
3349
1437
|
function claimRow(row) {
|
|
3350
1438
|
if (!row) return null;
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
1439
|
+
getDb()
|
|
1440
|
+
.prepare(
|
|
1441
|
+
"UPDATE jobs SET status = 'processing', claimed_at = ?, claimed_by = ? WHERE unique_id = ?",
|
|
1442
|
+
)
|
|
1443
|
+
.run(now, userId, row.unique_id);
|
|
3354
1444
|
markStatsDirty();
|
|
3355
1445
|
return {
|
|
3356
1446
|
uniqueId: row.unique_id,
|
|
@@ -3360,7 +1450,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3360
1450
|
};
|
|
3361
1451
|
}
|
|
3362
1452
|
|
|
3363
|
-
const expiredRow =
|
|
1453
|
+
const expiredRow = getDb()
|
|
3364
1454
|
.prepare(
|
|
3365
1455
|
`
|
|
3366
1456
|
SELECT *
|
|
@@ -3375,9 +1465,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3375
1465
|
.get(now, expireMs);
|
|
3376
1466
|
let expiredCandidate = null;
|
|
3377
1467
|
if (expiredRow) {
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
1468
|
+
getDb()
|
|
1469
|
+
.prepare(
|
|
1470
|
+
"UPDATE jobs SET status = 'pending', claimed_at = NULL WHERE unique_id = ?",
|
|
1471
|
+
)
|
|
1472
|
+
.run(expiredRow.unique_id);
|
|
3381
1473
|
expiredCandidate = mapJobRow({
|
|
3382
1474
|
...expiredRow,
|
|
3383
1475
|
status: "pending",
|
|
@@ -3469,7 +1561,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3469
1561
|
return null;
|
|
3470
1562
|
}
|
|
3471
1563
|
|
|
3472
|
-
if (!
|
|
1564
|
+
if (!getDb()) {
|
|
3473
1565
|
const now = Date.now();
|
|
3474
1566
|
|
|
3475
1567
|
// 0. 该客户端有未过期的任务,续期返回
|
|
@@ -3607,16 +1699,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3607
1699
|
locations = null,
|
|
3608
1700
|
loggedIn = true,
|
|
3609
1701
|
) {
|
|
3610
|
-
if (
|
|
1702
|
+
if (getDb()) {
|
|
3611
1703
|
const now = Date.now();
|
|
3612
1704
|
const info = {
|
|
3613
|
-
path: "
|
|
1705
|
+
path: "getDb()",
|
|
3614
1706
|
userId,
|
|
3615
1707
|
expireMs,
|
|
3616
1708
|
loggedIn,
|
|
3617
1709
|
};
|
|
3618
1710
|
|
|
3619
|
-
const ongoingRow =
|
|
1711
|
+
const ongoingRow = getDb()
|
|
3620
1712
|
.prepare(
|
|
3621
1713
|
`
|
|
3622
1714
|
SELECT *
|
|
@@ -3716,7 +1808,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3716
1808
|
ORDER BY follower_count DESC, created_at ASC, unique_id ASC
|
|
3717
1809
|
LIMIT 1
|
|
3718
1810
|
`;
|
|
3719
|
-
const row =
|
|
1811
|
+
const row = getDb()
|
|
1812
|
+
.prepare(sql)
|
|
1813
|
+
.get(...args);
|
|
3720
1814
|
return { row, sql, args };
|
|
3721
1815
|
}
|
|
3722
1816
|
|
|
@@ -3757,7 +1851,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3757
1851
|
ORDER BY created_at ASC, unique_id ASC
|
|
3758
1852
|
LIMIT 1
|
|
3759
1853
|
`;
|
|
3760
|
-
const row =
|
|
1854
|
+
const row = getDb()
|
|
1855
|
+
.prepare(sql)
|
|
1856
|
+
.get(...args);
|
|
3761
1857
|
return { row, sql, args };
|
|
3762
1858
|
}
|
|
3763
1859
|
|
|
@@ -3770,7 +1866,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
3770
1866
|
ORDER BY claimed_at ASC
|
|
3771
1867
|
LIMIT 1
|
|
3772
1868
|
`;
|
|
3773
|
-
const expiredRow =
|
|
1869
|
+
const expiredRow = getDb().prepare(expiredSql).get(now, expireMs);
|
|
3774
1870
|
info.expired = expiredRow
|
|
3775
1871
|
? {
|
|
3776
1872
|
uniqueId: expiredRow.unique_id,
|
|
@@ -4096,7 +2192,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4096
2192
|
}
|
|
4097
2193
|
|
|
4098
2194
|
function commitJob(uniqueId, result) {
|
|
4099
|
-
if (
|
|
2195
|
+
if (getDb()) {
|
|
4100
2196
|
const user = getJob(uniqueId);
|
|
4101
2197
|
if (!user) return { saved: false, error: "user not found" };
|
|
4102
2198
|
|
|
@@ -4122,7 +2218,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4122
2218
|
}
|
|
4123
2219
|
|
|
4124
2220
|
function commitNewExplore(uniqueId, result) {
|
|
4125
|
-
if (
|
|
2221
|
+
if (getDb()) {
|
|
4126
2222
|
const existing = getJob(uniqueId);
|
|
4127
2223
|
if (existing) {
|
|
4128
2224
|
updateUserFromResult(existing, result);
|
|
@@ -4172,7 +2268,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4172
2268
|
}
|
|
4173
2269
|
|
|
4174
2270
|
function resetJob(uniqueId) {
|
|
4175
|
-
if (
|
|
2271
|
+
if (getDb()) {
|
|
4176
2272
|
const user = getJob(uniqueId);
|
|
4177
2273
|
if (!user) return { saved: false, error: "user not found" };
|
|
4178
2274
|
user.status = "pending";
|
|
@@ -4203,7 +2299,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4203
2299
|
}
|
|
4204
2300
|
|
|
4205
2301
|
function togglePin(uniqueId) {
|
|
4206
|
-
if (
|
|
2302
|
+
if (getDb()) {
|
|
4207
2303
|
const user = getJob(uniqueId);
|
|
4208
2304
|
if (!user) return { saved: false, error: "user not found" };
|
|
4209
2305
|
const nextPinned = !user.pinned;
|
|
@@ -4220,13 +2316,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4220
2316
|
}
|
|
4221
2317
|
|
|
4222
2318
|
function getNextRedoJob(userId, maxAgeSeconds = 43200) {
|
|
4223
|
-
if (
|
|
2319
|
+
if (getDb()) {
|
|
4224
2320
|
const now = Date.now();
|
|
4225
2321
|
const threshold = now - maxAgeSeconds * 1000;
|
|
4226
2322
|
const defaultTime = new Date("2016-01-01T00:00:00Z").getTime();
|
|
4227
2323
|
const targetLocations = DEFAULT_TARGET_LOCATIONS;
|
|
4228
2324
|
const placeholders = targetLocations.map(() => "?").join(",");
|
|
4229
|
-
const row =
|
|
2325
|
+
const row = getDb()
|
|
4230
2326
|
.prepare(
|
|
4231
2327
|
`
|
|
4232
2328
|
SELECT *
|
|
@@ -4241,9 +2337,11 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4241
2337
|
)
|
|
4242
2338
|
.get(...targetLocations, defaultTime, threshold, defaultTime);
|
|
4243
2339
|
if (!row) return null;
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
2340
|
+
getDb()
|
|
2341
|
+
.prepare(
|
|
2342
|
+
"UPDATE jobs SET refresh_time = ?, updated_at = ? WHERE unique_id = ?",
|
|
2343
|
+
)
|
|
2344
|
+
.run(now, now, row.unique_id);
|
|
4247
2345
|
return {
|
|
4248
2346
|
uniqueId: row.unique_id,
|
|
4249
2347
|
nickname: row.nickname,
|
|
@@ -4292,7 +2390,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4292
2390
|
}
|
|
4293
2391
|
|
|
4294
2392
|
function commitRedoJob(uniqueId, result) {
|
|
4295
|
-
if (
|
|
2393
|
+
if (getDb()) {
|
|
4296
2394
|
const user = getJob(uniqueId);
|
|
4297
2395
|
if (!user) return { saved: false, error: "user not found" };
|
|
4298
2396
|
user.refreshTime = Date.now();
|
|
@@ -4436,13 +2534,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4436
2534
|
: [];
|
|
4437
2535
|
const hasCountryFilter = targetCountries.length > 0;
|
|
4438
2536
|
|
|
4439
|
-
if (
|
|
2537
|
+
if (getDb()) {
|
|
4440
2538
|
const l = Math.max(1, parseInt(limit) || 5);
|
|
4441
2539
|
|
|
4442
2540
|
let sql = `
|
|
4443
2541
|
SELECT *
|
|
4444
2542
|
FROM jobs_base
|
|
4445
|
-
WHERE COALESCE(tt_seller, '') = ''
|
|
2543
|
+
WHERE (COALESCE(tt_seller, '') = '' OR tt_seller = 1)
|
|
4446
2544
|
AND COALESCE(user_update_count, 0) <= 0
|
|
4447
2545
|
`;
|
|
4448
2546
|
const sqlParams = [];
|
|
@@ -4453,18 +2551,21 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4453
2551
|
sqlParams.push(...targetCountries);
|
|
4454
2552
|
}
|
|
4455
2553
|
|
|
4456
|
-
// 优先级:
|
|
2554
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
|
|
4457
2555
|
sql += ` ORDER BY
|
|
2556
|
+
CASE WHEN tt_seller = 1 THEN 0 ELSE 1 END,
|
|
4458
2557
|
CASE WHEN sources LIKE '%tag%' THEN 0 ELSE 1 END,
|
|
4459
2558
|
created_at ASC,
|
|
4460
2559
|
unique_id ASC
|
|
4461
2560
|
LIMIT ?`;
|
|
4462
2561
|
sqlParams.push(l);
|
|
4463
2562
|
|
|
4464
|
-
const rows =
|
|
2563
|
+
const rows = getDb()
|
|
2564
|
+
.prepare(sql)
|
|
2565
|
+
.all(...sqlParams);
|
|
4465
2566
|
if (rows.length === 0) return [];
|
|
4466
2567
|
const now = Date.now();
|
|
4467
|
-
const bumpStmt =
|
|
2568
|
+
const bumpStmt = getDb().prepare(
|
|
4468
2569
|
`
|
|
4469
2570
|
UPDATE jobs_base
|
|
4470
2571
|
SET user_update_count = COALESCE(user_update_count, 0) + 1,
|
|
@@ -4472,7 +2573,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4472
2573
|
WHERE unique_id = ?
|
|
4473
2574
|
`,
|
|
4474
2575
|
);
|
|
4475
|
-
const bumpTxn =
|
|
2576
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4476
2577
|
for (const item of items) {
|
|
4477
2578
|
bumpStmt.run(now, item.unique_id);
|
|
4478
2579
|
}
|
|
@@ -4490,9 +2591,13 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4490
2591
|
const pending = data
|
|
4491
2592
|
.filter((u) => {
|
|
4492
2593
|
const updateCount = u.userUpdateCount;
|
|
4493
|
-
|
|
4494
|
-
|
|
4495
|
-
|
|
2594
|
+
// ttSeller 为空 或 ttSeller=1(商家重处理)都可以领取
|
|
2595
|
+
const ttSellerEligible =
|
|
2596
|
+
u.ttSeller === null ||
|
|
2597
|
+
u.ttSeller === undefined ||
|
|
2598
|
+
u.ttSeller === "" ||
|
|
2599
|
+
u.ttSeller === 1;
|
|
2600
|
+
if (!ttSellerEligible) return false;
|
|
4496
2601
|
if (
|
|
4497
2602
|
updateCount === null ||
|
|
4498
2603
|
updateCount === undefined ||
|
|
@@ -4507,7 +2612,10 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4507
2612
|
return false;
|
|
4508
2613
|
})
|
|
4509
2614
|
.sort((a, b) => {
|
|
4510
|
-
// 优先级:
|
|
2615
|
+
// 优先级:tt_seller=1 的商家重处理任务优先 > tag 来源 > 其余
|
|
2616
|
+
const aIsSeller = a.ttSeller === 1 ? 0 : 1;
|
|
2617
|
+
const bIsSeller = b.ttSeller === 1 ? 0 : 1;
|
|
2618
|
+
if (aIsSeller !== bIsSeller) return aIsSeller - bIsSeller;
|
|
4511
2619
|
const aIsTag = (a.sources || "").includes("tag");
|
|
4512
2620
|
const bIsTag = (b.sources || "").includes("tag");
|
|
4513
2621
|
if (aIsTag !== bIsTag) return aIsTag ? -1 : 1;
|
|
@@ -4524,7 +2632,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4524
2632
|
}
|
|
4525
2633
|
|
|
4526
2634
|
function updateUserInfo(uniqueId, info) {
|
|
4527
|
-
if (
|
|
2635
|
+
if (getDb()) {
|
|
4528
2636
|
return updateJobInfo(uniqueId, info, true);
|
|
4529
2637
|
}
|
|
4530
2638
|
|
|
@@ -4543,15 +2651,17 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4543
2651
|
}
|
|
4544
2652
|
|
|
4545
2653
|
function updateUserLocation(uniqueId, location) {
|
|
4546
|
-
if (
|
|
4547
|
-
const existing =
|
|
2654
|
+
if (getDb()) {
|
|
2655
|
+
const existing = getDb()
|
|
4548
2656
|
.prepare("SELECT * FROM jobs WHERE unique_id = ?")
|
|
4549
2657
|
.get(uniqueId);
|
|
4550
2658
|
if (!existing) return { error: "user not found" };
|
|
4551
2659
|
const now = Date.now();
|
|
4552
|
-
|
|
4553
|
-
|
|
4554
|
-
|
|
2660
|
+
getDb()
|
|
2661
|
+
.prepare(
|
|
2662
|
+
"UPDATE jobs SET location_created = ?, modified_at = ?, updated_at = ? WHERE unique_id = ?",
|
|
2663
|
+
)
|
|
2664
|
+
.run(location, now, now, uniqueId);
|
|
4555
2665
|
return { ok: true, location, modifiedAt: now };
|
|
4556
2666
|
}
|
|
4557
2667
|
|
|
@@ -4567,13 +2677,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4567
2677
|
|
|
4568
2678
|
// 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
|
|
4569
2679
|
function moveJobToRaw(uniqueId) {
|
|
4570
|
-
if (!
|
|
2680
|
+
if (!getDb()) return false;
|
|
4571
2681
|
const safeId = String(uniqueId).trim();
|
|
4572
2682
|
if (!safeId) return false;
|
|
4573
2683
|
|
|
4574
|
-
const moveSingleTxn =
|
|
4575
|
-
|
|
4576
|
-
|
|
2684
|
+
const moveSingleTxn = getDb().transaction(() => {
|
|
2685
|
+
getDb()
|
|
2686
|
+
.prepare(
|
|
2687
|
+
`
|
|
4577
2688
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4578
2689
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4579
2690
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4595,21 +2706,22 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4595
2706
|
user_create_time
|
|
4596
2707
|
FROM jobs WHERE unique_id = ?
|
|
4597
2708
|
`,
|
|
4598
|
-
|
|
2709
|
+
)
|
|
2710
|
+
.run(safeId);
|
|
4599
2711
|
|
|
4600
|
-
|
|
2712
|
+
getDb().prepare("DELETE FROM jobs WHERE unique_id = ?").run(safeId);
|
|
4601
2713
|
});
|
|
4602
2714
|
moveSingleTxn();
|
|
4603
2715
|
return true;
|
|
4604
2716
|
}
|
|
4605
2717
|
|
|
4606
2718
|
function batchUpdateUserInfo(updates) {
|
|
4607
|
-
if (
|
|
2719
|
+
if (getDb()) {
|
|
4608
2720
|
const results = [];
|
|
4609
2721
|
const rawMoveList = [];
|
|
4610
2722
|
const sellerMoveList = [];
|
|
4611
2723
|
|
|
4612
|
-
const txn =
|
|
2724
|
+
const txn = getDb().transaction((items) => {
|
|
4613
2725
|
items.forEach((item) => {
|
|
4614
2726
|
const uniqueId = item.uniqueId;
|
|
4615
2727
|
// 处理 { error: true, statusCode: xxx } 的情况
|
|
@@ -4661,8 +2773,9 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4661
2773
|
// 批量移动商家用户到 jobs
|
|
4662
2774
|
if (sellerMoveList.length > 0) {
|
|
4663
2775
|
const placeholders = sellerMoveList.map(() => "?").join(",");
|
|
4664
|
-
|
|
4665
|
-
|
|
2776
|
+
getDb()
|
|
2777
|
+
.prepare(
|
|
2778
|
+
`
|
|
4666
2779
|
INSERT OR REPLACE INTO jobs (
|
|
4667
2780
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4668
2781
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4684,18 +2797,20 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4684
2797
|
user_create_time
|
|
4685
2798
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4686
2799
|
`,
|
|
4687
|
-
|
|
2800
|
+
)
|
|
2801
|
+
.run(...sellerMoveList);
|
|
4688
2802
|
|
|
4689
|
-
|
|
4690
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4691
|
-
|
|
2803
|
+
getDb()
|
|
2804
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2805
|
+
.run(...sellerMoveList);
|
|
4692
2806
|
}
|
|
4693
2807
|
|
|
4694
2808
|
// 批量移动非商家用户到 raw_jobs
|
|
4695
2809
|
if (rawMoveList.length > 0) {
|
|
4696
2810
|
const placeholders = rawMoveList.map(() => "?").join(",");
|
|
4697
|
-
|
|
4698
|
-
|
|
2811
|
+
getDb()
|
|
2812
|
+
.prepare(
|
|
2813
|
+
`
|
|
4699
2814
|
INSERT OR REPLACE INTO raw_jobs (
|
|
4700
2815
|
unique_id, nickname, status, sources, claimed_by, claimed_at,
|
|
4701
2816
|
error, pinned, no_video, restricted, user_update_count,
|
|
@@ -4717,11 +2832,12 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4717
2832
|
user_create_time
|
|
4718
2833
|
FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4719
2834
|
`,
|
|
4720
|
-
|
|
2835
|
+
)
|
|
2836
|
+
.run(...rawMoveList);
|
|
4721
2837
|
|
|
4722
|
-
|
|
4723
|
-
`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})
|
|
4724
|
-
|
|
2838
|
+
getDb()
|
|
2839
|
+
.prepare(`DELETE FROM jobs_base WHERE unique_id IN (${placeholders})`)
|
|
2840
|
+
.run(...rawMoveList);
|
|
4725
2841
|
}
|
|
4726
2842
|
|
|
4727
2843
|
// 清理内部标记
|
|
@@ -4773,8 +2889,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4773
2889
|
return { registered: 0, skipped: 0 };
|
|
4774
2890
|
}
|
|
4775
2891
|
|
|
4776
|
-
if (
|
|
4777
|
-
const insertStmt =
|
|
2892
|
+
if (getDb()) {
|
|
2893
|
+
const insertStmt = getDb().prepare(`
|
|
4778
2894
|
INSERT OR IGNORE INTO videos (
|
|
4779
2895
|
id,
|
|
4780
2896
|
href,
|
|
@@ -4790,7 +2906,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4790
2906
|
let registered = 0;
|
|
4791
2907
|
let skipped = 0;
|
|
4792
2908
|
const now = Date.now();
|
|
4793
|
-
const txn =
|
|
2909
|
+
const txn = getDb().transaction((items) => {
|
|
4794
2910
|
for (const item of items) {
|
|
4795
2911
|
const result = insertStmt.run(
|
|
4796
2912
|
item.id,
|
|
@@ -4837,7 +2953,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4837
2953
|
}
|
|
4838
2954
|
|
|
4839
2955
|
function getVideos() {
|
|
4840
|
-
if (
|
|
2956
|
+
if (getDb()) {
|
|
4841
2957
|
return getAllVideoRows().map(mapVideoRow);
|
|
4842
2958
|
}
|
|
4843
2959
|
return videos;
|
|
@@ -4845,7 +2961,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4845
2961
|
|
|
4846
2962
|
function getVideo(videoId) {
|
|
4847
2963
|
if (!videoId) return null;
|
|
4848
|
-
if (
|
|
2964
|
+
if (getDb()) {
|
|
4849
2965
|
return mapVideoRow(getVideoRow(videoId));
|
|
4850
2966
|
}
|
|
4851
2967
|
return videos.find((video) => video.id === videoId) || null;
|
|
@@ -4855,8 +2971,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4855
2971
|
const safeLimit = Math.max(1, Math.min(100, parseInt(limit) || 50));
|
|
4856
2972
|
const safeOffset = Math.max(0, parseInt(offset) || 0);
|
|
4857
2973
|
|
|
4858
|
-
if (
|
|
4859
|
-
const rows =
|
|
2974
|
+
if (getDb()) {
|
|
2975
|
+
const rows = getDb()
|
|
4860
2976
|
.prepare(
|
|
4861
2977
|
`
|
|
4862
2978
|
SELECT *
|
|
@@ -4866,7 +2982,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4866
2982
|
`,
|
|
4867
2983
|
)
|
|
4868
2984
|
.all(safeLimit, safeOffset);
|
|
4869
|
-
const total =
|
|
2985
|
+
const total = getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4870
2986
|
return {
|
|
4871
2987
|
total,
|
|
4872
2988
|
limit: safeLimit,
|
|
@@ -4884,16 +3000,16 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4884
3000
|
}
|
|
4885
3001
|
|
|
4886
3002
|
function getVideoCount() {
|
|
4887
|
-
if (
|
|
4888
|
-
return
|
|
3003
|
+
if (getDb()) {
|
|
3004
|
+
return getDb().prepare("SELECT COUNT(*) as c FROM videos").get().c;
|
|
4889
3005
|
}
|
|
4890
3006
|
return videos.length;
|
|
4891
3007
|
}
|
|
4892
3008
|
|
|
4893
3009
|
function getPendingCommentTasks(limit) {
|
|
4894
|
-
if (
|
|
3010
|
+
if (getDb()) {
|
|
4895
3011
|
const l = Math.max(1, parseInt(limit) || 1);
|
|
4896
|
-
const rows =
|
|
3012
|
+
const rows = getDb()
|
|
4897
3013
|
.prepare(
|
|
4898
3014
|
`
|
|
4899
3015
|
SELECT *
|
|
@@ -4905,14 +3021,14 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4905
3021
|
)
|
|
4906
3022
|
.all(l);
|
|
4907
3023
|
if (rows.length === 0) return [];
|
|
4908
|
-
const bumpStmt =
|
|
3024
|
+
const bumpStmt = getDb().prepare(
|
|
4909
3025
|
`
|
|
4910
3026
|
UPDATE videos
|
|
4911
3027
|
SET user_update_count = COALESCE(user_update_count, 0) + 1
|
|
4912
3028
|
WHERE id = ?
|
|
4913
3029
|
`,
|
|
4914
3030
|
);
|
|
4915
|
-
const bumpTxn =
|
|
3031
|
+
const bumpTxn = getDb().transaction((items) => {
|
|
4916
3032
|
for (const item of items) bumpStmt.run(item.id);
|
|
4917
3033
|
});
|
|
4918
3034
|
bumpTxn(rows);
|
|
@@ -4942,17 +3058,19 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
4942
3058
|
}
|
|
4943
3059
|
|
|
4944
3060
|
function commitCommentTask(videoId) {
|
|
4945
|
-
if (
|
|
3061
|
+
if (getDb()) {
|
|
4946
3062
|
const video = getVideoRow(videoId);
|
|
4947
3063
|
if (!video) return { ok: false, error: "video not found" };
|
|
4948
3064
|
const nextCount = (video.user_update_count || 0) + 1;
|
|
4949
|
-
|
|
4950
|
-
|
|
3065
|
+
getDb()
|
|
3066
|
+
.prepare(
|
|
3067
|
+
`
|
|
4951
3068
|
UPDATE videos
|
|
4952
3069
|
SET user_update_count = ?
|
|
4953
3070
|
WHERE id = ?
|
|
4954
3071
|
`,
|
|
4955
|
-
|
|
3072
|
+
)
|
|
3073
|
+
.run(nextCount, videoId);
|
|
4956
3074
|
return { ok: true, userUpdateCount: nextCount };
|
|
4957
3075
|
}
|
|
4958
3076
|
|
|
@@ -5017,6 +3135,7 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5017
3135
|
getClientLoginStatus,
|
|
5018
3136
|
trackClient,
|
|
5019
3137
|
getActiveClients,
|
|
3138
|
+
moveSellerJobsToBase, // 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
|
|
5020
3139
|
registerVideos,
|
|
5021
3140
|
getVideo,
|
|
5022
3141
|
getVideos,
|
|
@@ -5044,6 +3163,126 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
|
|
|
5044
3163
|
|
|
5045
3164
|
// 辅助函数:获取 LLM 采样偏移量
|
|
5046
3165
|
function getLlmSampleOffsets() {
|
|
5047
|
-
return Object.fromEntries(
|
|
3166
|
+
return Object.fromEntries(offsetStore.entries());
|
|
3167
|
+
}
|
|
3168
|
+
|
|
3169
|
+
// ===== 将 jobs/raw_jobs 中商家用户移动到 jobs_base =====
|
|
3170
|
+
|
|
3171
|
+
/**
 * Move seller accounts that have no videos into jobs_base for re-processing.
 *
 * Steps, all executed inside a single transaction:
 *   1. Copy every row matching `tt_seller = 1 AND video_count = 0` from
 *      `jobs` and then `raw_jobs` into `jobs_base`. `INSERT OR IGNORE` is
 *      used, so a row that conflicts with an existing jobs_base row is not
 *      copied and is not counted.
 *   2. Reset `user_update_count` to 0 on every jobs_base row matching the
 *      same condition (newly copied rows and rows that were already there).
 *      `tt_seller` is left at 1 so later priority ordering can use it.
 *   3. Delete the matching rows from `jobs` and `raw_jobs`.
 *
 * If any step throws, the transaction rolls back and nothing is changed.
 * After a successful move, a best-effort count of jobs_base rows that are
 * claimable (`tt_seller = 1`, `user_update_count <= 0`, `video_count = 0`)
 * is reported as `availableInBase`.
 *
 * NOTE(review): other move routines in this file call markStatsDirty()
 * after changing `jobs`; this one never did. Confirm whether the stats
 * cache needs invalidating here as well.
 *
 * @returns {{ok: true, fromJobs: number, fromRawJobs: number,
 *            totalInserted: number, resetCount: number,
 *            availableInBase: number}
 *          | {ok: false, error: string}}
 */
function moveSellerJobsToBase() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const COLUMNS = [
    "unique_id",
    "nickname",
    "status",
    "sources",
    "claimed_by",
    "claimed_at",
    "error",
    "pinned",
    "no_video",
    "restricted",
    "user_update_count",
    "tt_seller",
    "verified",
    "video_count",
    "comment_count",
    "guessed_location",
    "location_created",
    "confirmed_location",
    "modified_at",
    "follower_count",
    "following_count",
    "heart_count",
    "refresh_time",
    "processed",
    "processed_at",
    "created_at",
    "updated_at",
    "region",
    "signature",
    "sec_uid",
    "status_code",
    "latest_video_time",
    "bio_link",
  ];
  const cols = COLUMNS.join(",");
  const condition = "WHERE tt_seller = 1 AND video_count = 0";
  const insertFrom = (table) =>
    `INSERT OR IGNORE INTO jobs_base (${cols}) SELECT ${cols} FROM ${table} ${condition}`;

  // Tracks which step is running so a failure can be attributed to it.
  let stage = "insert";

  // Insert + reset + delete must succeed or fail together; otherwise a
  // mid-way error would leave rows present in both source and target tables.
  const moveTxn = db.transaction(() => {
    // 1. jobs -> jobs_base, then raw_jobs -> jobs_base
    const fromJobs = db.prepare(insertFrom("jobs")).run().changes ?? 0;
    const fromRawJobs = db.prepare(insertFrom("raw_jobs")).run().changes ?? 0;

    // 2. Reset user_update_count, keep tt_seller = 1.
    //    The change count lives on the object returned by run(), not on
    //    the prepared statement itself.
    stage = "reset";
    const resetInfo = db
      .prepare(
        `UPDATE jobs_base
         SET user_update_count = 0
         WHERE video_count = 0
           AND tt_seller = 1`,
      )
      .run();
    const resetCount = resetInfo.changes ?? 0;

    // 3. Remove the moved rows from their source tables.
    stage = "delete";
    db.prepare(`DELETE FROM jobs ${condition}`).run();
    db.prepare(`DELETE FROM raw_jobs ${condition}`).run();

    return { fromJobs, fromRawJobs, resetCount };
  });

  let moved;
  try {
    moved = moveTxn();
  } catch (e) {
    // The transaction has been rolled back, so no partial counts are reported.
    const prefix = stage === "insert" ? "" : `${stage} failed: `;
    return { ok: false, error: `${prefix}${e.message}` };
  }

  // 4. Diagnostic only: how many jobs_base rows can now be claimed.
  let available = 0;
  try {
    const row = db
      .prepare(
        `SELECT COUNT(*) as total FROM jobs_base
         WHERE tt_seller = 1
           AND COALESCE(user_update_count, 0) <= 0
           AND video_count = 0`,
      )
      .get();
    available = row.total;
  } catch (e) {
    // The move already committed; a failed count must not turn it into an error.
    console.error(
      `[data-store] moveSellerJobsToBase: availability count failed: ${e.message}`,
    );
  }

  return {
    ok: true,
    fromJobs: moved.fromJobs,
    fromRawJobs: moved.fromRawJobs,
    totalInserted: moved.fromJobs + moved.fromRawJobs,
    resetCount: moved.resetCount,
    availableInBase: available,
  };
}
|
|
5049
3288
|
}
|