tt-help-cli-ycl 1.3.93 → 1.3.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/comments.js +49 -24
- package/src/cli/tag.js +239 -91
- package/src/lib/args.js +23 -0
- package/src/lib/browser/cdp.js +4 -1
- package/src/lib/constants.js +15 -0
- package/src/lib/tag-fetcher.js +69 -63
- package/src/watch/data-store.js +631 -2399
- package/src/watch/data-store.js.bak +5091 -0
- package/src/watch/data-store.js.bak2 +5019 -0
- package/src/watch/db-columns.js +160 -0
- package/src/watch/db-crud.js +458 -0
- package/src/watch/db-mappers.js +128 -0
- package/src/watch/db-raw-jobs.js +235 -0
- package/src/watch/db-schema.js +367 -0
- package/src/watch/db-stats.js +235 -0
- package/src/watch/db-tags.js +348 -0
- package/src/watch/llm-scoring.js +235 -0
- package/src/watch/public/app.js +47 -0
- package/src/watch/public/index.html +6 -0
- package/src/watch/server.js +24 -0
- package/src/watch/tag-service.js +142 -11
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag 发现与打分 CRUD
|
|
3
|
+
*
|
|
4
|
+
* 管理 tags 表:插入、查询、认领、打分、清理。
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { getDb } from "./db-schema.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Decode the JSON-encoded list columns of a `tags` row.
 *
 * Every other column is copied through unchanged. A missing, malformed or
 * non-array JSON value decodes to an empty array, so that a single corrupt
 * row cannot break a whole listing query or the `.includes()` checks that
 * callers run on `countries`.
 *
 * @param {Object} r - Row as returned by the database driver.
 * @returns {Object} Copy of the row with `countries` and `matched_countries`
 *   as arrays.
 */
function parseTagRow(r) {
  return {
    ...r,
    countries: parseTagRowList(r.countries),
    matched_countries: parseTagRowList(r.matched_countries),
  };
}

/**
 * Parse a JSON text column into an array, falling back to `[]`.
 *
 * @param {string|null|undefined} text - Raw column value.
 * @returns {Array} The decoded array, or `[]` when the value is empty,
 *   is not valid JSON, or does not decode to an array.
 */
function parseTagRowList(text) {
  if (!text) return [];
  try {
    const value = JSON.parse(text);
    return Array.isArray(value) ? value : [];
  } catch {
    // Deliberate best-effort: treat an unreadable list as empty.
    return [];
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Insert a tag into the `tags` table if it is not already present.
 *
 * The tag is normalized first: surrounding whitespace and leading `#`
 * characters are removed and the result is lower-cased. Tags that are not
 * strings, or that are shorter than two characters after normalization, are
 * rejected without touching the database.
 *
 * @param {string} tag - Raw tag text, with or without a leading `#`.
 * @param {string[]} [countries] - Countries associated with the tag; stored
 *   as a JSON array (an empty array when omitted).
 * @param {string} [source="llm"] - Value stored in the `source` column.
 * @returns {{inserted: boolean, tag?: string, error?: string}} `inserted` is
 *   false when the tag already existed, was invalid, or the insert failed.
 */
export function insertTag(tag, countries, source = "llm") {
  const db = getDb();
  if (!db) return { inserted: false, error: "db not ready" };
  if (typeof tag !== "string") {
    return { inserted: false, error: "invalid tag" };
  }
  // Trim before stripping '#', otherwise " #foo" would keep its '#'.
  const normalized = tag.trim().replace(/^#+/, "").trim().toLowerCase();
  if (normalized.length < 2) {
    return { inserted: false, error: "invalid tag" };
  }
  try {
    const result = db
      .prepare(
        "INSERT OR IGNORE INTO tags (tag, countries, source) VALUES (?, ?, ?)",
      )
      // JSON.stringify(undefined) is undefined, not a string, so default to [].
      .run(normalized, JSON.stringify(countries ?? []), source);
    return { inserted: result.changes > 0, tag: normalized };
  } catch (e) {
    return { inserted: false, error: e.message };
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * List tags that are in the given status, lowest score first (ties broken by
 * oldest `created_at`).
 *
 * @param {string} status - Value to match against the `status` column.
 * @param {number} [limit=100] - Maximum number of rows to return.
 * @returns {Object[]} Parsed tag rows; empty when the database is not ready.
 */
export function getTagsByStatus(status, limit = 100) {
  const db = getDb();
  if (!db) return [];
  const statement = db.prepare(
    "SELECT * FROM tags WHERE status = ? ORDER BY score ASC, created_at ASC LIMIT ?",
  );
  return statement.all(status, limit).map(parseTagRow);
}
|
|
46
|
+
|
|
47
|
+
/**
 * List non-dead tags for one country, highest score first.
 *
 * All non-dead rows are loaded and the country / score filter is applied in
 * JavaScript, because `countries` is stored as a JSON array.
 *
 * @param {string} country - Country that must appear in the tag's `countries`.
 * @param {number} [minScore=0] - Minimum score (inclusive).
 * @returns {Object[]} Matching parsed tag rows; empty when the database is
 *   not ready.
 */
export function getTagsByCountry(country, minScore = 0) {
  const db = getDb();
  if (!db) return [];
  const candidates = db
    .prepare("SELECT * FROM tags WHERE status != 'dead' ORDER BY score DESC")
    .all()
    .map(parseTagRow);
  const wanted = (row) =>
    row.countries.includes(country) && row.score >= minScore;
  return candidates.filter(wanted);
}
|
|
57
|
+
|
|
58
|
+
/**
 * List tags marked `dead` for one country, lowest score first.
 *
 * @param {string} country - Country that must appear in the tag's `countries`.
 * @returns {Object[]} Matching parsed tag rows; empty when the database is
 *   not ready.
 */
export function getDeadTags(country) {
  const db = getDb();
  if (!db) return [];
  const deadRows = db
    .prepare("SELECT * FROM tags WHERE status = 'dead' ORDER BY score ASC")
    .all()
    .map(parseTagRow);
  return deadRows.filter((row) => row.countries.includes(country));
}
|
|
66
|
+
|
|
67
|
+
/**
 * Claim a tag for scoring by moving it from status `new` to `scoring`.
 *
 * The status change is a single conditional UPDATE, so only a tag that is
 * still `new` can be claimed.
 *
 * @param {string} tag - Tag to claim, exactly as stored in the table.
 * @returns {{ok: boolean, tag?: string, error?: string}} On failure `error`
 *   says whether the tag is missing or reports its current status.
 */
export function claimTag(tag) {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const { changes } = db
    .prepare(
      "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
    )
    .run(tag);
  if (changes !== 0) return { ok: true, tag };

  // Nothing was updated: look the tag up to explain why.
  const current = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
  if (current) {
    return {
      ok: false,
      error: `tag status is ${current.status}, already claimed`,
    };
  }
  return { ok: false, error: "tag not found" };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Record the outcome of scoring a tag.
 *
 * Each field is optional: a missing (null/undefined) value leaves the stored
 * column unchanged via COALESCE. `scored_at` is always set to the current
 * time and `score_count` is always incremented.
 *
 * @param {string} tag - Tag to update, exactly as stored in the table.
 * @param {Object} fields - Scoring results.
 * @param {number} [fields.score]
 * @param {string} [fields.status]
 * @param {number} [fields.totalPosts]
 * @param {number} [fields.authorCount]
 * @param {number} [fields.matchedAuthors]
 * @param {string[]} [fields.matchedCountries] - Stored as JSON text.
 * @param {number} [fields.pushedUsers]
 * @param {string} [fields.error] - Stored in `last_error`.
 * @returns {{ok: boolean, tag?: string, error?: string}} `ok` is false when
 *   no row matched or the update failed.
 */
export function reportTagScore(tag, fields) {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const {
    score,
    status,
    totalPosts,
    authorCount,
    matchedAuthors,
    matchedCountries,
    pushedUsers,
    error,
  } = fields;
  const matchedCountriesJson = matchedCountries
    ? JSON.stringify(matchedCountries)
    : null;
  const scoredAt = new Date().toISOString();

  // Bind order mirrors the placeholders in the statement below.
  const bindings = [
    score ?? null,
    status ?? null,
    totalPosts ?? null,
    authorCount ?? null,
    matchedAuthors ?? null,
    matchedCountriesJson,
    pushedUsers ?? null,
    error ?? null,
    scoredAt,
    tag,
  ];

  try {
    const statement = db.prepare(
      `UPDATE tags SET
        score = COALESCE(?, score),
        status = COALESCE(?, status),
        total_posts = COALESCE(?, total_posts),
        author_count = COALESCE(?, author_count),
        matched_authors = COALESCE(?, matched_authors),
        matched_countries = COALESCE(?, matched_countries),
        pushed_users = COALESCE(?, pushed_users),
        last_error = COALESCE(?, last_error),
        scored_at = ?,
        score_count = score_count + 1
      WHERE tag = ?`,
    );
    const outcome = statement.run(...bindings);
    return { ok: outcome.changes > 0, tag };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
|
|
134
|
+
|
|
135
|
+
/**
 * List tags regardless of status, highest score first (ties broken by newest
 * `created_at`).
 *
 * @param {number} [limit=200] - Maximum number of rows to return.
 * @returns {Object[]} Parsed tag rows; empty when the database is not ready.
 */
export function getAllTags(limit = 200) {
  const db = getDb();
  if (!db) return [];
  const statement = db.prepare(
    "SELECT * FROM tags ORDER BY score DESC, created_at DESC LIMIT ?",
  );
  return statement.all(limit).map(parseTagRow);
}
|
|
143
|
+
|
|
144
|
+
/**
 * Run an arbitrary SQL statement and return its rows.
 *
 * NOTE(review): the SQL text is executed as given; callers must not pass
 * untrusted input here.
 *
 * @param {string} sql - SQL text to prepare.
 * @param {Array} [params=[]] - Positional bind values.
 * @returns {{rows?: Object[], error?: string}} `rows` on success, otherwise
 *   `error` with the failure message.
 */
export function rawQuery(sql, params = []) {
  const db = getDb();
  if (!db) return { error: "db not ready" };

  let rows;
  try {
    rows = db.prepare(sql).all(...params);
  } catch (e) {
    return { error: e.message };
  }
  return { rows };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Clean up tags that were stored with a leading `#`.
 *
 * For every row whose tag starts with `#`:
 *  - if the normalized tag is shorter than two characters, the row is deleted;
 *  - if a row with the normalized tag already exists, the country lists are
 *    merged into it and the dirty row is deleted;
 *  - otherwise the row is renamed to the normalized tag.
 *
 * All changes run inside one transaction so a failure cannot leave a
 * half-merged table, and failures are reported as `{ ok: false, error }`
 * like the other functions in this module.
 *
 * @returns {{ok: boolean, fixed?: number, merged?: number, skipped?: number,
 *   details?: {fixed: Object[], merged: Object[], skipped: Object[]},
 *   error?: string}}
 */
export function normalizeTags() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  const fixed = [];
  const merged = [];
  const skipped = [];

  // A malformed or non-array countries value is treated as an empty list.
  const readCountries = (text) => {
    if (!text) return [];
    try {
      const value = JSON.parse(text);
      return Array.isArray(value) ? value : [];
    } catch {
      return [];
    }
  };

  // Some SQLite drivers (e.g. better-sqlite3) expose `inTransaction`; when a
  // caller already opened a transaction we must not issue a nested BEGIN.
  const ownsTransaction = !db.inTransaction;

  try {
    // Prepare each statement once instead of once per row.
    const selectByTag = db.prepare("SELECT * FROM tags WHERE tag = ?");
    const deleteById = db.prepare("DELETE FROM tags WHERE id = ?");
    const setCountries = db.prepare(
      "UPDATE tags SET countries = ? WHERE tag = ?",
    );
    const renameById = db.prepare("UPDATE tags SET tag = ? WHERE id = ?");
    const dirtyRows = db
      .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
      .all();

    if (ownsTransaction) db.exec("BEGIN");
    try {
      for (const row of dirtyRows) {
        const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
        if (!cleanTag || cleanTag.length < 2) {
          deleteById.run(row.id);
          skipped.push({
            dirty: row.tag,
            reason: "empty after normalize, deleted",
          });
          continue;
        }

        const existing = selectByTag.get(cleanTag);
        if (existing) {
          // Keep the clean row and fold the dirty row's countries into it.
          const mergedCountries = [
            ...new Set([
              ...readCountries(existing.countries),
              ...readCountries(row.countries),
            ]),
          ];
          setCountries.run(JSON.stringify(mergedCountries), cleanTag);
          deleteById.run(row.id);
          merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
        } else {
          renameById.run(cleanTag, row.id);
          fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
        }
      }
      if (ownsTransaction) db.exec("COMMIT");
    } catch (e) {
      if (ownsTransaction) {
        try {
          db.exec("ROLLBACK");
        } catch {
          // Keep reporting the original failure rather than the rollback's.
        }
      }
      throw e;
    }
  } catch (e) {
    return { ok: false, error: e.message };
  }

  return {
    ok: true,
    fixed: fixed.length,
    merged: merged.length,
    skipped: skipped.length,
    details: { fixed, merged, skipped },
  };
}
|
|
205
|
+
|
|
206
|
+
/**
 * Delete every row from the `tags` table.
 *
 * @returns {{ok: boolean, deleted?: number, error?: string}} `deleted` is the
 *   row count read immediately before the delete.
 */
export function clearTags() {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };
  const { c: deleted } = db.prepare("SELECT COUNT(*) as c FROM tags").get();
  db.exec("DELETE FROM tags");
  return { ok: true, deleted };
}
|
|
213
|
+
|
|
214
|
+
// ===== discover_log CRUD =====
|
|
215
|
+
|
|
216
|
+
/**
 * Record one complete LLM discover run in `discover_log`.
 *
 * List-valued fields are stored as JSON text; missing lists become `[]`,
 * missing numbers become `0` and a missing strategy becomes NULL.
 *
 * @param {Object} entry
 * @param {string} entry.country
 * @param {number} entry.round
 * @param {string} [entry.strategy]
 * @param {string[]} [entry.tagsGenerated]
 * @param {number} [entry.tagsAdded]
 * @param {Array} [entry.productiveSample]
 * @param {Array} [entry.deadSample]
 * @param {number} [entry.avgProductiveScore]
 * @param {number} [entry.avgDeadScore]
 * @returns {{ok: boolean, id?: number|bigint, error?: string}} `id` is the
 *   rowid of the inserted record.
 */
export function insertDiscoverLog({
  country,
  round,
  strategy,
  tagsGenerated,
  tagsAdded,
  productiveSample,
  deadSample,
  avgProductiveScore,
  avgDeadScore,
}) {
  const db = getDb();
  if (!db) return { ok: false, error: "db not ready" };

  try {
    const statement = db.prepare(
      `INSERT INTO discover_log
        (country, round, strategy, tags_generated, tags_added,
         productive_sample, dead_sample, avg_productive_score, avg_dead_score)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    // Bind order mirrors the column list above.
    const bindings = [
      country,
      round,
      strategy || null,
      JSON.stringify(tagsGenerated || []),
      tagsAdded || 0,
      JSON.stringify(productiveSample || []),
      JSON.stringify(deadSample || []),
      avgProductiveScore || 0,
      avgDeadScore || 0,
    ];
    const outcome = statement.run(...bindings);
    return { ok: true, id: outcome.lastInsertRowid };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
|
|
257
|
+
|
|
258
|
+
/**
 * Fetch the most recent discover records for a country (used for
 * self-learning), newest round first.
 *
 * @param {string} country
 * @param {number} [limit=10] - Maximum number of records.
 * @returns {Object[]} Records with `tags_generated`, `productive_sample` and
 *   `dead_sample` decoded from JSON; empty when the database is not ready.
 */
export function getRecentDiscoverLogs(country, limit = 10) {
  const db = getDb();
  if (!db) return [];

  const records = db
    .prepare(
      `SELECT * FROM discover_log
       WHERE country = ?
       ORDER BY round DESC
       LIMIT ?`,
    )
    .all(country, limit);

  const decoded = [];
  for (const record of records) {
    decoded.push({
      ...record,
      tags_generated: JSON.parse(record.tags_generated || "[]"),
      productive_sample: JSON.parse(record.productive_sample || "[]"),
      dead_sample: JSON.parse(record.dead_sample || "[]"),
    });
  }
  return decoded;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Compute the next discover round number for a country.
 *
 * @param {string} country
 * @returns {number} One more than the highest recorded round, or 1 when the
 *   country has no records or the database is not ready.
 */
export function getDiscoverRound(country) {
  const db = getDb();
  if (!db) return 1;

  const latest = db
    .prepare(`SELECT MAX(round) as maxRound FROM discover_log WHERE country = ?`)
    .get(country);
  const previousRound = latest?.maxRound || 0;
  return previousRound + 1;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Summarize how well the tags produced by a country's last five discover
 * rounds performed (used for strategy evaluation).
 *
 * For each round, the tags listed in `tags_generated` are looked up in the
 * `tags` table; the average covers only the tags that were found.
 *
 * @param {string} country
 * @returns {Array<{round: number, strategy: string, tags_added: number,
 *   avg_score: number, productive_count: number, dead_count: number}>|null}
 *   One entry per round, newest first; `[]` when there are no records and
 *   `null` when the database is not ready.
 */
export function getDiscoverEffectiveness(country) {
  const db = getDb();
  if (!db) return null;

  // Load the discover_log records first, then evaluate each round.
  const recentLogs = db
    .prepare(
      `SELECT round, strategy, tags_added, tags_generated FROM discover_log
       WHERE country = ?
       ORDER BY round DESC
       LIMIT 5`,
    )
    .all(country);

  if (!recentLogs || recentLogs.length === 0) return [];

  const report = [];
  for (const log of recentLogs) {
    const generated = JSON.parse(log.tags_generated || "[]");
    const summary = {
      round: log.round,
      strategy: log.strategy,
      tags_added: log.tags_added,
      avg_score: 0,
      productive_count: 0,
      dead_count: 0,
    };

    if (generated.length !== 0) {
      // Look up the scores of this round's tags with an IN clause.
      const marks = generated.map(() => "?").join(",");
      const scored = db
        .prepare(`SELECT score, status FROM tags WHERE tag IN (${marks})`)
        .all(...generated);

      let total = 0;
      for (const entry of scored) {
        total += entry.score || 0;
        if (entry.status === "productive") summary.productive_count += 1;
        if (entry.status === "dead") summary.dead_count += 1;
      }
      if (scored.length > 0) summary.avg_score = total / scored.length;
    }

    report.push(summary);
  }
  return report;
}
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM 打分模块
|
|
3
|
+
*
|
|
4
|
+
* 使用外部 LLM API 对 TikTok 用户的国家匹配度打分。
|
|
5
|
+
* 包含:单条打分、批量打分、采样偏移量持久化。
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getDb } from "./db-schema.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Ask the LLM how likely a single job's user is to come from any of the
 * target countries, on a 0-100 scale.
 *
 * The function never rejects because of a failed request or an unparseable
 * reply: both cases resolve to a neutral score of 50 with an explanatory
 * reason. Loading `undici` happens before that protection, so a missing
 * module still rejects.
 *
 * NOTE(review): the default endpoint is plain HTTP, so the bearer token from
 * `process.env.APIKEY` is sent unencrypted.
 *
 * @param {Object} job - One raw_jobs record.
 * @param {string[]} targetLocations - Target country list.
 * @param {Object} [options]
 * @param {Function} [options.fetchImpl] - fetch-compatible function; when
 *   omitted, `fetch` from `undici` is loaded and used.
 * @param {string} [options.endpoint] - Chat-completions URL.
 * @param {string} [options.model="zc-fast"] - Model name sent in the request.
 * @returns {Promise<{ uniqueId: string, score: number, reason: string }>}
 */
export async function scoreJobLocation(job, targetLocations, options = {}) {
  const {
    fetchImpl,
    endpoint = "http://82.156.52.214:18000/v1/chat/completions",
    model = "zc-fast",
  } = options;
  // undici is only loaded when the caller did not inject a fetch function.
  const doFetch = fetchImpl ?? (await import("undici")).fetch;

  const prompt = `
你是一个 TikTok 用户数据分析助手。请根据以下用户信息,判断该用户是否来自以下**任意一个**目标国家。

目标国家列表: ${targetLocations.join(", ")}

重要:
- 用户只要来自上述**任意一个**国家就算匹配。
- guessed_location 是系统初步猜测的结果,**仅供参考**,不要完全依赖它。
- 请综合用户名、昵称、签名、位置等信息做判断。

用户信息:
- 用户名: ${job.unique_id || "未知"}
- 昵称: ${job.nickname || "未知"}
- 签名: ${job.signature || "未知"}
- 地区: ${job.region || "未知"}
- 猜测国家(参考): ${job.guessed_location || "未知"}
- 位置信息: ${job.location_created || "未知"}
- 主页链接: ${job.bio_link || "未知"}

返回 JSON(仅返回 JSON,无其他内容):
{"score": 0-100, "reason": "English only, under 50 chars, no quotes/brackets"}

Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unlikely
`;

  try {
    const apiKey = process.env.APIKEY || "";
    const response = await doFetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        messages: [{ role: "user", content: prompt }],
        max_tokens: 512,
        temperature: 0.1,
      }),
    });
    // Surface HTTP failures as call errors instead of "unparseable reply".
    if (response.ok === false) {
      throw new Error(`HTTP ${response.status}`);
    }

    const result = await response.json();
    const rawContent = result?.choices?.[0]?.message?.content;
    const content = typeof rawContent === "string" ? rawContent : "";

    const parsed = parseLlmScoreContent(content);
    if (parsed) {
      return {
        uniqueId: job.unique_id,
        score: Math.max(0, Math.min(100, parsed.score)),
        reason: parsed.reason,
      };
    }

    console.error(
      `[scoreJobLocation] JSON 解析失败 (${job.unique_id}): ${content.substring(0, 100)}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: "LLM 响应解析失败,使用默认分",
    };
  } catch (e) {
    console.error(
      `[scoreJobLocation] LLM 调用失败 (${job.unique_id}): ${e.message}`,
    );
    return {
      uniqueId: job.unique_id,
      score: 50,
      reason: `LLM 调用异常: ${e.message}`,
    };
  }
}

/**
 * Extract `{ score, reason }` from the LLM reply text.
 *
 * Tries, in order: the whole text as JSON, the outermost `{...}` span as
 * JSON, that span with typographic quotes and whitespace normalized, and
 * finally regular-expression extraction of the score and reason.
 *
 * @param {string} content - Reply text from the model.
 * @returns {{score: number, reason: string}|null} `null` when no score could
 *   be found. The score is not clamped here.
 */
function parseLlmScoreContent(content) {
  const candidates = [content];
  const braced = content.match(/\{[\s\S]*\}/);
  if (braced) {
    candidates.push(
      braced[0],
      // Curly quotes are a common reason the model's JSON fails to parse.
      braced[0]
        .replace(/[\u201C\u201D]/g, '"')
        .replace(/\s+/g, " ")
        .trim(),
    );
  }

  for (const text of candidates) {
    let value;
    try {
      value = JSON.parse(text);
    } catch {
      continue;
    }
    const score = coerceLlmScore(value?.score);
    if (score !== null) {
      return {
        score,
        reason: typeof value.reason === "string" ? value.reason : "",
      };
    }
  }

  if (braced) {
    const looseScore = content.match(/"?score"?\s*:\s*(\d+)/i);
    if (looseScore) {
      return {
        // No `|| 50` here: a score of 0 is a legitimate answer.
        score: Number.parseInt(looseScore[1], 10),
        reason: extractLooseLlmReason(content) ?? "解析降级",
      };
    }
  }

  const strictScore = content.match(/"score"\s*:\s*(\d+)/);
  if (!strictScore) return null;
  const strictReason = content.match(/"reason"\s*:\s*"([^"]*)"/);
  return {
    score: Number.parseInt(strictScore[1], 10),
    reason: strictReason ? strictReason[1] : "解析降级 - 宽松模式",
  };
}

/**
 * Convert a parsed `score` value to a finite number.
 *
 * @param {*} value - Number or numeric string.
 * @returns {number|null} The number, or `null` when it is not numeric.
 */
function coerceLlmScore(value) {
  if (typeof value === "number") return Number.isFinite(value) ? value : null;
  if (typeof value === "string" && value.trim() !== "") {
    const numeric = Number(value);
    return Number.isFinite(numeric) ? numeric : null;
  }
  return null;
}

/**
 * Pull the reason text out of a reply whose JSON is broken (for example by
 * unescaped quotes inside the reason): everything between the opening quote
 * of the reason value and the last `}` in the text.
 *
 * @param {string} content - Reply text from the model.
 * @returns {string|null} The reason, or `null` when no reason key is found.
 */
function extractLooseLlmReason(content) {
  const key = /"?reason"?\s*:\s*"/i.exec(content);
  if (!key) return null;
  const start = key.index + key[0].length;
  const end = content.lastIndexOf("}");
  if (end <= start) return null;
  let reason = content.slice(start, end).trim();
  if (reason.startsWith('"')) reason = reason.substring(1);
  if (reason.endsWith('"')) reason = reason.substring(0, reason.length - 1);
  return reason;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Score many jobs with the LLM, running `batchSize` requests concurrently and
 * waiting for each batch to finish before starting the next.
 *
 * @param {Object[]} jobs - raw_jobs records.
 * @param {string[]} targetLocations - Target country list.
 * @param {number} [batchSize=10] - Jobs per concurrent batch. Fractions are
 *   rounded down, values below 1 are treated as 1, and a non-numeric value
 *   falls back to 10.
 * @returns {Promise<Array<{ uniqueId: string, score: number, reason: string }>>}
 *   Results in the same order as `jobs`.
 */
export async function scoreJobsBatch(jobs, targetLocations, batchSize = 10) {
  // A step of 0, a negative step or NaN would loop forever or drop jobs.
  const requested = Math.floor(Number(batchSize));
  const step = Number.isFinite(requested) ? Math.max(1, requested) : 10;

  const results = [];
  for (let i = 0; i < jobs.length; i += step) {
    const batch = jobs.slice(i, i + step);
    const batchResults = await Promise.all(
      batch.map((job) => scoreJobLocation(job, targetLocations)),
    );
    results.push(...batchResults);
  }
  return results;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Create a store for LLM sampling offsets.
 *
 * Offsets are kept in memory per key and can be persisted to, and restored
 * from, the single row of the `_llm_sample_offsets` table. Database failures
 * are logged and otherwise ignored.
 *
 * @returns {{ load(): void, save(): void, get(key: string): number, set(key: string, val: number): void, entries(): Iterator }}
 */
export function createLlmOffsetStore() {
  const offsetByKey = new Map();

  /** Merge the persisted offsets, if any, into the in-memory map. */
  function load() {
    const db = getDb();
    if (!db) return;
    try {
      const stored = db
        .prepare("SELECT offsets FROM _llm_sample_offsets LIMIT 1")
        .get();
      if (!stored || !stored.offsets) return;

      const decoded = JSON.parse(stored.offsets);
      if (!decoded || typeof decoded !== "object") return;

      for (const [key, value] of Object.entries(decoded)) {
        offsetByKey.set(key, value);
      }
      console.error(
        `[data-store] 已恢复 LLM 采样偏移量: ${Array.from(offsetByKey.entries())
          .map(([k, v]) => `${k}:${v}`)
          .join(", ")}`,
      );
    } catch (e) {
      console.error(
        `[data-store] 加载 LLM 采样偏移量失败,使用空偏移量: ${e.message}`,
      );
    }
  }

  /** Write the in-memory offsets to the table, creating it when missing. */
  function save() {
    const db = getDb();
    if (!db) return;
    try {
      const serialized = JSON.stringify(Object.fromEntries(offsetByKey));
      db.prepare(
        `CREATE TABLE IF NOT EXISTS _llm_sample_offsets (id INTEGER PRIMARY KEY CHECK (id = 1), offsets TEXT)`,
      ).run();
      db.prepare(
        `INSERT INTO _llm_sample_offsets (id, offsets) VALUES (1, ?) ON CONFLICT(id) DO UPDATE SET offsets = excluded.offsets`,
      ).run(serialized);
    } catch (e) {
      console.error(`[data-store] 保存 LLM 采样偏移量失败: ${e.message}`);
    }
  }

  /** Return the offset for a key, or 0 when none is recorded. */
  function get(key) {
    return offsetByKey.get(key) || 0;
  }

  /** Record the offset for a key (in memory only until `save`). */
  function set(key, val) {
    offsetByKey.set(key, val);
  }

  /** Iterate over the recorded `[key, offset]` pairs. */
  function entries() {
    return offsetByKey.entries();
  }

  return { load, save, get, set, entries };
}
|
package/src/watch/public/app.js
CHANGED
|
@@ -1521,6 +1521,53 @@ function renderUserUpdateCountryGrid(countries) {
|
|
|
1521
1521
|
.join("");
|
|
1522
1522
|
}
|
|
1523
1523
|
|
|
1524
|
+
/**
 * Click handler for the "reprocess sellers" stat card.
 *
 * After the user confirms, POSTs to `/api/move-seller-jobs-to-base`, shows
 * the outcome as a notification and refreshes the page statistics on
 * success. The card is dimmed and made unclickable while the request runs.
 *
 * @returns {Promise<void>}
 */
async function moveSellerJobsToBase() {
  // Ask for confirmation before doing anything.
  const confirmed = confirm(
    "确定要重处理商家吗?\n\n这将把 jobs 和 raw_jobs 中 ttSeller=1 且视频数=0 的记录移动到 jobs_base,并重置 user_update_count=0,使其可以被 attach 重新领取。",
  );
  if (!confirmed) return;

  const card = document.getElementById("statSellerResetCard");
  const setCardBusy = (busy) => {
    if (!card) return;
    card.style.pointerEvents = busy ? "none" : "";
    card.style.opacity = busy ? "0.6" : "";
  };
  setCardBusy(true);

  try {
    showLoading("正在移动商家数据到 jobs_base...");

    const res = await fetch("/api/move-seller-jobs-to-base", {
      method: "POST",
    });
    const data = await res.json();
    const succeeded = data.ok;
    hideLoading();

    if (!succeeded) {
      showNotification(`❌ 移动失败:${data.error || "未知错误"}`, "error");
      return;
    }

    showNotification(
      `✅ 完成:从 jobs 移动 ${data.fromJobs} 条,从 raw_jobs 移动 ${data.fromRawJobs} 条,重置 ${data.resetCount} 条,jobs_base 中可领取 ${data.availableInBase} 条`,
      "success",
    );
    // Refresh the statistics shown on the page.
    await fetchStats();
  } catch (e) {
    hideLoading();
    showNotification(`❌ 请求失败:${e.message}`, "error");
  } finally {
    setCardBusy(false);
  }
}
|
|
1570
|
+
|
|
1524
1571
|
async function fetchAttachStuckByCountry() {
|
|
1525
1572
|
try {
|
|
1526
1573
|
const res = await fetch("/api/attach-stuck-by-country");
|
|
@@ -210,6 +210,12 @@
|
|
|
210
210
|
<div class="label">待补资料</div>
|
|
211
211
|
<div class="value target" id="userUpdateStatUserUpdateTasks">0</div>
|
|
212
212
|
</div>
|
|
213
|
+
<div class="stat-card clickable" id="statSellerResetCard" onclick="moveSellerJobsToBase()"
|
|
214
|
+
style="background:rgba(236,72,153,0.12);border:1px solid rgba(236,72,153,0.25)">
|
|
215
|
+
<div class="label">🔄 重处理商家</div>
|
|
216
|
+
<div class="value-sub" style="font-size:10px;color:#ec4899">将 jobs/raw_jobs 中 ttSeller=1 且视频数=0 的记录移到 jobs_base
|
|
217
|
+
重新处理</div>
|
|
218
|
+
</div>
|
|
213
219
|
<div class="stat-card clickable" onclick="navigateToRaw()">
|
|
214
220
|
<div class="label">毛料库</div>
|
|
215
221
|
<div class="value target" id="userUpdateStatRawJobs">0</div>
|
package/src/watch/server.js
CHANGED
|
@@ -274,6 +274,30 @@ export function startWatchServer(
|
|
|
274
274
|
return;
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
// 将 jobs/raw_jobs 中 tt_seller=1 且 video_count=0 的记录移动到 jobs_base
|
|
278
|
+
if (
|
|
279
|
+
req.method === "POST" &&
|
|
280
|
+
routePath === "/api/move-seller-jobs-to-base"
|
|
281
|
+
) {
|
|
282
|
+
try {
|
|
283
|
+
const result = store.moveSellerJobsToBase();
|
|
284
|
+
const ts = new Date().toISOString().slice(11, 19);
|
|
285
|
+
if (result.ok) {
|
|
286
|
+
console.error(
|
|
287
|
+
`[JOB ${ts}] MOVE-SELLER-JOBS: jobs=${result.fromJobs}, raw_jobs=${result.fromRawJobs}, reset=${result.resetCount}, available=${result.availableInBase}`,
|
|
288
|
+
);
|
|
289
|
+
} else {
|
|
290
|
+
console.error(
|
|
291
|
+
`[JOB ${ts}] MOVE-SELLER-JOBS ERROR: ${result.error}`,
|
|
292
|
+
);
|
|
293
|
+
}
|
|
294
|
+
sendJSON(res, result.ok ? 200 : 400, result);
|
|
295
|
+
} catch (e) {
|
|
296
|
+
sendJSON(res, 500, { error: e.message });
|
|
297
|
+
}
|
|
298
|
+
return;
|
|
299
|
+
}
|
|
300
|
+
|
|
277
301
|
if (req.method === "GET" && routePath === "/api/db-query") {
|
|
278
302
|
const sql = params.sql || "SELECT * FROM jobs LIMIT 10";
|
|
279
303
|
const limit = Math.min(parseInt(params.limit) || 100, 1000);
|