tt-help-cli-ycl 1.3.85 → 1.3.86

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.85",
3
+ "version": "1.3.86",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/tag.js CHANGED
@@ -6,6 +6,10 @@ import {
6
6
  isLocationInList,
7
7
  } from "../lib/target-locations.js";
8
8
  import { discoverTags, recordProductiveTag } from "../lib/tag-discover.js";
9
+ import { server as cfgServer } from "../lib/constants.js";
10
+
11
+ const ALL_COUNTRIES = DEFAULT_TARGET_LOCATIONS;
12
+ const DEFAULT_SERVER = cfgServer || "http://127.0.0.1:3000";
9
13
 
10
14
  async function pushToServer(serverUrl, filteredAuthors, videos) {
11
15
  const users = filteredAuthors.map((author) => {
@@ -182,14 +186,26 @@ async function processTag(
182
186
 
183
187
  export async function handleDiscover(parsed) {
184
188
  const { tagDiscover } = parsed;
185
- const { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
189
+ let { countries, count = 4, prompt, serverUrl } = tagDiscover || {};
190
+
191
+ // 支持 'all' 展开为全部目标国家
192
+ if (
193
+ countries &&
194
+ countries.length === 1 &&
195
+ countries[0].toUpperCase() === "ALL"
196
+ ) {
197
+ countries = ALL_COUNTRIES;
198
+ }
186
199
 
187
200
  if (!countries || countries.length === 0) {
188
201
  console.error(
189
- "用法: tt-help tag discover <国家> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
202
+ "用法: tt-help tag discover <国家|all> [国家...] [--count <n>] [--prompt <文本>] [-s <服务端>]",
190
203
  );
191
204
  console.error("");
192
205
  console.error("示例:");
206
+ console.error(
207
+ " tt-help tag discover all --count 10 # 为全部 13 个国家各生成 10 个标签",
208
+ );
193
209
  console.error(
194
210
  " tt-help tag discover ES # 为西班牙生成 4 个标签",
195
211
  );
@@ -249,7 +265,7 @@ export async function handleScore(parsed) {
249
265
  process.exit(1);
250
266
  }
251
267
 
252
- const baseUrl = serverUrl || "http://127.0.0.1:3000";
268
+ const baseUrl = serverUrl || DEFAULT_SERVER;
253
269
  const targetCountries = countries || [
254
270
  "ES",
255
271
  "FR",
@@ -403,9 +419,9 @@ async function reportToServer(baseUrl, result) {
403
419
 
404
420
  export async function handleScoreAll(parsed) {
405
421
  const { tagScoreAll } = parsed;
406
- const { countries, serverUrl } = tagScoreAll || {};
422
+ let { countries, serverUrl } = tagScoreAll || {};
407
423
 
408
- const baseUrl = serverUrl || "http://127.0.0.1:3000";
424
+ const baseUrl = serverUrl || DEFAULT_SERVER;
409
425
  const targetCountries = countries || [
410
426
  "ES",
411
427
  "FR",
@@ -465,14 +481,13 @@ export async function handleScoreAll(parsed) {
465
481
  const tagsRes = await fetch(`${baseUrl}/api/tags?status=new&limit=1`);
466
482
  const tagsData = await tagsRes.json();
467
483
  if (!tagsData.tags || tagsData.tags.length === 0) {
468
- log("");
469
- log("========================================");
470
- log(` 🎉 全部完成! 共打分 ${totalScored} 个标签`);
471
- log("========================================");
472
- break;
484
+ log(` ⏳ 暂无待打分标签,10 秒后重试...`);
485
+ totalNew = null; // 重置计数,下次新标签到达时重新查询
486
+ await new Promise((r) => setTimeout(r, 10000));
487
+ continue;
473
488
  }
474
489
 
475
- const tag = tagsData.tags[0].tag;
490
+ const tag = tagsData.tags[0].tag.replace(/^#+/, "").trim().toLowerCase();
476
491
  const startTime = Date.now();
477
492
 
478
493
  log(`[${totalScored + 1}/${totalNew || "?"}] 正在打分 #${tag} ...`);
@@ -498,7 +513,16 @@ export async function handleScoreAll(parsed) {
498
513
  });
499
514
  const claimData = await claimRes.json();
500
515
  if (!claimData.ok) {
501
- log(` ⚠️ 无法锁定 (${claimData.error}),跳过`);
516
+ // already claimed: 其他机器抢先了,跳过不标 dead
517
+ if (claimData.error && claimData.error.includes("already claimed")) {
518
+ log(` ⏭️ 已被其他客户端锁定,跳过`);
519
+ continue;
520
+ }
521
+ log(` ⚠️ 无法锁定 (${claimData.error}),标记为 dead 并跳过`);
522
+ result.error = claimData.error;
523
+ result.status = "dead";
524
+ await reportToServer(baseUrl, result);
525
+ totalScored++;
502
526
  continue;
503
527
  }
504
528
 
@@ -1,14 +1,20 @@
1
- import { readFileSync, writeFileSync, existsSync } from 'fs';
2
- import { resolve, dirname } from 'path';
3
- import { fileURLToPath } from 'url';
1
+ import { readFileSync, writeFileSync, existsSync } from "fs";
2
+ import { resolve, dirname } from "path";
3
+ import { fileURLToPath } from "url";
4
4
 
5
5
  const __dirname = dirname(fileURLToPath(import.meta.url));
6
- const TAGS_FILE = resolve(__dirname, '..', '..', 'data', 'productive-tags.json');
6
+ const TAGS_FILE = resolve(
7
+ __dirname,
8
+ "..",
9
+ "..",
10
+ "data",
11
+ "productive-tags.json",
12
+ );
7
13
 
8
14
  function loadTags() {
9
15
  try {
10
16
  if (existsSync(TAGS_FILE)) {
11
- return JSON.parse(readFileSync(TAGS_FILE, 'utf-8'));
17
+ return JSON.parse(readFileSync(TAGS_FILE, "utf-8"));
12
18
  }
13
19
  } catch {}
14
20
  return { tags: [], lastUpdated: null };
@@ -17,10 +23,10 @@ function loadTags() {
17
23
  function saveTags(data) {
18
24
  const dir = dirname(TAGS_FILE);
19
25
  if (!existsSync(dir)) {
20
- const { mkdirSync } = require('fs');
26
+ const { mkdirSync } = require("fs");
21
27
  mkdirSync(dir, { recursive: true });
22
28
  }
23
- writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), 'utf-8');
29
+ writeFileSync(TAGS_FILE, JSON.stringify(data, null, 2), "utf-8");
24
30
  }
25
31
 
26
32
  export function getProductiveTags() {
@@ -29,7 +35,7 @@ export function getProductiveTags() {
29
35
 
30
36
  export function recordProductiveTag(tag, country, userCount) {
31
37
  const data = loadTags();
32
- const existing = data.tags.find(t => t.tag === tag);
38
+ const existing = data.tags.find((t) => t.tag === tag);
33
39
  if (existing) {
34
40
  if (!existing.countries.includes(country)) {
35
41
  existing.countries.push(country);
@@ -50,39 +56,50 @@ export function recordProductiveTag(tag, country, userCount) {
50
56
  }
51
57
 
52
58
  async function callLLM(prompt) {
53
- const apiKey = process.env.APIKEY || '';
54
- const { fetch } = await import('undici');
55
-
56
- const response = await fetch('http://82.156.52.214:18000/v1/chat/completions', {
57
- method: 'POST',
58
- headers: {
59
- 'Content-Type': 'application/json',
60
- Authorization: `Bearer ${apiKey}`,
59
+ const apiKey = process.env.APIKEY || "";
60
+ const { fetch } = await import("undici");
61
+
62
+ const response = await fetch(
63
+ "http://82.156.52.214:18000/v1/chat/completions",
64
+ {
65
+ method: "POST",
66
+ headers: {
67
+ "Content-Type": "application/json",
68
+ Authorization: `Bearer ${apiKey}`,
69
+ },
70
+ body: JSON.stringify({
71
+ model: "zc-fast",
72
+ messages: [{ role: "user", content: prompt }],
73
+ max_tokens: 1024,
74
+ temperature: 0.7,
75
+ }),
61
76
  },
62
- body: JSON.stringify({
63
- model: 'zc-fast',
64
- messages: [{ role: 'user', content: prompt }],
65
- max_tokens: 1024,
66
- temperature: 0.7,
67
- }),
68
- });
77
+ );
69
78
 
70
79
  const result = await response.json();
71
- const content = result.choices?.[0]?.message?.content || '';
80
+ const content = result.choices?.[0]?.message?.content || "";
72
81
  return content;
73
82
  }
74
83
 
84
+ function normalizeTag(t) {
85
+ return t.replace(/^#+/, "").trim().toLowerCase();
86
+ }
87
+
75
88
  function parseTagsFromResponse(content) {
76
89
  try {
77
90
  const parsed = JSON.parse(content);
78
- if (Array.isArray(parsed)) return parsed;
79
- if (Array.isArray(parsed.tags)) return parsed.tags;
91
+ if (Array.isArray(parsed)) {
92
+ return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
93
+ }
94
+ if (Array.isArray(parsed.tags)) {
95
+ return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
96
+ }
80
97
  } catch {}
81
98
 
82
99
  const lines = content.split(/[\n,]+/);
83
100
  const tags = [];
84
101
  for (const line of lines) {
85
- const cleaned = line.replace(/^[-\d.\s#]+/, '').trim().toLowerCase();
102
+ const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
86
103
  if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
87
104
  tags.push(cleaned);
88
105
  }
@@ -91,17 +108,22 @@ function parseTagsFromResponse(content) {
91
108
  }
92
109
 
93
110
  export async function discoverTags(countries, options = {}) {
94
- const { language = 'auto', count = 10 } = options;
111
+ const { language = "auto", count = 10 } = options;
95
112
 
96
113
  const productiveTags = getProductiveTags();
97
- const countryStr = Array.isArray(countries) ? countries.join(', ') : countries;
98
- const langHint = language === 'auto'
99
- ? ''
100
- : `Tags should be in ${language} language.`;
101
-
102
- const historyHint = productiveTags.length > 0
103
- ? `Previously productive tags for these countries: ${productiveTags.filter(t => t.countries.some(c => countries.includes(c))).map(t => `#${t.tag}`).join(', ')}. Generate new ones, don't repeat these.`
104
- : '';
114
+ const countryStr = Array.isArray(countries)
115
+ ? countries.join(", ")
116
+ : countries;
117
+ const langHint =
118
+ language === "auto" ? "" : `Tags should be in ${language} language.`;
119
+
120
+ const historyHint =
121
+ productiveTags.length > 0
122
+ ? `Previously productive tags for these countries: ${productiveTags
123
+ .filter((t) => t.countries.some((c) => countries.includes(c)))
124
+ .map((t) => `#${t.tag}`)
125
+ .join(", ")}. Generate new ones, don't repeat these.`
126
+ : "";
105
127
 
106
128
  const prompt = `Generate ${count} TikTok hashtags (lowercase, no spaces, no # symbol) that are likely to be used by online sellers, shop owners, e-commerce merchants, and small businesses in these countries: ${countryStr}.
107
129
 
@@ -114,11 +136,15 @@ ${historyHint}
114
136
 
115
137
  Return ONLY a JSON array of tag strings, nothing else. Example: ["ventas","tiendaonline","vender"]`;
116
138
 
117
- process.stderr.write(` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`);
139
+ process.stderr.write(
140
+ ` [LLM] 正在生成 ${count} 个标签 (目标: ${countryStr})...\n`,
141
+ );
118
142
  const content = await callLLM(prompt);
119
143
  const tags = parseTagsFromResponse(content);
120
144
 
121
145
  const unique = [...new Set(tags)].slice(0, count);
122
- process.stderr.write(` [LLM] 生成 ${unique.length} 个标签: ${unique.join(', ')}\n`);
146
+ process.stderr.write(
147
+ ` [LLM] 生成 ${unique.length} 个标签: ${unique.join(", ")}\n`,
148
+ );
123
149
  return unique;
124
150
  }
package/src/npm-main.js CHANGED
@@ -7,7 +7,12 @@ import { handleConfig, showConfig, showUsage, version } from "./cli/config.js";
7
7
  import { handleOpen } from "./cli/open.js";
8
8
  import { handleComments } from "./cli/comments.js";
9
9
  import { handleRefresh } from "./cli/refresh.js";
10
- import { handleTag } from "./cli/tag.js";
10
+ import {
11
+ handleTag,
12
+ handleDiscover,
13
+ handleScore,
14
+ handleScoreAll,
15
+ } from "./cli/tag.js";
11
16
 
12
17
  function exitUnsupportedCommand(command) {
13
18
  console.error(
@@ -39,6 +44,12 @@ async function main() {
39
44
  return handleRefresh(parsed);
40
45
  case "tag":
41
46
  return handleTag(parsed);
47
+ case "tag-discover":
48
+ return handleDiscover(parsed);
49
+ case "tag-score":
50
+ return handleScore(parsed);
51
+ case "tag-score-all":
52
+ return handleScoreAll(parsed);
42
53
  }
43
54
 
44
55
  const {
@@ -1344,6 +1344,11 @@ function getRawJobsPageFromDb({
1344
1344
 
1345
1345
  function insertTag(tag, countries, source = "llm") {
1346
1346
  if (!db) return { inserted: false, error: "db not ready" };
1347
+ // 防止存入带 # 前缀的 tag
1348
+ const normalized = tag.replace(/^#+/, "").trim().toLowerCase();
1349
+ if (!normalized || normalized.length < 2) {
1350
+ return { inserted: false, error: "invalid tag" };
1351
+ }
1347
1352
  try {
1348
1353
  const result = db
1349
1354
  .prepare(
@@ -1352,8 +1357,8 @@ function insertTag(tag, countries, source = "llm") {
1352
1357
  VALUES (?, ?, ?)
1353
1358
  `,
1354
1359
  )
1355
- .run(tag, JSON.stringify(countries), source);
1356
- return { inserted: result.changes > 0, tag };
1360
+ .run(normalized, JSON.stringify(countries), source);
1361
+ return { inserted: result.changes > 0, tag: normalized };
1357
1362
  } catch (e) {
1358
1363
  return { inserted: false, error: e.message };
1359
1364
  }
@@ -1415,12 +1420,19 @@ function getDeadTags(country) {
1415
1420
 
1416
1421
  function claimTag(tag) {
1417
1422
  if (!db) return { ok: false, error: "db not ready" };
1418
- const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1419
- if (!row) return { ok: false, error: "tag not found" };
1420
- if (row.status !== "new")
1421
- return { ok: false, error: `tag status is ${row.status}, not new` };
1422
- db.prepare("UPDATE tags SET status = 'scoring' WHERE tag = ?").run(tag);
1423
- return { ok: true, tag, previousStatus: row.status };
1423
+ // 原子操作:只有 status='new' 时才更新为 'scoring',避免竞态
1424
+ const result = db
1425
+ .prepare(
1426
+ "UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
1427
+ )
1428
+ .run(tag);
1429
+ if (result.changes === 0) {
1430
+ // 检查是否不存在 vs 已被别人锁定
1431
+ const row = db.prepare("SELECT status FROM tags WHERE tag = ?").get(tag);
1432
+ if (!row) return { ok: false, error: "tag not found" };
1433
+ return { ok: false, error: `tag status is ${row.status}, already claimed` };
1434
+ }
1435
+ return { ok: true, tag };
1424
1436
  }
1425
1437
 
1426
1438
  function reportTagScore(tag, fields) {
@@ -1503,6 +1515,68 @@ function rawQuery(sql, params = []) {
1503
1515
  }
1504
1516
  }
1505
1517
 
1518
+ // 清理 tags 表中以 # 开头的脏数据
1519
+ function normalizeTags() {
1520
+ if (!db) return { ok: false, error: "db not ready" };
1521
+ const dirtyRows = db
1522
+ .prepare("SELECT id, tag, countries FROM tags WHERE tag LIKE '#%'")
1523
+ .all();
1524
+ const fixed = [];
1525
+ const merged = [];
1526
+ const skipped = [];
1527
+
1528
+ for (const row of dirtyRows) {
1529
+ const cleanTag = row.tag.replace(/^#+/, "").trim().toLowerCase();
1530
+ if (!cleanTag || cleanTag.length < 2) {
1531
+ db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1532
+ skipped.push({
1533
+ dirty: row.tag,
1534
+ reason: "empty after normalize, deleted",
1535
+ });
1536
+ continue;
1537
+ }
1538
+
1539
+ // 检查 cleanTag 是否已存在
1540
+ const existing = db
1541
+ .prepare("SELECT * FROM tags WHERE tag = ?")
1542
+ .get(cleanTag);
1543
+ if (existing) {
1544
+ // 合并:保留已有 clean 版本,合并 countries
1545
+ const oldCountries = JSON.parse(row.countries || "[]");
1546
+ const existCountries = JSON.parse(existing.countries || "[]");
1547
+ const mergedCountries = [
1548
+ ...new Set([...existCountries, ...oldCountries]),
1549
+ ];
1550
+ db.prepare("UPDATE tags SET countries = ? WHERE tag = ?").run(
1551
+ JSON.stringify(mergedCountries),
1552
+ cleanTag,
1553
+ );
1554
+ // 删除脏数据
1555
+ db.prepare("DELETE FROM tags WHERE id = ?").run(row.id);
1556
+ merged.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1557
+ } else {
1558
+ // 直接重命名
1559
+ db.prepare("UPDATE tags SET tag = ? WHERE id = ?").run(cleanTag, row.id);
1560
+ fixed.push({ dirty: row.tag, clean: cleanTag, id: row.id });
1561
+ }
1562
+ }
1563
+
1564
+ return {
1565
+ ok: true,
1566
+ fixed: fixed.length,
1567
+ merged: merged.length,
1568
+ skipped: skipped.length,
1569
+ details: { fixed, merged, skipped },
1570
+ };
1571
+ }
1572
+
1573
+ function clearTags() {
1574
+ if (!db) return { ok: false, error: "db not ready" };
1575
+ const count = db.prepare("SELECT COUNT(*) as c FROM tags").get().c;
1576
+ db.exec("DELETE FROM tags");
1577
+ return { ok: true, deleted: count };
1578
+ }
1579
+
1506
1580
  function getUsersPageFromDb({
1507
1581
  status,
1508
1582
  search,
@@ -4759,6 +4833,8 @@ Standards: 90-100=clear match, 70-89=likely, 50-69=possible, 20-49=low, 0-19=unl
4759
4833
  claimTag,
4760
4834
  reportTagScore,
4761
4835
  getAllTags,
4836
+ normalizeTags,
4837
+ clearTags,
4762
4838
  data,
4763
4839
  };
4764
4840
 
@@ -994,6 +994,20 @@ export function startWatchServer(
994
994
  return;
995
995
  }
996
996
 
997
+ // POST /api/tags/normalize — 清理以 # 开头的脏 tag
998
+ if (req.method === "POST" && routePath === "/api/tags/normalize") {
999
+ const result = store.normalizeTags();
1000
+ sendJSON(res, 200, result);
1001
+ return;
1002
+ }
1003
+
1004
+ // POST /api/tags/clear — 清空 tags 表
1005
+ if (req.method === "POST" && routePath === "/api/tags/clear") {
1006
+ const result = store.clearTags();
1007
+ sendJSON(res, 200, result);
1008
+ return;
1009
+ }
1010
+
997
1011
  // POST /api/tags/claim { tag } — 锁定 tag 状态为 scoring(防并发冲突)
998
1012
  if (req.method === "POST" && routePath === "/api/tags/claim") {
999
1013
  try {
@@ -53,20 +53,25 @@ async function callLLM(prompt) {
53
53
  return result.choices?.[0]?.message?.content || "";
54
54
  }
55
55
 
56
+ function normalizeTag(t) {
57
+ return t.replace(/^#+/, "").trim().toLowerCase();
58
+ }
59
+
56
60
  function parseTagsFromResponse(content) {
57
61
  try {
58
62
  const parsed = JSON.parse(content);
59
- if (Array.isArray(parsed)) return parsed;
60
- if (Array.isArray(parsed.tags)) return parsed.tags;
63
+ if (Array.isArray(parsed)) {
64
+ return parsed.map(normalizeTag).filter((t) => t && t.length >= 2);
65
+ }
66
+ if (Array.isArray(parsed.tags)) {
67
+ return parsed.tags.map(normalizeTag).filter((t) => t && t.length >= 2);
68
+ }
61
69
  } catch {}
62
70
 
63
71
  const lines = content.split(/[\n,]+/);
64
72
  const tags = [];
65
73
  for (const line of lines) {
66
- const cleaned = line
67
- .replace(/^[-\d.\s#]+/, "")
68
- .trim()
69
- .toLowerCase();
74
+ const cleaned = normalizeTag(line.replace(/^[-\d.\s]+/, ""));
70
75
  if (cleaned && /^[a-z0-9_]+$/.test(cleaned) && cleaned.length >= 2) {
71
76
  tags.push(cleaned);
72
77
  }
@@ -165,7 +170,7 @@ export async function discoverTagsForCountry(
165
170
  const inserted = [];
166
171
  for (const tag of unique) {
167
172
  const result = store.insertTag(tag, [country], "llm");
168
- if (result.inserted) inserted.push(tag);
173
+ if (result.inserted) inserted.push(result.tag);
169
174
  }
170
175
 
171
176
  return {