tt-help-cli-ycl 1.3.95 → 1.3.97
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/tag.js +66 -11
- package/src/lib/api-interceptor.js +2 -0
- package/src/lib/tag-fetcher.js +48 -37
- package/src/lib/tiktok-scraper.mjs +16 -0
- package/src/scraper/explore-core.js +6 -3
- package/src/watch/data-store.js +32 -1
- package/src/watch/db-stats.js +3 -0
- package/src/watch/db-tags.js +82 -11
- package/src/watch/public/app.js +493 -10
- package/src/watch/public/index.html +76 -0
- package/src/watch/public/style.css +150 -0
- package/src/watch/server.js +80 -11
- package/src/watch/tag-service.js +43 -22
package/package.json
CHANGED
package/src/cli/tag.js
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
DEFAULT_TARGET_LOCATIONS,
|
|
10
10
|
isLocationInList,
|
|
11
11
|
} from "../lib/target-locations.js";
|
|
12
|
+
import { delay as randomDelay } from "../lib/delay.js";
|
|
12
13
|
import { discoverTags } from "../lib/tag-discover.js";
|
|
13
14
|
import {
|
|
14
15
|
server as cfgServer,
|
|
@@ -121,13 +122,13 @@ async function processTag(
|
|
|
121
122
|
port: port || 9222,
|
|
122
123
|
onProgress: ({ videos, authors }) => {
|
|
123
124
|
process.stderr.write(
|
|
124
|
-
`\r${prefix} #${tag}: ${videos} 视频, ${authors}
|
|
125
|
+
`\r${prefix} #${tag}: ${videos} 视频, ${authors} 作者\x1b[K`,
|
|
125
126
|
);
|
|
126
127
|
},
|
|
127
128
|
});
|
|
128
129
|
|
|
129
130
|
process.stderr.write(
|
|
130
|
-
`\r${prefix} #${tag}: ${result.videoCount} 视频, ${result.uniqueAuthorCount}
|
|
131
|
+
`\r${prefix} #${tag}: ${result.videoCount} 视频, ${result.uniqueAuthorCount} 作者\x1b[K`,
|
|
131
132
|
);
|
|
132
133
|
|
|
133
134
|
let videos = result.videos;
|
|
@@ -154,7 +155,7 @@ async function processTag(
|
|
|
154
155
|
locationCreated &&
|
|
155
156
|
isLocationInList(locationCreated, targetLocations);
|
|
156
157
|
process.stderr.write(
|
|
157
|
-
`\r [${done}/${total}] ${label} → ${loc}${hit ? " ✓" : ""}`,
|
|
158
|
+
`\r [${done}/${total}] ${label} → ${loc}${hit ? " ✓" : ""}\x1b[K`,
|
|
158
159
|
);
|
|
159
160
|
},
|
|
160
161
|
});
|
|
@@ -295,8 +296,8 @@ async function scoreSingleTag(
|
|
|
295
296
|
{ baseUrl, cdpPort, targetCountries, effectiveProxy },
|
|
296
297
|
) {
|
|
297
298
|
const log = (...args) => process.stderr.write(args.join(" ") + "\n");
|
|
298
|
-
const progress = (msg) => process.stderr.write(`\r ${msg}`);
|
|
299
|
-
const clearLine = () => process.stderr.write("\r
|
|
299
|
+
const progress = (msg) => process.stderr.write(`\r ${msg}\x1b[K`);
|
|
300
|
+
const clearLine = () => process.stderr.write("\r\x1b[K");
|
|
300
301
|
|
|
301
302
|
const startTime = Date.now();
|
|
302
303
|
|
|
@@ -613,7 +614,7 @@ export async function handleScoreAll(parsed) {
|
|
|
613
614
|
const cdpOpts = { port: cdpPort };
|
|
614
615
|
if (effectiveProxy) cdpOpts.proxyServer = effectiveProxy;
|
|
615
616
|
const browser = await ensureBrowserReady(cdpOpts);
|
|
616
|
-
|
|
617
|
+
let page = await getOrCreatePage(browser);
|
|
617
618
|
|
|
618
619
|
let totalScored = 0;
|
|
619
620
|
let emptyRounds = 0; // 连续无任务的轮数
|
|
@@ -633,6 +634,15 @@ export async function handleScoreAll(parsed) {
|
|
|
633
634
|
log(` 客户端 ID: ${clientId.substring(0, 8)}...`);
|
|
634
635
|
log("");
|
|
635
636
|
|
|
637
|
+
// Ctrl+C 时关闭浏览器和 scraper
|
|
638
|
+
const cleanup = () => {
|
|
639
|
+
log("\n正在清理资源...");
|
|
640
|
+
enrichScraper.close().catch(() => {});
|
|
641
|
+
killEdgeProcesses(null, cdpPort);
|
|
642
|
+
process.exit(0);
|
|
643
|
+
};
|
|
644
|
+
process.on("SIGINT", cleanup);
|
|
645
|
+
|
|
636
646
|
try {
|
|
637
647
|
while (true) {
|
|
638
648
|
try {
|
|
@@ -720,15 +730,19 @@ export async function handleScoreAll(parsed) {
|
|
|
720
730
|
}
|
|
721
731
|
|
|
722
732
|
// 抓取视频(CDP 连接已登录 Edge)
|
|
733
|
+
const fetchStart = Date.now();
|
|
723
734
|
log(` 抓取 TikTok 标签页...`);
|
|
724
735
|
const tagResult = await fetchTagData(tag, {
|
|
725
736
|
port: cdpPort,
|
|
726
737
|
onProgress: ({ videos, authors }) => {
|
|
727
|
-
process.stderr.write(
|
|
738
|
+
process.stderr.write(
|
|
739
|
+
`\r 抓取中: ${videos} 视频, ${authors} 作者\x1b[K`,
|
|
740
|
+
);
|
|
728
741
|
},
|
|
729
742
|
});
|
|
743
|
+
const fetchSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
|
|
730
744
|
log(
|
|
731
|
-
`\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount}
|
|
745
|
+
`\r 完成: ${tagResult.videoCount} 视频, ${tagResult.uniqueAuthorCount} 作者 (${fetchSec}s)\x1b[K`,
|
|
732
746
|
);
|
|
733
747
|
|
|
734
748
|
result.totalPosts = tagResult.totalPosts || 0;
|
|
@@ -736,11 +750,23 @@ export async function handleScoreAll(parsed) {
|
|
|
736
750
|
let videos = tagResult.videos;
|
|
737
751
|
|
|
738
752
|
if (!videos || videos.length === 0) {
|
|
739
|
-
|
|
753
|
+
const deadSec = ((Date.now() - fetchStart) / 1000).toFixed(1);
|
|
754
|
+
const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(
|
|
755
|
+
0,
|
|
756
|
+
);
|
|
757
|
+
log(` ⚠️ 无视频 (${deadSec}s) mem=${memMB}MB,标记 dead`);
|
|
740
758
|
result.status = "dead";
|
|
741
759
|
result.error = "no videos found";
|
|
742
760
|
await reportToServer(baseUrl, result, clientId, clientMeta);
|
|
743
761
|
totalScored++;
|
|
762
|
+
// 导航到 about:blank 释放页面状态再跳过
|
|
763
|
+
await page
|
|
764
|
+
.goto("about:blank", {
|
|
765
|
+
waitUntil: "domcontentloaded",
|
|
766
|
+
timeout: 5000,
|
|
767
|
+
})
|
|
768
|
+
.catch(() => {});
|
|
769
|
+
process.stderr.write(` → page=${page.url()}\n`);
|
|
744
770
|
continue;
|
|
745
771
|
}
|
|
746
772
|
|
|
@@ -751,7 +777,7 @@ export async function handleScoreAll(parsed) {
|
|
|
751
777
|
onProgress: ({ done, total, current, locationCreated }) => {
|
|
752
778
|
if (done % 10 === 0 || done === total) {
|
|
753
779
|
process.stderr.write(
|
|
754
|
-
`\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}`,
|
|
780
|
+
`\r [${done}/${total}] ${current.split("/").pop().slice(0, 20)} → ${locationCreated || "-"}\x1b[K`,
|
|
755
781
|
);
|
|
756
782
|
}
|
|
757
783
|
},
|
|
@@ -760,6 +786,20 @@ export async function handleScoreAll(parsed) {
|
|
|
760
786
|
const enriched = await enrichVideosWithLocation(videos, enrichOpts);
|
|
761
787
|
videos = enriched.videos;
|
|
762
788
|
|
|
789
|
+
// CDN 限流检测:有拦截则冷却 + 重启 scraper
|
|
790
|
+
const cdnBlocked = enriched.cdnBlockedCount || 0;
|
|
791
|
+
if (cdnBlocked > 0) {
|
|
792
|
+
const cdnRatio = cdnBlocked / (videos.length || 1);
|
|
793
|
+
const coolSec = cdnRatio > 0.3 ? 120 : 60;
|
|
794
|
+
log(
|
|
795
|
+
` ⚠️ CDN 限流: ${cdnBlocked}/${videos.length} (${(cdnRatio * 100).toFixed(0)}%),冷却 ${coolSec} 秒后重启 scraper`,
|
|
796
|
+
);
|
|
797
|
+
await new Promise((r) => setTimeout(r, coolSec * 1000));
|
|
798
|
+
log(` 正在重启 TikTokScraper...`);
|
|
799
|
+
await enrichScraper.restart();
|
|
800
|
+
log(` ✅ TikTokScraper 已重启`);
|
|
801
|
+
}
|
|
802
|
+
|
|
763
803
|
// 过滤 + 算分 (共用函数)
|
|
764
804
|
const { matchedAuthorSet } = applyFilterAndScore(
|
|
765
805
|
videos,
|
|
@@ -795,10 +835,25 @@ export async function handleScoreAll(parsed) {
|
|
|
795
835
|
const mc = result.matchedCountries
|
|
796
836
|
.map((c) => `${c.c}:${c.n}`)
|
|
797
837
|
.join(" ");
|
|
838
|
+
// Node.js 进程内存占用
|
|
839
|
+
const memMB = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(0);
|
|
840
|
+
const memStr = ` mem=${memMB}MB`;
|
|
798
841
|
log(
|
|
799
|
-
` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}`,
|
|
842
|
+
` ${icon} ${result.status} score=${result.score} authors=${result.authorCount} matched=${result.matchedAuthors} (${elapsed}s)${mc ? " " + mc : ""}${memStr}`,
|
|
800
843
|
);
|
|
801
844
|
log("");
|
|
845
|
+
|
|
846
|
+
// 随机等待 3-7 秒,避免连续访问 TikTok 触发风控
|
|
847
|
+
await randomDelay(3000, 7000);
|
|
848
|
+
|
|
849
|
+
// 导航到 about:blank 卸载页面,状态清零,下次 goto 重新初始化
|
|
850
|
+
await page
|
|
851
|
+
.goto("about:blank", { waitUntil: "domcontentloaded", timeout: 5000 })
|
|
852
|
+
.catch((e) => {
|
|
853
|
+
log(` ⚠️ about:blank 跳转失败: ${e.message}`);
|
|
854
|
+
});
|
|
855
|
+
process.stderr.write(` → page=${page.url()}\n`);
|
|
856
|
+
await randomDelay(3000, 7000);
|
|
802
857
|
} catch (e) {
|
|
803
858
|
// 区分网络错误和业务错误
|
|
804
859
|
const isNetworkError =
|
package/src/lib/api-interceptor.js
CHANGED
|
@@ -23,6 +23,7 @@ async function processAPIResponse(
|
|
|
23
23
|
href,
|
|
24
24
|
createTime: item.createTime || null,
|
|
25
25
|
playCount: item.stats?.playCount || 0,
|
|
26
|
+
isECVideo: item.isECVideo ? 1 : 0,
|
|
26
27
|
});
|
|
27
28
|
}
|
|
28
29
|
|
|
@@ -72,6 +73,7 @@ async function processAPIResponse(
|
|
|
72
73
|
href,
|
|
73
74
|
createTime: item.createTime || null,
|
|
74
75
|
playCount: item.stats?.playCount || 0,
|
|
76
|
+
isECVideo: item.isECVideo ? 1 : 0,
|
|
75
77
|
});
|
|
76
78
|
}
|
|
77
79
|
}
|
package/src/lib/tag-fetcher.js
CHANGED
|
@@ -2,6 +2,7 @@ import { chromium } from "playwright";
|
|
|
2
2
|
import { ensureBrowserReady } from "./browser/cdp.js";
|
|
3
3
|
import { getOrCreatePage } from "./browser/page.js";
|
|
4
4
|
import { TikTokScraper } from "./tiktok-scraper.mjs";
|
|
5
|
+
import { CDNBlockedError } from "./parse-ssr.mjs";
|
|
5
6
|
|
|
6
7
|
const TAG_URL = "https://www.tiktok.com/tag";
|
|
7
8
|
const SCROLL_INTERVAL = 3000;
|
|
@@ -56,43 +57,45 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
56
57
|
const browser = await ensureBrowserReady(cdpOptions);
|
|
57
58
|
const page = await getOrCreatePage(browser);
|
|
58
59
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
challengeInfo = body.challengeInfo.challenge;
|
|
74
|
-
}
|
|
60
|
+
let challengeInfo = null;
|
|
61
|
+
const rawVideos = [];
|
|
62
|
+
const seenVideoIds = new Set();
|
|
63
|
+
const authors = new Set();
|
|
64
|
+
|
|
65
|
+
const responseHandler = async (resp) => {
|
|
66
|
+
try {
|
|
67
|
+
const url = resp.url();
|
|
68
|
+
const ct = resp.headers()["content-type"] || "";
|
|
69
|
+
|
|
70
|
+
if (url.includes("/api/challenge/detail/") && ct.includes("json")) {
|
|
71
|
+
const body = await resp.json();
|
|
72
|
+
if (body?.challengeInfo?.challenge) {
|
|
73
|
+
challengeInfo = body.challengeInfo.challenge;
|
|
75
74
|
}
|
|
75
|
+
}
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
if (onProgress) {
|
|
90
|
-
onProgress({ videos: rawVideos.length, authors: authors.size });
|
|
77
|
+
if (url.includes("/api/challenge/item_list/") && ct.includes("json")) {
|
|
78
|
+
const body = await resp.json();
|
|
79
|
+
if (!body?.itemList) return;
|
|
80
|
+
for (const item of body.itemList) {
|
|
81
|
+
const vid = item.id || "";
|
|
82
|
+
if (vid && !seenVideoIds.has(vid)) {
|
|
83
|
+
seenVideoIds.add(vid);
|
|
84
|
+
const uid = item.author?.uniqueId || "";
|
|
85
|
+
if (uid) authors.add(uid);
|
|
86
|
+
rawVideos.push(extractItemData(item));
|
|
91
87
|
}
|
|
92
88
|
}
|
|
93
|
-
|
|
94
|
-
|
|
89
|
+
if (onProgress) {
|
|
90
|
+
onProgress({ videos: rawVideos.length, authors: authors.size });
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
} catch {}
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
page.on("response", responseHandler);
|
|
95
97
|
|
|
98
|
+
try {
|
|
96
99
|
const tagUrl = `${TAG_URL}/${encodeURIComponent(tag)}`;
|
|
97
100
|
const resp = await page.goto(tagUrl, {
|
|
98
101
|
waitUntil: "domcontentloaded",
|
|
@@ -185,7 +188,7 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
185
188
|
uniqueAuthors: [...authors],
|
|
186
189
|
};
|
|
187
190
|
} finally {
|
|
188
|
-
|
|
191
|
+
page.off("response", responseHandler);
|
|
189
192
|
}
|
|
190
193
|
}
|
|
191
194
|
|
|
@@ -197,7 +200,7 @@ export async function fetchTagData(tag, options = {}) {
|
|
|
197
200
|
* @param {number} [options.poolSize=3] - 并发页面数
|
|
198
201
|
* @param {number} [options.maxRetries=3] - 单个请求最大重试次数
|
|
199
202
|
* @param {Function} [options.onProgress] - 进度回调 ({ done, total, current, locationCreated })
|
|
200
|
-
* @returns {Promise<{ videos: Array, locationMap: Record<string, string|null> }>}
|
|
203
|
+
* @returns {Promise<{ videos: Array, locationMap: Record<string, string|null>, cdnBlockedCount: number }>}
|
|
201
204
|
*/
|
|
202
205
|
export async function enrichVideosWithLocation(videos, options = {}) {
|
|
203
206
|
const {
|
|
@@ -219,6 +222,8 @@ export async function enrichVideosWithLocation(videos, options = {}) {
|
|
|
219
222
|
const locationMap = {};
|
|
220
223
|
let done = 0;
|
|
221
224
|
|
|
225
|
+
let cdnBlockedCount = 0;
|
|
226
|
+
|
|
222
227
|
if (mode === "users") {
|
|
223
228
|
const uniqueAuthors = [
|
|
224
229
|
...new Set(videos.map((v) => v.authorUniqueId).filter(Boolean)),
|
|
@@ -239,7 +244,10 @@ export async function enrichVideosWithLocation(videos, options = {}) {
|
|
|
239
244
|
current: uniqueId,
|
|
240
245
|
locationCreated: location,
|
|
241
246
|
});
|
|
242
|
-
} catch {
|
|
247
|
+
} catch (err) {
|
|
248
|
+
if (err instanceof CDNBlockedError) {
|
|
249
|
+
cdnBlockedCount++;
|
|
250
|
+
}
|
|
243
251
|
locationMap[uniqueId] = null;
|
|
244
252
|
done++;
|
|
245
253
|
if (onProgress)
|
|
@@ -277,7 +285,10 @@ export async function enrichVideosWithLocation(videos, options = {}) {
|
|
|
277
285
|
current: videoUrl,
|
|
278
286
|
locationCreated: location,
|
|
279
287
|
});
|
|
280
|
-
} catch {
|
|
288
|
+
} catch (err) {
|
|
289
|
+
if (err instanceof CDNBlockedError) {
|
|
290
|
+
cdnBlockedCount++;
|
|
291
|
+
}
|
|
281
292
|
v.locationCreated = null;
|
|
282
293
|
locationMap[v.id] = null;
|
|
283
294
|
done++;
|
|
@@ -295,7 +306,7 @@ export async function enrichVideosWithLocation(videos, options = {}) {
|
|
|
295
306
|
await Promise.allSettled(tasks);
|
|
296
307
|
}
|
|
297
308
|
|
|
298
|
-
return { videos: enriched, locationMap };
|
|
309
|
+
return { videos: enriched, locationMap, cdnBlockedCount };
|
|
299
310
|
} finally {
|
|
300
311
|
if (ownsScraper) await scraper.close();
|
|
301
312
|
}
|
package/src/lib/tiktok-scraper.mjs
CHANGED
|
@@ -267,6 +267,14 @@ export class TikTokScraper {
|
|
|
267
267
|
const slot = this._pickSlot();
|
|
268
268
|
return slot.lock.run(async () => {
|
|
269
269
|
let rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
270
|
+
// CDN 限流立即抛出,不重试
|
|
271
|
+
if (detectAccessDenied(rawHtml)) {
|
|
272
|
+
const denied = detectAccessDenied(rawHtml);
|
|
273
|
+
throw new CDNBlockedError(
|
|
274
|
+
`CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
|
|
275
|
+
denied.reference,
|
|
276
|
+
);
|
|
277
|
+
}
|
|
270
278
|
let result = parseVideoInfo(rawHtml);
|
|
271
279
|
for (let attempt = 1; !result && attempt <= maxRetries; attempt++) {
|
|
272
280
|
// 检查是否值得重试
|
|
@@ -278,6 +286,14 @@ export class TikTokScraper {
|
|
|
278
286
|
} catch {}
|
|
279
287
|
await delay(500 * attempt);
|
|
280
288
|
rawHtml = await this._fetchViewSource(videoUrl, slot);
|
|
289
|
+
// 重试中也检查 CDN 限流
|
|
290
|
+
if (detectAccessDenied(rawHtml)) {
|
|
291
|
+
const denied = detectAccessDenied(rawHtml);
|
|
292
|
+
throw new CDNBlockedError(
|
|
293
|
+
`CDN限流 (Access Denied, ref:${denied.reference || "N/A"})`,
|
|
294
|
+
denied.reference,
|
|
295
|
+
);
|
|
296
|
+
}
|
|
281
297
|
result = parseVideoInfo(rawHtml);
|
|
282
298
|
}
|
|
283
299
|
return result || null;
|
package/src/scraper/explore-core.js
CHANGED
|
@@ -87,11 +87,14 @@ async function processExplore(page, username, options, log) {
|
|
|
87
87
|
if (result.userInfo) result.userInfo.latestVideoTime = latestCreateTime;
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
-
// 找出 7
|
|
90
|
+
// 找出 7 天内发布且 isECVideo=1 且播放量最大的视频
|
|
91
91
|
const SEVEN_DAYS_SECONDS = 7 * 24 * 60 * 60;
|
|
92
92
|
const nowSeconds = Math.floor(Date.now() / 1000);
|
|
93
93
|
const recentVideos = videoArray.filter(
|
|
94
|
-
(v) =>
|
|
94
|
+
(v) =>
|
|
95
|
+
v.isECVideo === 1 &&
|
|
96
|
+
v.createTime &&
|
|
97
|
+
nowSeconds - v.createTime <= SEVEN_DAYS_SECONDS,
|
|
95
98
|
);
|
|
96
99
|
if (recentVideos.length > 0) {
|
|
97
100
|
const topVideo = recentVideos.reduce((max, v) =>
|
|
@@ -104,7 +107,7 @@ async function processExplore(page, username, options, log) {
|
|
|
104
107
|
createTime: topVideo.createTime,
|
|
105
108
|
};
|
|
106
109
|
log(
|
|
107
|
-
` 7
|
|
110
|
+
` 7天内 EC视频最高播放: ${topVideo.playCount} 次播放 (${recentVideos.length} 个EC候选)`,
|
|
108
111
|
);
|
|
109
112
|
}
|
|
110
113
|
|
package/src/watch/data-store.js
CHANGED
|
@@ -90,7 +90,9 @@ import {
|
|
|
90
90
|
getDeadTags,
|
|
91
91
|
claimTag,
|
|
92
92
|
reportTagScore,
|
|
93
|
+
resetStaleScoringTags,
|
|
93
94
|
getAllTags,
|
|
95
|
+
getTagStats,
|
|
94
96
|
rawQuery,
|
|
95
97
|
normalizeTags,
|
|
96
98
|
clearTags,
|
|
@@ -1415,6 +1417,7 @@ export function createStore(filePath, options = {}) {
|
|
|
1415
1417
|
`(
|
|
1416
1418
|
instr(COALESCE(sources, ''), '"following"') > 0
|
|
1417
1419
|
OR instr(COALESCE(sources, ''), '"follower"') > 0
|
|
1420
|
+
OR instr(COALESCE(sources, ''), '"comment"') > 0
|
|
1418
1421
|
)`,
|
|
1419
1422
|
],
|
|
1420
1423
|
});
|
|
@@ -1639,7 +1642,8 @@ export function createStore(filePath, options = {}) {
|
|
|
1639
1642
|
(u) =>
|
|
1640
1643
|
u.sources &&
|
|
1641
1644
|
(u.sources.includes("following") ||
|
|
1642
|
-
u.sources.includes("follower")),
|
|
1645
|
+
u.sources.includes("follower") ||
|
|
1646
|
+
u.sources.includes("comment")),
|
|
1643
1647
|
);
|
|
1644
1648
|
follow.sort((a, b) => locationTier(a) - locationTier(b));
|
|
1645
1649
|
next = follow[0] || null;
|
|
@@ -2674,6 +2678,30 @@ export function createStore(filePath, options = {}) {
|
|
|
2674
2678
|
return { ok: true, location, modifiedAt: user.modifiedAt };
|
|
2675
2679
|
}
|
|
2676
2680
|
|
|
2681
|
+
function setNonSeller(uniqueId) {
|
|
2682
|
+
if (getDb()) {
|
|
2683
|
+
const existing = getDb()
|
|
2684
|
+
.prepare("SELECT * FROM jobs WHERE unique_id = ?")
|
|
2685
|
+
.get(uniqueId);
|
|
2686
|
+
if (!existing) return { error: "user not found" };
|
|
2687
|
+
const now = Date.now();
|
|
2688
|
+
getDb()
|
|
2689
|
+
.prepare(
|
|
2690
|
+
"UPDATE jobs SET tt_seller = 0, updated_at = ? WHERE unique_id = ?",
|
|
2691
|
+
)
|
|
2692
|
+
.run(now, uniqueId);
|
|
2693
|
+
console.error(`[DB] setNonSeller: ${uniqueId} → tt_seller=0`);
|
|
2694
|
+
return { ok: true };
|
|
2695
|
+
}
|
|
2696
|
+
|
|
2697
|
+
const user = getUser(uniqueId);
|
|
2698
|
+
if (!user) return { error: "user not found" };
|
|
2699
|
+
user.ttSeller = false;
|
|
2700
|
+
user.updatedAt = Date.now();
|
|
2701
|
+
save();
|
|
2702
|
+
return { ok: true };
|
|
2703
|
+
}
|
|
2704
|
+
|
|
2677
2705
|
// 将单个 job 移动到 raw_jobs 表(完整字段复制 + 删除原记录)
|
|
2678
2706
|
function moveJobToRaw(uniqueId) {
|
|
2679
2707
|
if (!getDb()) return false;
|
|
@@ -3127,6 +3155,7 @@ export function createStore(filePath, options = {}) {
|
|
|
3127
3155
|
getPendingUserUpdateTasks,
|
|
3128
3156
|
updateUserInfo,
|
|
3129
3157
|
updateUserLocation,
|
|
3158
|
+
setNonSeller,
|
|
3130
3159
|
batchUpdateUserInfo,
|
|
3131
3160
|
reportClientError,
|
|
3132
3161
|
deleteClientError,
|
|
@@ -3154,7 +3183,9 @@ export function createStore(filePath, options = {}) {
|
|
|
3154
3183
|
getDeadTags,
|
|
3155
3184
|
claimTag,
|
|
3156
3185
|
reportTagScore,
|
|
3186
|
+
resetStaleScoringTags,
|
|
3157
3187
|
getAllTags,
|
|
3188
|
+
getTagStats,
|
|
3158
3189
|
normalizeTags,
|
|
3159
3190
|
clearTags,
|
|
3160
3191
|
data,
|
package/src/watch/db-stats.js
CHANGED
|
@@ -93,8 +93,11 @@ export function getDashboardStatsFromDb(targetLocations = []) {
|
|
|
93
93
|
.prepare("SELECT COUNT(*) as total FROM jobs_base")
|
|
94
94
|
.get().total;
|
|
95
95
|
|
|
96
|
+
const tagCount = db.prepare("SELECT COUNT(*) as total FROM tags").get().total;
|
|
97
|
+
|
|
96
98
|
return {
|
|
97
99
|
totalUsers: aggregateRow.total,
|
|
100
|
+
tagCount,
|
|
98
101
|
rawJobs: getRawJobsCount(),
|
|
99
102
|
dbTotalUsers: getUserDbCount(),
|
|
100
103
|
jobsTotal: aggregateRow.total,
|
package/src/watch/db-tags.js
CHANGED
|
@@ -33,17 +33,61 @@ export function insertTag(tag, countries, source = "llm") {
|
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
export function getTagsByStatus(
|
|
36
|
+
export function getTagsByStatus(
|
|
37
|
+
status,
|
|
38
|
+
limit = 100,
|
|
39
|
+
offset = 0,
|
|
40
|
+
country = null,
|
|
41
|
+
) {
|
|
37
42
|
const db = getDb();
|
|
38
43
|
if (!db) return [];
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
.
|
|
44
|
+
let sql = "SELECT * FROM tags WHERE status = ?";
|
|
45
|
+
const params = [status];
|
|
46
|
+
if (country) {
|
|
47
|
+
sql += " AND countries LIKE ?";
|
|
48
|
+
params.push(`%"${country}"%`);
|
|
49
|
+
}
|
|
50
|
+
sql += " ORDER BY score ASC, created_at ASC LIMIT ? OFFSET ?";
|
|
51
|
+
params.push(limit, offset);
|
|
52
|
+
const rows = db.prepare(sql).all(...params);
|
|
44
53
|
return rows.map(parseTagRow);
|
|
45
54
|
}
|
|
46
55
|
|
|
56
|
+
export function getTagStats(country = null) {
|
|
57
|
+
const db = getDb();
|
|
58
|
+
if (!db) return null;
|
|
59
|
+
let sql = `SELECT
|
|
60
|
+
COUNT(*) as total,
|
|
61
|
+
SUM(CASE WHEN status = 'productive' THEN 1 ELSE 0 END) as productive,
|
|
62
|
+
SUM(CASE WHEN status = 'dead' THEN 1 ELSE 0 END) as dead,
|
|
63
|
+
SUM(CASE WHEN status = 'new' THEN 1 ELSE 0 END) as newCount,
|
|
64
|
+
SUM(CASE WHEN status = 'scoring' THEN 1 ELSE 0 END) as scoring
|
|
65
|
+
FROM tags`;
|
|
66
|
+
const params = [];
|
|
67
|
+
if (country) {
|
|
68
|
+
sql += " WHERE countries LIKE ?";
|
|
69
|
+
params.push(`%"${country}"%`);
|
|
70
|
+
}
|
|
71
|
+
const row = db.prepare(sql).get(...params);
|
|
72
|
+
// 获取所有出现过的国家
|
|
73
|
+
const allRows = db.prepare("SELECT countries FROM tags").all();
|
|
74
|
+
const countrySet = new Set();
|
|
75
|
+
for (const r of allRows) {
|
|
76
|
+
try {
|
|
77
|
+
const arr = JSON.parse(r.countries || "[]");
|
|
78
|
+
for (const c of arr) countrySet.add(c);
|
|
79
|
+
} catch {}
|
|
80
|
+
}
|
|
81
|
+
return {
|
|
82
|
+
total: row.total,
|
|
83
|
+
productive: row.productive || 0,
|
|
84
|
+
dead: row.dead || 0,
|
|
85
|
+
new: row.newCount || 0,
|
|
86
|
+
scoring: row.scoring || 0,
|
|
87
|
+
countries: [...countrySet].sort(),
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
|
|
47
91
|
export function getTagsByCountry(country, minScore = 0) {
|
|
48
92
|
const db = getDb();
|
|
49
93
|
if (!db) return [];
|
|
@@ -64,12 +108,33 @@ export function getDeadTags(country) {
|
|
|
64
108
|
return rows.map(parseTagRow).filter((r) => r.countries.includes(country));
|
|
65
109
|
}
|
|
66
110
|
|
|
111
|
+
export function resetStaleScoringTags(minutes = 30) {
|
|
112
|
+
const db = getDb();
|
|
113
|
+
if (!db) return { ok: false, error: "db not ready" };
|
|
114
|
+
// 清理超时的 scoring 标签:有时间戳的按时间,没时间戳的(旧数据)直接清
|
|
115
|
+
const result = db
|
|
116
|
+
.prepare(
|
|
117
|
+
"UPDATE tags SET status = 'new', scored_at = NULL WHERE status = 'scoring' AND (scored_at IS NULL OR scored_at < datetime('now', ?))",
|
|
118
|
+
)
|
|
119
|
+
.run(`-${minutes} minutes`);
|
|
120
|
+
if (result.changes > 0) {
|
|
121
|
+
console.error(
|
|
122
|
+
`[tags] 清理了 ${result.changes} 个超时 scoring 标签(>${minutes}分钟)`,
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
return { ok: true, reset: result.changes };
|
|
126
|
+
}
|
|
127
|
+
|
|
67
128
|
export function claimTag(tag) {
|
|
68
129
|
const db = getDb();
|
|
69
130
|
if (!db) return { ok: false, error: "db not ready" };
|
|
131
|
+
|
|
132
|
+
// 先清理超时的 scoring 标签,防止死任务堆积
|
|
133
|
+
resetStaleScoringTags();
|
|
134
|
+
|
|
70
135
|
const result = db
|
|
71
136
|
.prepare(
|
|
72
|
-
"UPDATE tags SET status = 'scoring' WHERE tag = ? AND status = 'new'",
|
|
137
|
+
"UPDATE tags SET status = 'scoring', scored_at = datetime('now') WHERE tag = ? AND status = 'new'",
|
|
73
138
|
)
|
|
74
139
|
.run(tag);
|
|
75
140
|
if (result.changes === 0) {
|
|
@@ -132,12 +197,18 @@ export function reportTagScore(tag, fields) {
|
|
|
132
197
|
}
|
|
133
198
|
}
|
|
134
199
|
|
|
135
|
-
export function getAllTags(limit = 200) {
|
|
200
|
+
export function getAllTags(limit = 200, offset = 0, country = null) {
|
|
136
201
|
const db = getDb();
|
|
137
202
|
if (!db) return [];
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
203
|
+
let sql = "SELECT * FROM tags";
|
|
204
|
+
const params = [];
|
|
205
|
+
if (country) {
|
|
206
|
+
sql += " WHERE countries LIKE ?";
|
|
207
|
+
params.push(`%"${country}"%`);
|
|
208
|
+
}
|
|
209
|
+
sql += " ORDER BY score DESC, created_at DESC LIMIT ? OFFSET ?";
|
|
210
|
+
params.push(limit, offset);
|
|
211
|
+
const rows = db.prepare(sql).all(...params);
|
|
141
212
|
return rows.map(parseTagRow);
|
|
142
213
|
}
|
|
143
214
|
|