ppxc-leads-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -0
- package/dist/backend/config.js +13 -0
- package/dist/backend/ppxc-client.js +156 -0
- package/dist/backend/ppxc-login-window.js +168 -0
- package/dist/backend/token-store.js +65 -0
- package/dist/browser/comments.js +9 -0
- package/dist/browser/douyin-runner.js +15 -0
- package/dist/browser/kernel/electron-profile.js +32 -0
- package/dist/browser/kernel/logger.js +57 -0
- package/dist/browser/kernel/page-scripts/index.js +1422 -0
- package/dist/browser/kernel/runner-page-manager.js +145 -0
- package/dist/browser/kernel/runner-page-session.js +1465 -0
- package/dist/browser/kernel/runner-page-session.search-parser.js +187 -0
- package/dist/browser/kernel/runner-page-session.user-agent.js +32 -0
- package/dist/browser/platform-runner.js +312 -0
- package/dist/browser/platforms/detect-platform.js +33 -0
- package/dist/browser/platforms/douyin/adapter.js +162 -0
- package/dist/browser/platforms/douyin/comments.js +130 -0
- package/dist/browser/platforms/kuaishou/adapter.js +178 -0
- package/dist/browser/platforms/kuaishou/comments.js +170 -0
- package/dist/browser/platforms/registry.js +23 -0
- package/dist/browser/platforms/shared/cdp-json-waiter.js +75 -0
- package/dist/browser/platforms/types.js +3 -0
- package/dist/browser/platforms/xiaohongshu/adapter.js +233 -0
- package/dist/browser/platforms/xiaohongshu/comments.js +184 -0
- package/dist/browser/usage-throttle.js +72 -0
- package/dist/main.js +64 -0
- package/dist/mcp/battle-report.js +325 -0
- package/dist/mcp/content-insights.js +66 -0
- package/dist/mcp/diagnostics.js +79 -0
- package/dist/mcp/server.js +829 -0
- package/dist/version.js +19 -0
- package/package.json +43 -0
- package/scripts/launch-mcp.cjs +96 -0
- package/skills/ppxc-find-customers/SKILL.md +110 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isLikelyDouyinSearchJsonResponse = isLikelyDouyinSearchJsonResponse;
|
|
4
|
+
exports.parseSearchItemPayload = parseSearchItemPayload;
|
|
5
|
+
exports.sourceUrlPattern = sourceUrlPattern;
|
|
6
|
+
exports.redactSearchUrl = redactSearchUrl;
|
|
7
|
+
// Matches a hostname that is exactly "douyin.com" or ends with ".douyin.com" (case-insensitive).
const DOUYIN_JSON_HOST_RE = /(^|\.)douyin\.com$/i;
|
|
8
|
+
// Matches a script/style/image/font/media file extension that is immediately followed by
// end-of-string or "?"; used to rule out static asset requests (case-insensitive).
const STATIC_RESOURCE_EXT_RE = /\.(?:js|mjs|css|png|jpe?g|webp|gif|svg|ico|woff2?|ttf|mp4|webm|m4a|mp3)(?:$|\?)/i;
|
|
9
|
+
/**
 * Heuristically decide whether a network response looks like a Douyin search
 * JSON payload, based only on its URL and (optional) MIME type.
 *
 * @param {string} url - Response URL; must parse as http(s).
 * @param {string} [mimeType] - Reported MIME type; an empty/missing value is not filtered.
 * @returns {boolean} True when the URL is on a douyin.com host, is not a static
 *   asset, has an acceptable MIME type, and its path/query looks search-related.
 */
function isLikelyDouyinSearchJsonResponse(url, mimeType) {
    const target = parseHttpUrl(url);
    if (!target || !DOUYIN_JSON_HOST_RE.test(target.hostname)) {
        return false;
    }
    // Path and query are matched case-insensitively from here on.
    const haystack = [target.pathname, target.search]
        .map((part) => part.toLowerCase())
        .join("");
    if (STATIC_RESOURCE_EXT_RE.test(haystack)) {
        return false;
    }
    const mime = String(mimeType || "").toLowerCase();
    if (mime !== "") {
        const acceptable = /(^|[/+])json\b|text\/plain|application\/octet-stream/.test(mime);
        if (!acceptable) {
            return false;
        }
    }
    // The canonical search-item endpoint is accepted outright (case-sensitive path check).
    if (target.pathname.includes("/aweme/v1/web/search/item/")) {
        return true;
    }
    const mentions = (needle) => haystack.includes(needle);
    return mentions("search") && mentions("aweme") && (mentions("item") || mentions("general"));
}
|
|
28
|
+
/**
 * Convert a raw search response payload into a normalized batch record.
 *
 * @param {object} payload - Parsed JSON body of the search response.
 * @param {{offset: number, count: number, url: string}} meta - Request metadata;
 *   `offset`, `count` and `url` are the fields read here.
 * @returns {object|null} A batch object (items plus paging/status fields), or
 *   null when the payload is not an object or carries no recognizable batch marker.
 */
function parseSearchItemPayload(payload, meta) {
    if (!payload || typeof payload !== "object") {
        return null;
    }
    const stringOrUndefined = (candidate) => (typeof candidate === "string" ? candidate : undefined);
    const items = [];
    const knownIds = new Set();
    for (const aweme of extractAwemeObjects(payload)) {
        const awemeId = asNonEmptyString(aweme.aweme_id);
        if (!awemeId || knownIds.has(awemeId)) {
            continue;
        }
        knownIds.add(awemeId);
        const author = asRecord(aweme.author);
        const statistics = asRecord(aweme.statistics);
        const video = asRecord(aweme.video);
        // Prefer the thumb avatar, then the medium one, then a flat avatar_url string.
        const avatar = asRecord(author?.avatar_thumb) ?? asRecord(author?.avatar_medium);
        const avatarUrls = asArray(avatar?.url_list);
        const coverUrls = asArray(asRecord(video?.cover)?.url_list);
        const durationRaw = asNumber(video?.duration);
        items.push({
            awemeId,
            title: stringOrUndefined(aweme.desc),
            authorName: stringOrUndefined(author?.nickname),
            authorSecUid: stringOrUndefined(author?.sec_uid),
            authorAvatarUrl: stringOrUndefined(avatarUrls?.[0]) ?? stringOrUndefined(author?.avatar_url),
            coverUrl: stringOrUndefined(coverUrls?.[0]),
            commentCount: asNumber(statistics?.comment_count),
            createTime: asNumber(aweme.create_time),
            // The raw duration is divided by 1000 and rounded.
            durationSec: durationRaw === undefined ? undefined : Math.round((durationRaw || 0) / 1000),
        });
    }
    // An empty result still counts as a batch if any of these top-level keys exists.
    const batchMarkers = [
        "verify_check",
        "has_more",
        "cursor",
        "status_code",
        "data",
        "business_data",
        "aweme_list",
    ];
    const looksLikeBatch = items.length > 0 || batchMarkers.some((key) => key in payload);
    if (!looksLikeBatch) {
        return null;
    }
    return {
        arrivedAt: Date.now(),
        offset: meta.offset,
        count: meta.count,
        // Falls back to "request offset + items parsed" when the payload has no cursor.
        cursor: Number(payload.cursor ?? meta.offset + items.length),
        hasMore: Number(payload.has_more ?? 0),
        statusCode: Number(payload.status_code ?? 0),
        verifyCheck: Boolean(payload.verify_check),
        sourceUrlPattern: sourceUrlPattern(meta.url),
        parsedVideoCount: items.length,
        items,
        rawUrl: redactSearchUrl(meta.url),
    };
}
|
|
90
|
+
/**
 * Reduce a URL to a stable path pattern for grouping and logging.
 *
 * @param {string} url - URL to summarize.
 * @returns {string} "[unparseable]" for non-http(s)/invalid input, the canonical
 *   search-item path when the URL contains it, otherwise the pathname with a
 *   trailing slash.
 */
function sourceUrlPattern(url) {
    const SEARCH_ITEM_PATH = "/aweme/v1/web/search/item/";
    const location = parseHttpUrl(url);
    if (!location) {
        return "[unparseable]";
    }
    let normalized = location.pathname;
    if (!normalized.endsWith("/")) {
        normalized = `${normalized}/`;
    }
    return normalized.includes(SEARCH_ITEM_PATH) ? SEARCH_ITEM_PATH : normalized;
}
|
|
100
|
+
/**
 * Build a log-safe summary of a search request URL.
 *
 * Only the pathname, the paging parameters (offset / count / cursor) and the
 * LENGTH of the search keyword are kept; the keyword text itself is never emitted.
 *
 * @param {string} url - Full request URL.
 * @returns {string} "[unparseable]" when the URL is not valid http(s), otherwise
 *   `<pathname>?offset=…&count=…&cursor=…&keyword.len=<n>`.
 */
function redactSearchUrl(url) {
    const parsed = parseHttpUrl(url);
    if (!parsed) {
        return "[unparseable]";
    }
    const { searchParams } = parsed;
    // searchParams.get() returns DECODED text. Re-encode it before splicing it into
    // the summary so a value containing "&", "=" or free text cannot forge extra
    // fields or leak through the redacted string. Plain numeric values are unchanged.
    const paging = ["offset", "count", "cursor"]
        .map((key) => `${key}=${encodeURIComponent(searchParams.get(key) ?? "")}`)
        .join("&");
    const kw = searchParams.get("keyword") ?? searchParams.get("query") ?? "";
    return `${parsed.pathname}?${paging}&keyword.len=${kw.length}`;
}
|
|
110
|
+
/**
 * Walk an arbitrary JSON-like value and collect every object that carries a
 * usable `aweme_id`, de-duplicated by that id, in discovery order.
 *
 * The walk is bounded: it stops descending past depth 8, after more than 1500
 * visited nodes, or once 300 objects have been collected. Object identity is
 * tracked so shared or cyclic references are only expanded once.
 *
 * @param {*} root - Value to search.
 * @returns {object[]} Collected aweme records.
 */
function extractAwemeObjects(root) {
    const MAX_DEPTH = 8;
    const MAX_VISITED = 1500;
    const MAX_RESULTS = 300;
    const collected = [];
    const expanded = new WeakSet();
    const collectedIds = new Set();
    let nodeCount = 0;

    function collect(candidate) {
        const record = asRecord(candidate);
        if (!record) {
            return;
        }
        const id = asNonEmptyString(record.aweme_id);
        if (id && !collectedIds.has(id)) {
            collectedIds.add(id);
            collected.push(record);
        }
    }

    function walk(node, depth) {
        if (node === null || typeof node !== "object") {
            return;
        }
        const budgetSpent = depth > MAX_DEPTH || nodeCount > MAX_VISITED || collected.length >= MAX_RESULTS;
        if (budgetSpent) {
            return;
        }
        // Counted before the identity check, so revisits also consume budget.
        nodeCount += 1;
        if (expanded.has(node)) {
            return;
        }
        expanded.add(node);
        if (Array.isArray(node)) {
            for (const entry of node) {
                walk(entry, depth + 1);
            }
            return;
        }
        // The object itself may be an aweme; a nested `aweme_info` is collected
        // right after it, before any other child is explored.
        collect(node);
        const nested = asRecord(node.aweme_info);
        if (nested) {
            collect(nested);
        }
        Object.values(node).forEach((child) => walk(child, depth + 1));
    }

    walk(root, 0);
    return collected;
}
|
|
152
|
+
/**
 * Parse a string as an absolute http/https URL.
 *
 * @param {string} url - Candidate URL.
 * @returns {URL|null} The parsed URL, or null when parsing fails or the scheme
 *   is not http/https.
 */
function parseHttpUrl(url) {
    let candidate;
    try {
        candidate = new URL(url);
    }
    catch {
        return null;
    }
    return /^https?:$/i.test(candidate.protocol) ? candidate : null;
}
|
|
163
|
+
/**
 * Narrow a value to a non-null, non-array object.
 *
 * @param {*} value - Any value.
 * @returns {object|undefined} The same reference when it is a non-array object,
 *   otherwise undefined.
 */
function asRecord(value) {
    const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
    if (isRecord) {
        return value;
    }
    return undefined;
}
|
|
168
|
+
/**
 * Narrow a value to an array.
 *
 * @param {*} value - Any value.
 * @returns {Array|undefined} The same array reference, or undefined for non-arrays.
 */
function asArray(value) {
    if (!Array.isArray(value)) {
        return undefined;
    }
    return value;
}
|
|
171
|
+
/**
 * Coerce an identifier-like value to a string.
 *
 * @param {*} value - String or number; anything else yields "".
 * @returns {string} The trimmed string, the truncated integer rendered as a
 *   string for finite numbers, or "" otherwise. Callers treat "" as "missing".
 */
function asNonEmptyString(value) {
    switch (typeof value) {
        case "string":
            return value.trim();
        case "number":
            return Number.isFinite(value) ? String(Math.trunc(value)) : "";
        default:
            return "";
    }
}
|
|
178
|
+
/**
 * Coerce a value to a finite number.
 *
 * @param {*} value - Number or numeric string.
 * @returns {number|undefined} The finite number, or undefined when the value is
 *   not a number / non-blank string or does not convert to a finite number.
 */
function asNumber(value) {
    let candidate;
    if (typeof value === "number") {
        candidate = value;
    }
    else if (typeof value === "string" && value.trim()) {
        candidate = Number(value);
    }
    else {
        return undefined;
    }
    return Number.isFinite(candidate) ? candidate : undefined;
}
|
|
187
|
+
//# sourceMappingURL=runner-page-session.search-parser.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.resolveDouyinUserAgent = resolveDouyinUserAgent;
|
|
4
|
+
// Fallback Chrome version returned by pickChromeVersion when the reported
// version string is missing or is not a dotted numeric version.
const DEFAULT_CHROME_VERSION = "148.0.0.0";
|
|
5
|
+
/**
 * Choose the Chrome version to advertise, normalized to four dotted segments.
 *
 * @param {{chrome?: string}} [versions] - Runtime versions map (e.g. process.versions).
 * @returns {string} `versions.chrome` padded with ".0" up to four segments, or
 *   DEFAULT_CHROME_VERSION when it is absent or not of the form `N(.N){0,3}`.
 */
function pickChromeVersion(versions) {
    const reported = String(versions?.chrome ?? "").trim();
    const looksLikeVersion = /^\d+(?:\.\d+){0,3}$/.test(reported);
    if (!looksLikeVersion) {
        return DEFAULT_CHROME_VERSION;
    }
    const segments = reported.split(".");
    // The pattern guarantees 1..4 segments, so the padding length is never negative.
    const padding = Array.from({ length: 4 - segments.length }, () => "0");
    return [...segments, ...padding].join(".");
}
|
|
15
|
+
/**
 * Assemble a desktop-Chrome style User-Agent string.
 *
 * @param {string} platformLabel - Text placed inside the parentheses after "Mozilla/5.0".
 * @param {string} chromeVersion - Version placed after "Chrome/".
 * @returns {string} The complete User-Agent header value.
 */
function buildUserAgent(platformLabel, chromeVersion) {
    const parts = [
        `Mozilla/5.0 (${platformLabel})`,
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        `Chrome/${chromeVersion} Safari/537.36`,
    ];
    return parts.join(" ");
}
|
|
20
|
+
/**
 * Build the User-Agent string for the current (or given) platform.
 *
 * @param {string} [platform=process.platform] - Node platform id.
 * @param {{chrome?: string}} [electronVersions=process.versions] - Versions map
 *   from which the Chrome version is picked.
 * @returns {string} A Windows UA for "win32", a Linux UA for "linux", and a
 *   macOS UA for "darwin" or any other value.
 */
function resolveDouyinUserAgent(platform = process.platform, electronVersions = process.versions) {
    const chromeVersion = pickChromeVersion(electronVersions);
    let platformLabel;
    if (platform === "win32") {
        platformLabel = "Windows NT 10.0; Win64; x64";
    }
    else if (platform === "linux") {
        platformLabel = "X11; Linux x86_64";
    }
    else {
        // "darwin" and every unrecognized platform share the macOS label.
        platformLabel = "Macintosh; Intel Mac OS X 10_15_7";
    }
    return buildUserAgent(platformLabel, chromeVersion);
}
|
|
32
|
+
//# sourceMappingURL=runner-page-session.user-agent.js.map
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.startDouyinLogin = exports.fetchVideoComments = exports.RunnerError = void 0;
|
|
4
|
+
exports.getLoginStatus = getLoginStatus;
|
|
5
|
+
exports.startPlatformLogin = startPlatformLogin;
|
|
6
|
+
exports.fetchContentComments = fetchContentComments;
|
|
7
|
+
exports.searchKeywordForLeads = searchKeywordForLeads;
|
|
8
|
+
exports.searchKeywordsBatch = searchKeywordsBatch;
|
|
9
|
+
exports.shutdownRunner = shutdownRunner;
|
|
10
|
+
exports.mapSearchResultForDouyin = mapSearchResultForDouyin;
|
|
11
|
+
const logger_1 = require("./kernel/logger");
|
|
12
|
+
const registry_1 = require("./platforms/registry");
|
|
13
|
+
const usage_throttle_1 = require("./usage-throttle");
|
|
14
|
+
const log = logger_1.logger.scope("platform-runner");
|
|
15
|
+
// Slot id passed to manager.acquire() for login, login-status checks and
// single-content fetches; also the default slot of searchKeywordForLeads.
const SLOT_ID = 0;
|
|
16
|
+
// Upper bound on the number of slots searchKeywordsBatch runs concurrently.
const MAX_SLOTS = 2;
|
|
17
|
+
// Wait budget handed to adapter.fetchComments and adapter.awaitSearchBatch
// (milliseconds, going by the name — confirm against the adapter contract).
const DEFAULT_FETCH_WAIT_MS = 25000;
|
|
18
|
+
/**
 * Error raised by the platform runner; carries a machine-readable `code`
 * (e.g. "BUSY", "LOGIN_REQUIRED") next to the human-readable message.
 */
class RunnerError extends Error {
    /**
     * @param {string} code - Machine-readable error code.
     * @param {string} message - Human-readable description.
     */
    constructor(code, message) {
        super(message);
        // Assigned in this order so own-property order stays code, then name.
        Object.assign(this, { code, name: "RunnerError" });
    }
}
|
|
25
|
+
exports.RunnerError = RunnerError;
|
|
26
|
+
// Label of the operation currently holding the runner lock, or null when idle.
// Set and cleared by withRunnerLock.
let runnerBusyWith = null;
|
|
27
|
+
// Page managers keyed by platform id; filled lazily by getManager and emptied by shutdownRunner.
const managers = new Map();
|
|
28
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves (with undefined) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
31
|
+
/**
 * Run `fn` while holding the module-wide runner lock.
 *
 * The lock does not queue: if another operation already holds it, this call
 * fails immediately instead of waiting.
 *
 * @param {string} operation - Label stored while the lock is held and shown in
 *   the BUSY message of competing callers.
 * @param {() => Promise<*>} fn - Work to run under the lock.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws {RunnerError} Code "BUSY" when the lock is already held.
 */
async function withRunnerLock(operation, fn) {
    const holder = runnerBusyWith;
    if (holder) {
        throw new RunnerError("BUSY", `正在执行「${holder}」,请等它结束后再试`);
    }
    runnerBusyWith = operation;
    try {
        const outcome = await fn();
        return outcome;
    }
    finally {
        // Released on success and on failure alike.
        runnerBusyWith = null;
    }
}
|
|
43
|
+
/**
 * Consume crawl quota for a platform and throw when the throttle refuses.
 *
 * Note that this CONSUMES quota via consumeCrawlQuota; it is not a read-only check.
 *
 * @param {string} platform - Platform id.
 * @param {number} units - Quota units to consume.
 * @throws {RunnerError} "RATE_LIMITED" when the refusal reason is "interval",
 *   "DAILY_LIMITED" for any other refusal.
 */
function assertCrawlQuota(platform, units) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    const { dailyQuotaUnits } = adapter.risk;
    const verdict = (0, usage_throttle_1.consumeCrawlQuota)(platform, units, dailyQuotaUnits);
    if (verdict.ok) {
        return;
    }
    const [code, message] = verdict.reason === "interval"
        ? ["RATE_LIMITED", `两次抓取之间需要间隔 30 秒(还差 ${verdict.waitSec} 秒)`]
        : ["DAILY_LIMITED", `今天 ${adapter.displayName} 的安全抓取额度已用完(${verdict.usedToday}/${verdict.limit}),明天再继续`];
    throw new RunnerError(code, message);
}
|
|
53
|
+
/**
 * Return the page manager for a platform, creating and caching it on first use.
 *
 * @param {string} platform - Platform id.
 * @returns {object} The manager produced by the platform adapter's createManager().
 */
function getManager(platform) {
    const cached = managers.get(platform);
    if (cached) {
        return cached;
    }
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    // PPXC_MCP_PAGE_VISIBLE=1 is forwarded to createManager as its visibility flag.
    const wantsVisible = process.env.PPXC_MCP_PAGE_VISIBLE === "1";
    const created = adapter.createManager(wantsVisible);
    managers.set(platform, created);
    return created;
}
|
|
62
|
+
/**
 * Pick a random per-item delay from the adapter's risk settings.
 *
 * @param {{risk: {perItemMinDelayMs: number, perItemMaxDelayMs: number}}} adapter
 * @returns {number} perItemMinDelayMs plus a random whole number below
 *   (perItemMaxDelayMs - perItemMinDelayMs).
 */
function randomDelay(adapter) {
    const floorMs = adapter.risk.perItemMinDelayMs;
    const spanMs = adapter.risk.perItemMaxDelayMs - floorMs;
    return floorMs + Math.floor(Math.random() * spanMs);
}
|
|
66
|
+
/**
 * Make sure the session is showing the page for `contentId`.
 *
 * When the session's current URL already contains the content id, no
 * navigation happens and the function just waits 1 second. Otherwise it loads
 * `contentUrl` and waits 3 seconds. Navigation is best-effort: a failed load
 * is logged and does not abort the caller.
 *
 * @param {object} session - Page session exposing currentUrl() and loadUrl().
 * @param {string} contentId - Id expected to appear in the page URL.
 * @param {string} contentUrl - URL to load when not already on the page.
 * @returns {Promise<void>}
 */
async function ensureOnContentPage(session, contentId, contentUrl) {
    const current = session.currentUrl();
    if (current.includes(contentId)) {
        await sleep(1000);
        return;
    }
    try {
        await session.loadUrl(contentUrl);
    }
    catch (err) {
        // Still best-effort, but no longer silent: record why navigation failed
        // so a later fetch timeout can be traced back to it.
        log.warn("content page navigation failed", {
            contentId,
            msg: err instanceof Error ? err.message : String(err),
        });
    }
    await sleep(3000);
}
|
|
79
|
+
/**
 * Raise VERIFICATION_REQUIRED when the adapter's probe reports a verification
 * challenge. The session is revealed first so a person can deal with it.
 *
 * @param {object} session - Page session (reveal() is called on a positive probe).
 * @param {object} adapter - Platform adapter providing probeVerification() and displayName.
 * @returns {Promise<void>} Resolves normally when the probe is negative.
 * @throws {RunnerError} Code "VERIFICATION_REQUIRED".
 */
async function throwIfVerification(session, adapter) {
    const challenged = await adapter.probeVerification(session);
    if (!challenged) {
        return;
    }
    session.reveal();
    throw new RunnerError("VERIFICATION_REQUIRED", `${adapter.displayName}要求验证,已弹出窗口请人工处理`);
}
|
|
85
|
+
/**
 * Report whether the platform account is logged in, using a hidden session.
 *
 * @param {string} platform - Platform id.
 * @returns {Promise<{loggedIn: boolean, displayName?: *}>} `displayName` is
 *   only present when logged in.
 */
async function getLoginStatus(platform) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    const session = getManager(platform).acquire(SLOT_ID, { visible: false });
    if (await adapter.isLoggedIn(session)) {
        return { loggedIn: true, displayName: await adapter.getProfileDisplayName(session) };
    }
    return { loggedIn: false };
}
|
|
94
|
+
/**
 * Start the platform's login flow under the runner lock.
 *
 * @param {string} platform - Platform id.
 * @param {number} [timeoutMs=180000] - Timeout forwarded to adapter.startLogin.
 * @returns {Promise<{loggedIn: *, displayName: *}>} The two fields copied from
 *   the adapter's login result.
 * @throws {RunnerError} Code "BUSY" when another locked operation is running.
 */
async function startPlatformLogin(platform, timeoutMs = 180000) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    const runLogin = async () => {
        const outcome = await adapter.startLogin(getManager(platform), SLOT_ID, timeoutMs);
        const { loggedIn, displayName } = outcome;
        return { loggedIn, displayName };
    };
    return withRunnerLock(`${adapter.displayName}扫码登录`, runLogin);
}
|
|
101
|
+
/**
 * Fetch comments for one piece of content, serialized through the runner lock.
 *
 * @param {string} platform - Platform id.
 * @param {string} contentUrlOrId - Content URL or id.
 * @param {number} [maxComments=30] - Requested number of comments.
 * @returns {Promise<object>} Result of fetchContentCommentsInner.
 * @throws {RunnerError} Code "BUSY" when another locked operation is running,
 *   plus anything fetchContentCommentsInner throws.
 */
async function fetchContentComments(platform, contentUrlOrId, maxComments = 30) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    const label = `${adapter.displayName}分析评论`;
    const task = () => fetchContentCommentsInner(adapter, contentUrlOrId, maxComments);
    return withRunnerLock(label, task);
}
|
|
105
|
+
/**
 * Lock-free body of fetchContentComments: check login, consume quota, resolve
 * the content id, open the page, fetch and parse the comments.
 *
 * @param {object} adapter - Platform adapter.
 * @param {string} contentUrlOrId - Content URL or id.
 * @param {number} maxComments - Requested count; floored, a falsy result
 *   becomes 30, then clamped to 1..50.
 * @returns {Promise<object>} Content id/url (also under the legacy keys
 *   awemeId/videoUrl), parsed comments, rawCount and hasMore.
 * @throws {RunnerError} LOGIN_REQUIRED, RATE_LIMITED / DAILY_LIMITED,
 *   BAD_VIDEO_LINK, VERIFICATION_REQUIRED or FETCH_TIMEOUT.
 */
async function fetchContentCommentsInner(adapter, contentUrlOrId, maxComments) {
    const requested = Math.floor(maxComments) || 30;
    const count = Math.max(1, Math.min(50, requested));
    const visible = process.env.PPXC_MCP_PAGE_VISIBLE === "1";
    const session = getManager(adapter.id).acquire(SLOT_ID, { visible });
    await sleep(500);
    const loggedIn = await adapter.isLoggedIn(session);
    if (!loggedIn) {
        throw new RunnerError("LOGIN_REQUIRED", `${adapter.displayName}未登录,需要先扫码登录`);
    }
    // Quota is consumed before the link is resolved.
    assertCrawlQuota(adapter.id, 1);
    const resolveId = async () => {
        try {
            return await adapter.resolveContentId(contentUrlOrId, session);
        }
        catch {
            // Any resolution failure is reported as a bad link.
            throw new RunnerError("BAD_VIDEO_LINK", `无法从链接里识别出${adapter.displayName}内容:${contentUrlOrId}`);
        }
    };
    const contentId = await resolveId();
    // The adapter may supply a preferred URL; otherwise one is built from the id.
    const contentUrl = adapter.preferredContentUrl?.(contentUrlOrId, contentId, session) ??
        adapter.buildContentUrl(contentId);
    await ensureOnContentPage(session, contentId, contentUrl);
    const fetched = await adapter.fetchComments(session, contentId, contentUrl, count, DEFAULT_FETCH_WAIT_MS);
    const { json } = fetched;
    if (!json) {
        // A verification challenge takes precedence over the generic timeout.
        await throwIfVerification(session, adapter);
        throw new RunnerError("FETCH_TIMEOUT", `${adapter.displayName}评论没有返回数据`);
    }
    const douyinRejected = adapter.id === "douyin" && Number(json.status_code ?? 0) !== 0;
    if (douyinRejected) {
        await throwIfVerification(session, adapter);
        throw new RunnerError("FETCH_TIMEOUT", `${adapter.displayName}返回非成功状态`);
    }
    const comments = adapter.parseComments(contentId, json);
    return {
        contentId,
        contentUrl,
        awemeId: contentId,
        videoUrl: contentUrl,
        comments,
        rawCount: fetched.rawCount,
        hasMore: fetched.hasMore,
    };
}
|
|
145
|
+
/**
 * Search one keyword, pick the most-commented results (limited per author),
 * and read the comments of each picked item.
 *
 * This function does not take the runner lock and does not consume quota itself.
 *
 * @param {string} platform - Platform id.
 * @param {string} keyword - Search term; blank input is rejected.
 * @param {number} [maxItems=5] - Requested item count, clamped to
 *   1..adapter.risk.maxVideosPerKeyword.
 * @param {number} [commentsPerItem=30] - Comments per item, clamped to 1..50.
 * @param {number} [slotId=SLOT_ID] - Session slot to use.
 * @param {() => boolean} [shouldAbort] - Polled before each item; a truthy
 *   result stops the loop and returns what was read so far.
 * @returns {Promise<object>} keyword, itemsFound, itemsRead, items (also under
 *   the legacy key `videos`) and totalComments.
 * @throws {RunnerError} NO_SEARCH_RESULT, LOGIN_REQUIRED or VERIFICATION_REQUIRED.
 */
async function searchKeywordForLeads(platform, keyword, maxItems = 5, commentsPerItem = 30, slotId = SLOT_ID, shouldAbort) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    const kw = String(keyword ?? "").trim();
    if (kw === "") {
        throw new RunnerError("NO_SEARCH_RESULT", "搜索词不能为空");
    }
    const { risk } = adapter;
    const itemLimit = Math.max(1, Math.min(risk.maxVideosPerKeyword, Math.floor(maxItems) || 5));
    const commentCount = Math.max(1, Math.min(50, Math.floor(commentsPerItem) || 30));
    const session = getManager(platform).acquire(slotId, {
        visible: process.env.PPXC_MCP_PAGE_VISIBLE === "1",
    });
    await sleep(500);
    const loggedIn = await adapter.isLoggedIn(session);
    if (!loggedIn) {
        throw new RunnerError("LOGIN_REQUIRED", `${adapter.displayName}未登录,需要先扫码登录`);
    }
    const batch = await adapter.awaitSearchBatch(session, kw, DEFAULT_FETCH_WAIT_MS);
    if (!batch) {
        await throwIfVerification(session, adapter);
        throw new RunnerError("NO_SEARCH_RESULT", `没搜到「${kw}」的内容,换个词或稍后再试`);
    }
    if (batch.verifyCheck) {
        await throwIfVerification(session, adapter);
    }

    // Rank: items with enough comments, most-commented first. If none qualify,
    // fall back to every identifiable item in its original order.
    const identifiable = batch.items.filter((entry) => entry.contentId);
    const popular = identifiable
        .filter((entry) => (entry.commentCount ?? 0) >= risk.minCommentCount)
        .sort((left, right) => (right.commentCount ?? 0) - (left.commentCount ?? 0));
    const ranked = popular.length === 0 ? identifiable : popular;

    // Select up to itemLimit items, capping how many come from one author.
    const perAuthor = new Map();
    const selected = [];
    for (const candidate of ranked) {
        if (selected.length >= itemLimit) {
            break;
        }
        const authorKey = String(candidate.authorSecUid ?? candidate.authorName ?? `__anon_${candidate.contentId}`);
        const taken = perAuthor.get(authorKey) ?? 0;
        if (taken >= risk.maxPerAuthor) {
            continue;
        }
        perAuthor.set(authorKey, taken + 1);
        selected.push(candidate);
    }
    if (selected.length === 0) {
        throw new RunnerError("NO_SEARCH_RESULT", `「${kw}」搜到的内容评论都太少,换个词试试`);
    }

    // Opens one item and returns its result record, or null when no JSON came back.
    const readItem = async (entry, contentId, contentUrl) => {
        await ensureOnContentPage(session, contentId, contentUrl);
        const fetched = await adapter.fetchComments(session, contentId, contentUrl, commentCount, DEFAULT_FETCH_WAIT_MS);
        if (!fetched.json) {
            return null;
        }
        const comments = adapter.parseComments(contentId, fetched.json);
        return {
            contentId,
            contentUrl,
            awemeId: contentId,
            videoUrl: contentUrl,
            title: entry.title ?? "",
            authorName: entry.authorName ?? "",
            commentCount: entry.commentCount ?? 0,
            comments,
        };
    };

    const items = [];
    let totalComments = 0;
    for (const entry of selected) {
        if (shouldAbort?.()) {
            log.warn("search aborted between items", { platform: adapter.id, keyword: kw });
            break;
        }
        const contentId = String(entry.contentId);
        const contentUrl = entry.contentUrl || adapter.buildContentUrl(contentId);
        try {
            const item = await readItem(entry, contentId, contentUrl);
            if (item !== null) {
                totalComments += item.comments.length;
                items.push(item);
            }
        }
        catch (err) {
            // A failing item is logged and skipped; the remaining items still run.
            log.warn("search item comment fetch failed", {
                contentId,
                msg: err instanceof Error ? err.message : String(err),
            });
        }
        await sleep(randomDelay(adapter));
    }
    return {
        keyword: kw,
        itemsFound: ranked.length,
        itemsRead: items.length,
        items,
        videos: items,
        totalComments,
    };
}
|
|
231
|
+
/**
 * Search several keywords under the runner lock, spreading them round-robin
 * over up to MAX_SLOTS sessions that run concurrently.
 *
 * Once any keyword fails with VERIFICATION_REQUIRED, the remaining keywords are
 * recorded with that error code instead of being searched.
 *
 * @param {string} platform - Platform id.
 * @param {Iterable<string>} keywords - Search terms; trimmed, blanks dropped, de-duplicated.
 * @param {number} [maxItemsPerKeyword=5] - Requested items per keyword (further
 *   capped by the adapter's per-keyword limit and total content budget).
 * @param {number} [commentsPerItem=30] - Comments to read per item.
 * @returns {Promise<object[]>} One outcome per keyword, in completion order:
 *   `{keyword, ok: true, result}` or `{keyword, ok: false, errorCode}`.
 * @throws {RunnerError} NO_SEARCH_RESULT (no usable keyword), BUSY,
 *   LOGIN_REQUIRED, RATE_LIMITED or DAILY_LIMITED.
 */
async function searchKeywordsBatch(platform, keywords, maxItemsPerKeyword = 5, commentsPerItem = 30) {
    const adapter = (0, registry_1.getPlatformAdapter)(platform);
    // A Set keeps first-seen order while dropping duplicates.
    const cleaned = [
        ...new Set([...keywords]
            .map((raw) => String(raw ?? "").trim())
            .filter((kw) => kw)),
    ];
    if (cleaned.length === 0) {
        throw new RunnerError("NO_SEARCH_RESULT", "搜索词不能为空");
    }
    const runBatch = async () => {
        const probeSession = getManager(platform).acquire(SLOT_ID, {
            visible: process.env.PPXC_MCP_PAGE_VISIBLE === "1",
        });
        const loggedIn = await adapter.isLoggedIn(probeSession);
        if (!loggedIn) {
            throw new RunnerError("LOGIN_REQUIRED", `${adapter.displayName}未登录,需要先扫码登录`);
        }
        // One quota unit per keyword, consumed up front for the whole batch.
        assertCrawlQuota(platform, cleaned.length);
        const requestedCap = Math.max(1, Math.min(adapter.risk.maxVideosPerKeyword, Math.floor(maxItemsPerKeyword) || 5));
        const budgetShare = Math.floor(adapter.risk.totalContentBudget / cleaned.length);
        const perKeywordCap = Math.max(1, Math.min(requestedCap, budgetShare));
        const slotCount = Math.min(MAX_SLOTS, cleaned.length);
        const outcomes = [];
        let verificationHit = false;
        const verificationSeen = () => verificationHit;

        async function runSlot(slotId) {
            const assigned = cleaned.filter((_kw, idx) => idx % slotCount === slotId);
            if (slotId > 0) {
                await sleep(adapter.risk.slotStaggerMs);
            }
            for (const kw of assigned) {
                if (verificationHit) {
                    outcomes.push({ keyword: kw, ok: false, errorCode: "VERIFICATION_REQUIRED" });
                    continue;
                }
                try {
                    const result = await searchKeywordForLeads(platform, kw, perKeywordCap, commentsPerItem, slotId, verificationSeen);
                    outcomes.push({ keyword: kw, ok: true, result });
                }
                catch (err) {
                    const code = err instanceof RunnerError ? err.code : "INTERNAL";
                    log.warn("batch keyword failed", { platform, keyword: kw, code });
                    outcomes.push({ keyword: kw, ok: false, errorCode: code });
                    if (code === "VERIFICATION_REQUIRED") {
                        verificationHit = true;
                    }
                }
            }
        }

        const slots = [];
        for (let slotId = 0; slotId < slotCount; slotId += 1) {
            slots.push(runSlot(slotId));
        }
        await Promise.all(slots);
        return outcomes;
    };
    return withRunnerLock(`${adapter.displayName}关键词搜索`, runBatch);
}
|
|
282
|
+
/**
 * Destroy every cached page manager and clear the cache.
 *
 * Teardown is best-effort: a manager whose destroyAll() fails is logged and
 * skipped so the remaining managers are still torn down.
 *
 * @returns {Promise<void>}
 */
async function shutdownRunner() {
    for (const [platform, mgr] of managers) {
        try {
            await mgr.destroyAll();
        }
        catch (err) {
            // Keep going, but leave a trace instead of silently dropping the failure.
            log.warn("manager teardown failed", {
                platform,
                msg: err instanceof Error ? err.message : String(err),
            });
        }
    }
    managers.clear();
}
|
|
292
|
+
/**
 * Douyin-only convenience wrapper around fetchContentComments.
 *
 * @param {string} videoUrlOrId - Douyin video URL or id.
 * @param {number} [maxComments] - Requested number of comments.
 * @returns {Promise<object>} Result of fetchContentComments("douyin", …).
 */
const fetchVideoComments = (videoUrlOrId, maxComments) => {
    return fetchContentComments("douyin", videoUrlOrId, maxComments);
};
|
|
293
|
+
exports.fetchVideoComments = fetchVideoComments;
|
|
294
|
+
/**
 * Douyin-only convenience wrapper around startPlatformLogin.
 *
 * @param {number} [timeoutMs] - Login timeout forwarded unchanged.
 * @returns {Promise<object>} Result of startPlatformLogin("douyin", …).
 */
const startDouyinLogin = (timeoutMs) => {
    return startPlatformLogin("douyin", timeoutMs);
};
|
|
295
|
+
exports.startDouyinLogin = startDouyinLogin;
|
|
296
|
+
/**
 * Re-shape a platform-neutral search result into the Douyin-specific naming
 * (items → videos, contentId → awemeId, contentUrl → videoUrl).
 *
 * @param {object} result - Search result carrying keyword, itemsFound, itemsRead,
 *   items and totalComments.
 * @returns {object} The same data under the video-oriented field names.
 */
function mapSearchResultForDouyin(result) {
    const toVideo = ({ contentId, contentUrl, title, authorName, commentCount, comments }) => ({
        awemeId: contentId,
        videoUrl: contentUrl,
        title,
        authorName,
        commentCount,
        comments,
    });
    const { keyword, itemsFound, itemsRead, items, totalComments } = result;
    return {
        keyword,
        videosFound: itemsFound,
        videosRead: itemsRead,
        videos: items.map(toVideo),
        totalComments,
    };
}
|
|
312
|
+
//# sourceMappingURL=platform-runner.js.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.detectPlatformFromUrl = detectPlatformFromUrl;
|
|
4
|
+
exports.normalizePlatformId = normalizePlatformId;
|
|
5
|
+
/**
 * Guess the platform from a share link, URL or bare id.
 *
 * Matching is by substring on the lower-cased, trimmed input; a bare run of
 * 15–25 digits is treated as a Douyin id.
 *
 * @param {*} input - Link text or id; null/undefined are treated as empty.
 * @returns {"douyin"|"xiaohongshu"|"kuaishou"|null} Detected platform, or null.
 */
function detectPlatformFromUrl(input) {
    const text = String(input ?? "").trim().toLowerCase();
    if (text === "") {
        return null;
    }
    // Checked in order; the first matching rule decides.
    const rules = [
        ["douyin", /douyin\.com|v\.douyin\.com|iesdouyin\.com/],
        ["douyin", /^\d{15,25}$/],
        ["xiaohongshu", /xiaohongshu\.com|xhslink\.com|xhs\.cn/],
        ["kuaishou", /kuaishou\.com|gifshow\.com|chenzhongtech\.com|ksurl\.cn/],
    ];
    const hit = rules.find(([, pattern]) => pattern.test(text));
    return hit ? hit[0] : null;
}
|
|
21
|
+
/**
 * Resolve the platform id to use for a request.
 *
 * @param {*} explicit - Caller-supplied platform name; used when it is one of
 *   the supported ids (case-insensitive, trimmed).
 * @param {string} [contentUrl] - Optional link to detect the platform from when
 *   `explicit` is not a supported id.
 * @returns {"douyin"|"xiaohongshu"|"kuaishou"} The resolved platform; "douyin"
 *   when nothing else applies.
 */
function normalizePlatformId(explicit, contentUrl) {
    const requested = String(explicit ?? "").trim().toLowerCase();
    const supported = ["douyin", "xiaohongshu", "kuaishou"];
    if (supported.includes(requested)) {
        return requested;
    }
    const detected = contentUrl ? detectPlatformFromUrl(contentUrl) : null;
    return detected || "douyin";
}
|
|
33
|
+
//# sourceMappingURL=detect-platform.js.map
|