yuque-ai-mcp 2.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/board/create-board.d.ts +7 -0
- package/dist/board/create-board.js +50 -0
- package/dist/board/create-board.js.map +1 -0
- package/dist/board/get-board.d.ts +7 -0
- package/dist/board/get-board.js +44 -0
- package/dist/board/get-board.js.map +1 -0
- package/dist/board/index.d.ts +5 -0
- package/dist/board/index.js +8 -0
- package/dist/board/index.js.map +1 -0
- package/dist/board/update-board.d.ts +7 -0
- package/dist/board/update-board.js +65 -0
- package/dist/board/update-board.js.map +1 -0
- package/dist/cli-http.d.ts +2 -0
- package/dist/cli-http.js +16 -0
- package/dist/cli-http.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +15 -0
- package/dist/cli.js.map +1 -0
- package/dist/common/api-client.d.ts +27 -0
- package/dist/common/api-client.js +198 -0
- package/dist/common/api-client.js.map +1 -0
- package/dist/common/config.d.ts +46 -0
- package/dist/common/config.js +86 -0
- package/dist/common/config.js.map +1 -0
- package/dist/common/copy-common.d.ts +8 -0
- package/dist/common/copy-common.js +13 -0
- package/dist/common/copy-common.js.map +1 -0
- package/dist/common/errors.d.ts +44 -0
- package/dist/common/errors.js +164 -0
- package/dist/common/errors.js.map +1 -0
- package/dist/common/export-common.d.ts +32 -0
- package/dist/common/export-common.js +125 -0
- package/dist/common/export-common.js.map +1 -0
- package/dist/common/format.d.ts +247 -0
- package/dist/common/format.js +199 -0
- package/dist/common/format.js.map +1 -0
- package/dist/common/register-tools.d.ts +29 -0
- package/dist/common/register-tools.js +89 -0
- package/dist/common/register-tools.js.map +1 -0
- package/dist/common/repo-capacity.d.ts +26 -0
- package/dist/common/repo-capacity.js +105 -0
- package/dist/common/repo-capacity.js.map +1 -0
- package/dist/common/schedule-common.d.ts +38 -0
- package/dist/common/schedule-common.js +111 -0
- package/dist/common/schedule-common.js.map +1 -0
- package/dist/common/text-utils.d.ts +11 -0
- package/dist/common/text-utils.js +29 -0
- package/dist/common/text-utils.js.map +1 -0
- package/dist/common/toc-cache.d.ts +30 -0
- package/dist/common/toc-cache.js +109 -0
- package/dist/common/toc-cache.js.map +1 -0
- package/dist/common/types.d.ts +17 -0
- package/dist/common/types.js +3 -0
- package/dist/common/types.js.map +1 -0
- package/dist/common/validate.d.ts +27 -0
- package/dist/common/validate.js +70 -0
- package/dist/common/validate.js.map +1 -0
- package/dist/common/web-request.d.ts +13 -0
- package/dist/common/web-request.js +53 -0
- package/dist/common/web-request.js.map +1 -0
- package/dist/crawler/extract.d.ts +19 -0
- package/dist/crawler/extract.js +220 -0
- package/dist/crawler/extract.js.map +1 -0
- package/dist/crawler/fetch.d.ts +8 -0
- package/dist/crawler/fetch.js +89 -0
- package/dist/crawler/fetch.js.map +1 -0
- package/dist/crawler/index.d.ts +6 -0
- package/dist/crawler/index.js +10 -0
- package/dist/crawler/index.js.map +1 -0
- package/dist/crawler/save.d.ts +11 -0
- package/dist/crawler/save.js +160 -0
- package/dist/crawler/save.js.map +1 -0
- package/dist/crawler/schedule.d.ts +12 -0
- package/dist/crawler/schedule.js +132 -0
- package/dist/crawler/schedule.js.map +1 -0
- package/dist/doc/batch-get-docs.d.ts +8 -0
- package/dist/doc/batch-get-docs.js +100 -0
- package/dist/doc/batch-get-docs.js.map +1 -0
- package/dist/doc/copy-doc.d.ts +10 -0
- package/dist/doc/copy-doc.js +238 -0
- package/dist/doc/copy-doc.js.map +1 -0
- package/dist/doc/create-doc.d.ts +11 -0
- package/dist/doc/create-doc.js +82 -0
- package/dist/doc/create-doc.js.map +1 -0
- package/dist/doc/delete-doc.d.ts +8 -0
- package/dist/doc/delete-doc.js +39 -0
- package/dist/doc/delete-doc.js.map +1 -0
- package/dist/doc/diff-doc.d.ts +8 -0
- package/dist/doc/diff-doc.js +190 -0
- package/dist/doc/diff-doc.js.map +1 -0
- package/dist/doc/embed-url.d.ts +10 -0
- package/dist/doc/embed-url.js +137 -0
- package/dist/doc/embed-url.js.map +1 -0
- package/dist/doc/export-doc.d.ts +10 -0
- package/dist/doc/export-doc.js +135 -0
- package/dist/doc/export-doc.js.map +1 -0
- package/dist/doc/get-doc.d.ts +8 -0
- package/dist/doc/get-doc.js +40 -0
- package/dist/doc/get-doc.js.map +1 -0
- package/dist/doc/import-file-utils.d.ts +37 -0
- package/dist/doc/import-file-utils.js +256 -0
- package/dist/doc/import-file-utils.js.map +1 -0
- package/dist/doc/import-file.d.ts +13 -0
- package/dist/doc/import-file.js +333 -0
- package/dist/doc/import-file.js.map +1 -0
- package/dist/doc/import-url.d.ts +7 -0
- package/dist/doc/import-url.js +245 -0
- package/dist/doc/import-url.js.map +1 -0
- package/dist/doc/index.d.ts +16 -0
- package/dist/doc/index.js +30 -0
- package/dist/doc/index.js.map +1 -0
- package/dist/doc/list-docs.d.ts +8 -0
- package/dist/doc/list-docs.js +44 -0
- package/dist/doc/list-docs.js.map +1 -0
- package/dist/doc/update-doc.d.ts +8 -0
- package/dist/doc/update-doc.js +50 -0
- package/dist/doc/update-doc.js.map +1 -0
- package/dist/doc/version-detail.d.ts +8 -0
- package/dist/doc/version-detail.js +34 -0
- package/dist/doc/version-detail.js.map +1 -0
- package/dist/doc/versions.d.ts +8 -0
- package/dist/doc/versions.js +34 -0
- package/dist/doc/versions.js.map +1 -0
- package/dist/group/delete-user.d.ts +8 -0
- package/dist/group/delete-user.js +39 -0
- package/dist/group/delete-user.js.map +1 -0
- package/dist/group/index.d.ts +5 -0
- package/dist/group/index.js +8 -0
- package/dist/group/index.js.map +1 -0
- package/dist/group/list-users.d.ts +8 -0
- package/dist/group/list-users.js +40 -0
- package/dist/group/list-users.js.map +1 -0
- package/dist/group/update-user.d.ts +8 -0
- package/dist/group/update-user.js +37 -0
- package/dist/group/update-user.js.map +1 -0
- package/dist/http.d.ts +1 -0
- package/dist/http.js +114 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/kv/common.d.ts +34 -0
- package/dist/kv/common.js +191 -0
- package/dist/kv/common.js.map +1 -0
- package/dist/kv/delete.d.ts +7 -0
- package/dist/kv/delete.js +52 -0
- package/dist/kv/delete.js.map +1 -0
- package/dist/kv/get.d.ts +7 -0
- package/dist/kv/get.js +52 -0
- package/dist/kv/get.js.map +1 -0
- package/dist/kv/index.d.ts +6 -0
- package/dist/kv/index.js +10 -0
- package/dist/kv/index.js.map +1 -0
- package/dist/kv/list.d.ts +5 -0
- package/dist/kv/list.js +42 -0
- package/dist/kv/list.js.map +1 -0
- package/dist/kv/set.d.ts +7 -0
- package/dist/kv/set.js +54 -0
- package/dist/kv/set.js.map +1 -0
- package/dist/mine/common.d.ts +4 -0
- package/dist/mine/common.js +5 -0
- package/dist/mine/common.js.map +1 -0
- package/dist/mine/editor-center.d.ts +9 -0
- package/dist/mine/editor-center.js +57 -0
- package/dist/mine/editor-center.js.map +1 -0
- package/dist/mine/get-book-stacks.d.ts +8 -0
- package/dist/mine/get-book-stacks.js +41 -0
- package/dist/mine/get-book-stacks.js.map +1 -0
- package/dist/mine/index.d.ts +4 -0
- package/dist/mine/index.js +6 -0
- package/dist/mine/index.js.map +1 -0
- package/dist/note/create-note.d.ts +8 -0
- package/dist/note/create-note.js +32 -0
- package/dist/note/create-note.js.map +1 -0
- package/dist/note/get-note.d.ts +8 -0
- package/dist/note/get-note.js +27 -0
- package/dist/note/get-note.js.map +1 -0
- package/dist/note/index.d.ts +6 -0
- package/dist/note/index.js +10 -0
- package/dist/note/index.js.map +1 -0
- package/dist/note/list-notes.d.ts +8 -0
- package/dist/note/list-notes.js +40 -0
- package/dist/note/list-notes.js.map +1 -0
- package/dist/note/update-note.d.ts +8 -0
- package/dist/note/update-note.js +53 -0
- package/dist/note/update-note.js.map +1 -0
- package/dist/recycle/common.d.ts +4 -0
- package/dist/recycle/common.js +5 -0
- package/dist/recycle/common.js.map +1 -0
- package/dist/recycle/destroy-recycle.d.ts +8 -0
- package/dist/recycle/destroy-recycle.js +38 -0
- package/dist/recycle/destroy-recycle.js.map +1 -0
- package/dist/recycle/index.d.ts +5 -0
- package/dist/recycle/index.js +8 -0
- package/dist/recycle/index.js.map +1 -0
- package/dist/recycle/list-recycles.d.ts +8 -0
- package/dist/recycle/list-recycles.js +57 -0
- package/dist/recycle/list-recycles.js.map +1 -0
- package/dist/recycle/restore-recycle.d.ts +7 -0
- package/dist/recycle/restore-recycle.js +32 -0
- package/dist/recycle/restore-recycle.js.map +1 -0
- package/dist/repo/batch-get-repos.d.ts +8 -0
- package/dist/repo/batch-get-repos.js +83 -0
- package/dist/repo/batch-get-repos.js.map +1 -0
- package/dist/repo/copy-repo.d.ts +7 -0
- package/dist/repo/copy-repo.js +137 -0
- package/dist/repo/copy-repo.js.map +1 -0
- package/dist/repo/create-repo.d.ts +8 -0
- package/dist/repo/create-repo.js +49 -0
- package/dist/repo/create-repo.js.map +1 -0
- package/dist/repo/delete-repo.d.ts +8 -0
- package/dist/repo/delete-repo.js +37 -0
- package/dist/repo/delete-repo.js.map +1 -0
- package/dist/repo/export-graph.d.ts +21 -0
- package/dist/repo/export-graph.js +73 -0
- package/dist/repo/export-graph.js.map +1 -0
- package/dist/repo/export-index.d.ts +26 -0
- package/dist/repo/export-index.js +72 -0
- package/dist/repo/export-index.js.map +1 -0
- package/dist/repo/export-repo.d.ts +14 -0
- package/dist/repo/export-repo.js +243 -0
- package/dist/repo/export-repo.js.map +1 -0
- package/dist/repo/export-toc.d.ts +17 -0
- package/dist/repo/export-toc.js +33 -0
- package/dist/repo/export-toc.js.map +1 -0
- package/dist/repo/get-repo.d.ts +8 -0
- package/dist/repo/get-repo.js +32 -0
- package/dist/repo/get-repo.js.map +1 -0
- package/dist/repo/index.d.ts +10 -0
- package/dist/repo/index.js +18 -0
- package/dist/repo/index.js.map +1 -0
- package/dist/repo/list-repos.d.ts +8 -0
- package/dist/repo/list-repos.js +48 -0
- package/dist/repo/list-repos.js.map +1 -0
- package/dist/repo/update-repo.d.ts +8 -0
- package/dist/repo/update-repo.js +48 -0
- package/dist/repo/update-repo.js.map +1 -0
- package/dist/rss/dedup.d.ts +9 -0
- package/dist/rss/dedup.js +21 -0
- package/dist/rss/dedup.js.map +1 -0
- package/dist/rss/fetch-feed.d.ts +8 -0
- package/dist/rss/fetch-feed.js +290 -0
- package/dist/rss/fetch-feed.js.map +1 -0
- package/dist/rss/index.d.ts +5 -0
- package/dist/rss/index.js +8 -0
- package/dist/rss/index.js.map +1 -0
- package/dist/rss/list-sources.d.ts +8 -0
- package/dist/rss/list-sources.js +39 -0
- package/dist/rss/list-sources.js.map +1 -0
- package/dist/rss/parser.d.ts +27 -0
- package/dist/rss/parser.js +87 -0
- package/dist/rss/parser.js.map +1 -0
- package/dist/rss/schedule.d.ts +14 -0
- package/dist/rss/schedule.js +171 -0
- package/dist/rss/schedule.js.map +1 -0
- package/dist/rss/sources.d.ts +26 -0
- package/dist/rss/sources.js +54 -0
- package/dist/rss/sources.js.map +1 -0
- package/dist/search/index.d.ts +4 -0
- package/dist/search/index.js +6 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/rag-search-utils.d.ts +28 -0
- package/dist/search/rag-search-utils.js +74 -0
- package/dist/search/rag-search-utils.js.map +1 -0
- package/dist/search/rag-search.d.ts +16 -0
- package/dist/search/rag-search.js +103 -0
- package/dist/search/rag-search.js.map +1 -0
- package/dist/search/search.d.ts +8 -0
- package/dist/search/search.js +43 -0
- package/dist/search/search.js.map +1 -0
- package/dist/statistic/book-statistics.d.ts +7 -0
- package/dist/statistic/book-statistics.js +49 -0
- package/dist/statistic/book-statistics.js.map +1 -0
- package/dist/statistic/doc-statistics.d.ts +7 -0
- package/dist/statistic/doc-statistics.js +53 -0
- package/dist/statistic/doc-statistics.js.map +1 -0
- package/dist/statistic/group-statistics.d.ts +8 -0
- package/dist/statistic/group-statistics.js +30 -0
- package/dist/statistic/group-statistics.js.map +1 -0
- package/dist/statistic/index.d.ts +6 -0
- package/dist/statistic/index.js +10 -0
- package/dist/statistic/index.js.map +1 -0
- package/dist/statistic/member-statistics.d.ts +7 -0
- package/dist/statistic/member-statistics.js +49 -0
- package/dist/statistic/member-statistics.js.map +1 -0
- package/dist/toc/batch-update.d.ts +11 -0
- package/dist/toc/batch-update.js +274 -0
- package/dist/toc/batch-update.js.map +1 -0
- package/dist/toc/get-toc.d.ts +8 -0
- package/dist/toc/get-toc.js +33 -0
- package/dist/toc/get-toc.js.map +1 -0
- package/dist/toc/index.d.ts +5 -0
- package/dist/toc/index.js +8 -0
- package/dist/toc/index.js.map +1 -0
- package/dist/toc/update-toc.d.ts +8 -0
- package/dist/toc/update-toc.js +74 -0
- package/dist/toc/update-toc.js.map +1 -0
- package/dist/upload/index.d.ts +3 -0
- package/dist/upload/index.js +4 -0
- package/dist/upload/index.js.map +1 -0
- package/dist/upload/upload-attachment.d.ts +11 -0
- package/dist/upload/upload-attachment.js +162 -0
- package/dist/upload/upload-attachment.js.map +1 -0
- package/dist/user/get-groups.d.ts +8 -0
- package/dist/user/get-groups.js +41 -0
- package/dist/user/get-groups.js.map +1 -0
- package/dist/user/get-user.d.ts +8 -0
- package/dist/user/get-user.js +24 -0
- package/dist/user/get-user.js.map +1 -0
- package/dist/user/hello.d.ts +8 -0
- package/dist/user/hello.js +17 -0
- package/dist/user/hello.js.map +1 -0
- package/dist/user/index.d.ts +5 -0
- package/dist/user/index.js +8 -0
- package/dist/user/index.js.map +1 -0
- package/package.json +49 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* crawler/fetch — HTTP 请求抓取网页原始 HTML
|
|
3
|
+
*
|
|
4
|
+
* 职责:发 HTTP GET 请求,返回原始 HTML + 响应头 + 状态码。
|
|
5
|
+
* 不做任何解析、提取、清洗。Agent 拿到原始 HTML 后自行决定下一步。
|
|
6
|
+
*/
|
|
7
|
+
import { check, requiredString } from "../common/validate.js";
|
|
8
|
+
/**
 * MCP tool `yuque_crawl_fetch`: performs one HTTP GET and returns the raw response.
 *
 * No parsing, extraction or cleaning is done here. On success the handler
 * returns a single text content item holding a JSON document with the final
 * URL, status code, response headers, body text, body length and elapsed
 * milliseconds. On failure it returns a JSON error document and `isError: true`.
 */
export const crawlFetch = {
    name: "yuque_crawl_fetch",
    description: "Fetch a web page and return raw HTML, response headers, and status code. No parsing/extraction — Agent decides what to do next. 详见 references/api/extended_api.md",
    inputSchema: {
        type: "object",
        properties: {
            url: { type: "string", description: "Target URL to fetch" },
            headers: { type: "string", description: "Custom request headers as JSON string, e.g. '{\"Cookie\":\"...\",\"Referer\":\"...\"}'" },
            timeout: { type: "number", description: "Request timeout in ms (default 15000, max 30000)" },
            raw: { type: "boolean", description: "Return raw full JSON (default false, returns summary)" },
        },
        required: ["url"],
    },
    /**
     * Fetches `args.url` and reports the raw response.
     *
     * @param args Tool arguments. `url` is required. `headers` is an optional
     *   JSON string of extra request headers (these override the defaults).
     *   `timeout` is in milliseconds (default 15000, capped at 30000).
     *   `raw` is declared in the schema but is not read by this handler.
     * @returns The validation result when `url` is missing; otherwise an MCP
     *   result whose text is the JSON response summary, or a JSON error
     *   (`TIMEOUT` / `FETCH_FAILED`) with `isError: true`.
     */
    async handler(args) {
        const __v = check(requiredString(args?.url, "url"));
        if (__v)
            return __v;
        const url = args?.url;
        // A missing, non-numeric, zero or negative timeout falls back to the
        // default instead of producing an immediate abort; the cap stays at 30s.
        const requestedTimeout = Number(args?.timeout ?? 15000);
        const timeout = Math.min(Number.isFinite(requestedTimeout) && requestedTimeout > 0 ? requestedTimeout : 15000, 30000);
        let customHeaders = {};
        if (args?.headers && typeof args.headers === "string") {
            try {
                const parsed = JSON.parse(args.headers);
                // Only a plain JSON object is a usable header map; arrays,
                // strings and numbers would spread into garbage header names.
                if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed))
                    customHeaders = parsed;
            }
            catch { /* malformed header JSON is ignored on purpose (best effort) */ }
        }
        const controller = new AbortController();
        // Remember that the abort came from our own timer so the error branch
        // can classify it without depending on the thrown error's class.
        let timedOut = false;
        const timer = setTimeout(() => {
            timedOut = true;
            controller.abort();
        }, timeout);
        const startedAt = Date.now();
        try {
            const res = await fetch(url, {
                headers: {
                    "User-Agent": "Mozilla/5.0 (compatible; YuqueCrawler/1.0)",
                    "Accept": "text/html,application/xhtml+xml,*/*",
                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                    ...customHeaders,
                },
                signal: controller.signal,
                redirect: "follow",
            });
            // The timer stays armed while the body is read, so a stalled body
            // download is aborted too (it is cleared in `finally`).
            const body = await res.text();
            const elapsed = Date.now() - startedAt;
            // Flatten the response headers into a plain object.
            const headers = {};
            res.headers.forEach((v, k) => { headers[k] = v; });
            const result = {
                url: res.url, // final URL after following redirects
                status: res.status,
                headers,
                body,
                bodySize: body.length, // string length, not byte count
                elapsed,
            };
            return {
                content: [{
                        type: "text",
                        text: JSON.stringify(result, null, 2),
                    }],
            };
        }
        catch (err) {
            const elapsed = Date.now() - startedAt;
            const message = err instanceof Error ? err.message : String(err);
            const isTimeout = timedOut || (err instanceof Error && err.name === "AbortError");
            return {
                content: [{
                        type: "text",
                        text: JSON.stringify({
                            error: isTimeout ? "TIMEOUT" : "FETCH_FAILED",
                            message: isTimeout ? `请求超时 (${timeout}ms)` : `请求失败: ${message}`,
                            url,
                            elapsed,
                        }, null, 2),
                    }],
                isError: true,
            };
        }
        finally {
            clearTimeout(timer);
        }
    },
};
|
|
89
|
+
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../src/crawler/fetch.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAY9D,MAAM,CAAC,MAAM,UAAU,GAAY;IACjC,IAAI,EAAE,mBAAmB;IACzB,WAAW,EAAE,mKAAmK;IAEhL,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,qBAAqB,EAAE;YAC3D,OAAO,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,wFAAwF,EAAE;YAClI,OAAO,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,kDAAkD,EAAE;YAC5F,GAAG,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,uDAAuD,EAAE;SAC/F;QACD,QAAQ,EAAE,CAAC,KAAK,CAAC;KAClB;IAED,KAAK,CAAC,OAAO,CAAC,IAAI;QAChB,MAAM,GAAG,GAAG,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;QACpD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QAEpB,MAAM,GAAG,GAAG,IAAI,EAAE,GAAa,CAAC;QAChC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAE,IAAI,EAAE,OAAkB,IAAI,KAAK,EAAE,KAAK,CAAC,CAAC;QACpE,IAAI,aAAa,GAA2B,EAAE,CAAC;QAE/C,IAAI,IAAI,EAAE,OAAO,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACtD,IAAI,CAAC;gBAAC,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;QAC1E,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;QAE5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC3B,OAAO,EAAE;oBACP,YAAY,EAAE,4CAA4C;oBAC1D,QAAQ,EAAE,qCAAqC;oBAC/C,iBAAiB,EAAE,yBAAyB;oBAC5C,GAAG,aAAa;iBACjB;gBACD,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;YAEH,YAAY,CAAC,KAAK,CAAC,CAAC;YAEpB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEvC,QAAQ;YACR,MAAM,OAAO,GAA2B,EAAE,CAAC;YAC3C,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAEnD,MAAM,MAAM,GAAgB;gBAC1B,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,iBAAiB;gBAC/B,MAAM,EAAE,GAAG,CAAC,MAAM;gBAClB,OAAO;gBACP,IAAI;gBACJ,QAAQ,EAAE,IAAI,CAAC,MAAM;gBACrB,OAAO;aACR,C
AAC;YAEF,OAAO;gBACL,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC,CAAC;aACH,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjE,MAAM,SAAS,GAAG,GAAG,YAAY,YAAY,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,CAAC;YAE3E,OAAO;gBACL,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACnB,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc;4BAC7C,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,OAAO,KAAK,CAAC,CAAC,CAAC,SAAS,OAAO,EAAE;4BAC/D,GAAG;4BACH,OAAO;yBACR,EAAE,IAAI,EAAE,CAAC,CAAC;qBACZ,CAAC;gBACF,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;IACH,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { crawlFetch } from "./fetch.js";
|
|
2
|
+
export { crawlExtract } from "./extract.js";
|
|
3
|
+
export { crawlSave } from "./save.js";
|
|
4
|
+
export { crawlSchedule } from "./schedule.js";
|
|
5
|
+
import type { McpTool } from "../common/types.js";
|
|
6
|
+
/** All crawler MCP tools exported by this module, as a single array. */
export declare const crawlerTools: McpTool[];
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export { crawlFetch } from "./fetch.js";
|
|
2
|
+
export { crawlExtract } from "./extract.js";
|
|
3
|
+
export { crawlSave } from "./save.js";
|
|
4
|
+
export { crawlSchedule } from "./schedule.js";
|
|
5
|
+
import { crawlFetch } from "./fetch.js";
|
|
6
|
+
import { crawlExtract } from "./extract.js";
|
|
7
|
+
import { crawlSave } from "./save.js";
|
|
8
|
+
import { crawlSchedule } from "./schedule.js";
|
|
9
|
+
// Aggregate of the four crawler tools defined in this directory, in this order:
// fetch, extract, save, schedule.
export const crawlerTools = [crawlFetch, crawlExtract, crawlSave, crawlSchedule];
|
|
10
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/crawler/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE9C,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAG9C,MAAM,CAAC,MAAM,YAAY,GAAc,CAAC,UAAU,EAAE,YAAY,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* crawler/save — 去重 + 写入语雀
|
|
3
|
+
*
|
|
4
|
+
* 职责:接收 Agent 处理好的 HTML/标题 → 去重(KV JSON map)→ 创建语雀文档。
|
|
5
|
+
* fetch 和 extract 由 yuque_crawl_fetch + yuque_crawl_extract 负责。
|
|
6
|
+
*
|
|
7
|
+
* 端点到语雀:POST /repos/{book_id}/docs(创建文档)
|
|
8
|
+
* 去重:调用 kv/common.ts 的 loadKvMap / kvIncrementalSet
|
|
9
|
+
*/
|
|
10
|
+
import type { McpTool } from "../common/types.js";
|
|
11
|
+
/** MCP tool definition for `yuque_crawl_save` (dedup, then create a Yuque doc); implemented in save.js. */
export declare const crawlSave: McpTool;
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* crawler/save — 去重 + 写入语雀
|
|
3
|
+
*
|
|
4
|
+
* 职责:接收 Agent 处理好的 HTML/标题 → 去重(KV JSON map)→ 创建语雀文档。
|
|
5
|
+
* fetch 和 extract 由 yuque_crawl_fetch + yuque_crawl_extract 负责。
|
|
6
|
+
*
|
|
7
|
+
* 端点到语雀:POST /repos/{book_id}/docs(创建文档)
|
|
8
|
+
* 去重:调用 kv/common.ts 的 loadKvMap / kvIncrementalSet
|
|
9
|
+
*/
|
|
10
|
+
import { createHash } from "crypto";
|
|
11
|
+
import { apiPut } from "../common/api-client.js";
|
|
12
|
+
import { check, requiredString } from "../common/validate.js";
|
|
13
|
+
import { loadConfig } from "../common/config.js";
|
|
14
|
+
import { loadKvMap, kvIncrementalSet } from "../kv/common.js";
|
|
15
|
+
import { createDocWithAutoExpand } from "../common/repo-capacity.js";
|
|
16
|
+
/**
 * Resolves the target repo (knowledge base) ID.
 *
 * An explicit `paramRepo` wins and is parsed as a base-10 integer; a value
 * that parses to NaN or 0 yields `null`. Without it, the last entry of
 * `crawler.namespaces[source].book_id` from the loaded config is used.
 *
 * @param source Source key used to look up the config namespace (may be undefined).
 * @param paramRepo Repo ID passed by the caller as a string (may be undefined or empty).
 * @returns The repo ID, or `null` when none can be determined.
 */
function resolveRepo(source, paramRepo) {
    if (!paramRepo) {
        const namespaces = loadConfig().crawler?.namespaces;
        const bookIds = namespaces?.[source || ""]?.book_id;
        return bookIds && bookIds.length > 0 ? bookIds[bookIds.length - 1] : null;
    }
    const parsedId = parseInt(paramRepo, 10);
    return parsedId ? parsedId : null;
}
|
|
26
|
+
/**
 * Builds the dedup slug for a URL: the first 12 hex characters of its MD5 digest.
 *
 * @param url Source URL to hash.
 * @returns A 12-character lowercase hex string.
 */
function buildSlug(url) {
    const hasher = createHash("md5");
    hasher.update(url);
    const hexDigest = hasher.digest("hex");
    return hexDigest.substring(0, 12);
}
|
|
30
|
+
/**
 * MCP tool `yuque_crawl_save`: de-duplicates by source URL, then stores the
 * prepared content as a new Yuque document.
 *
 * Flow: validate arguments, check the KV dedup map (when `kv.enabled` is set
 * in config), create the doc, append it to the repo TOC, then record the slug
 * in the KV map. The dedup check, TOC append and KV mark are best effort: a
 * failure there never fails the call, but it is reported in a `warnings`
 * array in the result payload.
 */
export const crawlSave = {
    name: "yuque_crawl_save",
    description: "Dedup + save HTML to Yuque repo. Use yuque_crawl_fetch + yuque_crawl_extract to prepare content first. 详见 references/api/extended_api.md",
    inputSchema: {
        type: "object",
        properties: {
            url: { type: "string", description: "Source URL (for dedup slug + source footer)" },
            title: { type: "string", description: "Document title (required)" },
            body: { type: "string", description: "HTML body content to save (required)" },
            source: { type: "string", description: "Source key for repo routing and KV namespace, e.g. 'cnblogs'" },
            target_repo: { type: "string", description: "Target repo ID. Falls back to config crawler.namespaces.{source}.book_id." },
            kv_namespace: { type: "string", description: "KV namespace for dedup. Defaults to source if set, otherwise 'crawler'." },
            format: { type: "string", description: "Content format: html (default) | markdown | lake" },
            raw: { type: "boolean", description: "Return raw full JSON (default false, returns summary)" },
        },
        required: ["url", "title", "body"],
    },
    /**
     * Saves one crawled page.
     *
     * @param args Tool arguments. `url`, `title` and `body` are required;
     *   `source`, `target_repo`, `kv_namespace` and `format` are optional.
     *   `raw` is declared in the schema but is not read by this handler.
     * @returns The validation result when a required field is missing;
     *   otherwise an MCP result whose text is a JSON payload with `status`
     *   `"skipped"` (duplicate), `"failed"` (doc creation failed, `isError`)
     *   or `"saved"`, or the `NO_TARGET_REPO` error. `"saved"` payloads carry
     *   `book_id`, the repo the doc was actually written to. `"saved"` and
     *   `"failed"` payloads carry `warnings` only when a best-effort step failed.
     */
    async handler(args) {
        const __v = check(requiredString(args?.url, "url"), requiredString(args?.title, "title"), requiredString(args?.body, "body"));
        if (__v)
            return __v;
        const url = args?.url;
        const title = args?.title;
        const body = args?.body;
        const source = args?.source;
        const targetRepoParam = args?.target_repo;
        const kvNamespace = args?.kv_namespace || source || "crawler";
        const format = args?.format ?? "html";
        const cfg = loadConfig();
        const targetRepo = resolveRepo(source, targetRepoParam);
        const enableKv = !!(cfg.kv?.enabled);
        // Non-fatal problems collected along the way and surfaced to the caller.
        const warnings = [];
        const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
        // Serialises a payload, attaching `warnings` only when there are any so
        // the happy-path output keeps its previous shape.
        const toText = (payload) => JSON.stringify(warnings.length > 0 ? { ...payload, warnings } : payload, null, 2);
        // 1. Dedup check
        const slug = buildSlug(url);
        let isDuplicate = false;
        if (enableKv) {
            try {
                const existingMap = await loadKvMap("crawler", kvNamespace);
                isDuplicate = slug in existingMap;
            }
            catch (err) {
                // A failed dedup lookup must not block the save.
                warnings.push(`dedup check failed: ${reasonOf(err)}`);
            }
        }
        if (isDuplicate) {
            return {
                content: [{
                        type: "text",
                        text: JSON.stringify({
                            status: "skipped",
                            reason: "duplicate",
                            url,
                            title,
                            slug,
                        }, null, 2),
                    }],
            };
        }
        // 2. Write to Yuque
        if (!targetRepo) {
            return {
                content: [{ type: "text", text: JSON.stringify({
                            error: "NO_TARGET_REPO",
                            message: "未配置目标知识库,请在 config.json 中设置 crawler.namespaces.{source}.book_id 或传 target_repo 参数",
                        }, null, 2) }],
                isError: true,
            };
        }
        const now = new Date();
        const pad2 = (value) => String(value).padStart(2, '0');
        // Local-time timestamp, formatted as YYYY-MM-DD HH:mm:ss.
        const ts = `${now.getFullYear()}-${pad2(now.getMonth() + 1)}-${pad2(now.getDate())} ${pad2(now.getHours())}:${pad2(now.getMinutes())}:${pad2(now.getSeconds())}`;
        // NOTE(review): this source header uses Markdown blockquote syntax even
        // when `format` is "html" (the default) or "lake" — confirm how Yuque
        // renders it for those formats.
        const docBody = `> 来源:${url} | 抓取时间:${ts}\n\n${body}`;
        const createResult = await createDocWithAutoExpand(targetRepo, "crawler", source || "crawler", {
            title,
            body: docBody,
            slug,
            description: `原文链接: ${url}`,
            format,
            public: 0,
        }, `Create doc: ${title}`);
        if (!createResult.ok) {
            return {
                content: [{ type: "text", text: toText({
                            status: "failed",
                            url,
                            title,
                            error: createResult.error,
                            expanded: createResult.expanded,
                        }) }],
                isError: true,
            };
        }
        const docId = createResult.id;
        // The create helper may report a different repo than the one requested
        // (presumably after auto-expansion — confirm against repo-capacity).
        const finalRepo = createResult.book_id ?? targetRepo;
        // Append the new doc to the repo TOC (best effort).
        if (docId) {
            try {
                await apiPut(`/repos/${finalRepo}/toc`, {
                    action: "appendNode",
                    action_mode: "sibling",
                    type: "DOC",
                    doc_ids: [docId],
                }, `Add to TOC: ${title}`);
            }
            catch (err) {
                warnings.push(`TOC append failed: ${reasonOf(err)}`);
            }
        }
        // 3. Record the slug in the KV dedup map (best effort).
        if (enableKv) {
            try {
                const kvMeta = JSON.stringify({
                    link: url,
                    date: new Date().toISOString(),
                });
                await kvIncrementalSet("crawler", kvNamespace, slug, kvMeta);
            }
            catch (err) {
                warnings.push(`KV mark failed: ${reasonOf(err)}`);
            }
        }
        return {
            content: [{
                    type: "text",
                    text: toText({
                        status: "saved",
                        url,
                        title,
                        slug,
                        doc_id: docId,
                        target_repo: targetRepo,
                        // Repo the doc was actually written to.
                        book_id: finalRepo,
                        kv_namespace: kvNamespace,
                        body_size: body.length,
                    }),
                }],
        };
    },
};
|
|
160
|
+
//# sourceMappingURL=save.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"save.js","sourceRoot":"","sources":["../../src/crawler/save.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,OAAO,EAAiB,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC9D,OAAO,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAErE,cAAc;AACd,SAAS,WAAW,CAAC,MAAe,EAAE,SAAkB;IACtD,IAAI,SAAS;QAAE,OAAO,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC;IACtD,MAAM,GAAG,GAAG,UAAU,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,OAAO,CAAC;IAC7D,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,kCAAkC;AAClC,SAAS,SAAS,CAAC,GAAW;IAC5B,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAY;IAChC,IAAI,EAAE,kBAAkB;IACxB,WAAW,EAAE,0IAA0I;IAEvJ,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;YACnF,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,2BAA2B,EAAE;YACnE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,sCAAsC,EAAE;YAC7E,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,8DAA8D,EAAE;YACvG,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,2EAA2E,EAAE;YACzH,YAAY,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,yEAAyE,EAAE;YACxH,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,kDAAkD,EAAE;YAC3F,GAAG,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,uDAAuD,EAAE;SAC/F;QACD,QAAQ,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC;KACnC;IAED,KAAK,CAAC,OAAO,CAAC,IAAI;QAChB,MAAM,GAAG,GAAG,KAAK,CACf,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,EAChC,cAAc,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,EACpC,cAAc,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,CACnC,CAAC;QACF,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QAEpB,MAAM,GAAG,GAAG,IAAI,EAAE,GAAa,CAAC;QAChC,MAAM,KAAK,GAAG,IAAI,EAAE,KAAe,CAAC;QACpC,MAAM,IAAI,GAAG,IAAI,EAAE,IAA
c,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,EAAE,MAA4B,CAAC;QAClD,MAAM,eAAe,GAAG,IAAI,EAAE,WAAiC,CAAC;QAChE,MAAM,WAAW,GAAI,IAAI,EAAE,YAAuB,IAAI,MAAM,IAAI,SAAS,CAAC;QAC1E,MAAM,MAAM,GAAI,IAAI,EAAE,MAAiB,IAAI,MAAM,CAAC;QAElD,MAAM,GAAG,GAAG,UAAU,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAErC,QAAQ;QACR,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,WAAW,GAAG,KAAK,CAAC;QAExB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,MAAM,SAAS,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;gBAC5D,WAAW,GAAG,IAAI,IAAI,WAAW,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC;QAChC,CAAC;QAED,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO;gBACL,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACnB,MAAM,EAAE,SAAS;4BACjB,MAAM,EAAE,WAAW;4BACnB,GAAG;4BACH,KAAK;4BACL,IAAI;yBACL,EAAE,IAAI,EAAE,CAAC,CAAC;qBACZ,CAAC;aACH,CAAC;QACJ,CAAC;QAED,UAAU;QACV,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,gBAAgB;4BACvB,OAAO,EAAE,mFAAmF;yBAC7F,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE,IAAI,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAC,GAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAC,GAAG,CAAC,EAAE,CAAC;QACzP,MAAM,OAAO,GAAG,QAAQ,GAAG,WAAW,EAAE,OAAO,IAAI,EAAE,CAAC;QAEtD,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAChD,UAAU,EACV,SAAS,EACT,MAAM,IAAI,SAAS,EACnB;YACE,KAAK;YACL,IAAI,EAAE,OAAO;YACb,IAAI;YACJ,WAAW,EAAE,SAAS,GAAG,EAAE;YAC3B,MAAM;YACN,MAAM,EAAE,CAAC;SACV,EACD,eAAe,KAAK,EAAE,CACvB,CAAC;QAEF,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,CAAC;YACrB,OAAO;gBACL,OAAO,EAAE,CAAC,EAA
E,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,MAAM,EAAE,QAAQ;4BAChB,GAAG;4BACH,KAAK;4BACL,KAAK,EAAE,YAAY,CAAC,KAAK;4BACzB,QAAQ,EAAE,YAAY,CAAC,QAAQ;yBAChC,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,YAAY,CAAC,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,IAAI,UAAU,CAAC;QAErD,OAAO;QACP,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,SAAS,MAAM,EAAE;oBACtC,MAAM,EAAE,YAAY;oBACpB,WAAW,EAAE,SAAS;oBACtB,IAAI,EAAE,KAAK;oBACX,OAAO,EAAE,CAAC,KAAK,CAAC;iBACjB,EAAE,eAAe,KAAK,EAAE,CAAC,CAAC;YAC7B,CAAC;YAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC;QAChC,CAAC;QAED,gBAAgB;QAChB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;oBAC5B,IAAI,EAAE,GAAG;oBACT,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBAC/B,CAAC,CAAC;gBACH,MAAM,gBAAgB,CAAC,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/D,CAAC;YAAC,MAAM,CAAC,CAAC,mBAAmB,CAAC,CAAC;QACjC,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,MAAM,EAAE,OAAO;wBACf,GAAG;wBACH,KAAK;wBACL,IAAI;wBACJ,MAAM,EAAE,KAAK;wBACb,WAAW,EAAE,UAAU;wBACvB,YAAY,EAAE,WAAW;wBACzB,SAAS,EAAE,IAAI,CAAC,MAAM;qBACvB,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* crawler/schedule — 爬虫定时抓取策略分析
|
|
3
|
+
*
|
|
4
|
+
* 与 rss/schedule 共用 schedule-common.ts 公共逻辑。
|
|
5
|
+
* 频率分档(保守策略,最小间隔 1 天):
|
|
6
|
+
* - 高频:近 7 天 ≥5 篇 → 每天 1 次
|
|
7
|
+
* - 中频:近 7 天 1-4 篇 → 每 7 天
|
|
8
|
+
* - 低频:近 14 天 1 篇 → 每 15 天
|
|
9
|
+
* - 休眠:近 30 天 0 篇 → 每 30 天
|
|
10
|
+
*/
|
|
11
|
+
import type { McpTool } from "../common/types.js";
|
|
12
|
+
/**
 * MCP tool definition for crawler fetch-schedule analysis.
 *
 * The compiled implementation registers it under the tool name
 * `yuque_crawl_schedule`; it takes a required `source` key plus optional
 * `kv_namespace`, `mode` and `raw` inputs.
 */
export declare const crawlSchedule: McpTool;
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* crawler/schedule — 爬虫定时抓取策略分析
|
|
3
|
+
*
|
|
4
|
+
* 与 rss/schedule 共用 schedule-common.ts 公共逻辑。
|
|
5
|
+
* 频率分档(保守策略,最小间隔 1 天):
|
|
6
|
+
* - 高频:近 7 天 ≥5 篇 → 每天 1 次
|
|
7
|
+
* - 中频:近 7 天 1-4 篇 → 每 7 天
|
|
8
|
+
* - 低频:近 14 天 1 篇 → 每 15 天
|
|
9
|
+
* - 休眠:近 30 天 0 篇 → 每 30 天
|
|
10
|
+
*/
|
|
11
|
+
import { check, requiredString } from "../common/validate.js";
|
|
12
|
+
import { loadConfig } from "../common/config.js";
|
|
13
|
+
import { loadKvMap } from "../kv/common.js";
|
|
14
|
+
import { classifyBand, parseArticles, countRecent, getScheduleSlugs, findScheduleDoc, upsertScheduleDoc, } from "../common/schedule-common.js";
|
|
15
|
+
// Slug passed to findScheduleDoc / upsertScheduleDoc to identify the crawler
// schedule document. NOTE(review): how the helpers combine it with the
// configured schedule_slugs is not visible in this file — confirm in
// schedule-common.
const CRAWLER_SCHEDULE_SLUG = "crawler-schedule";
|
|
16
|
+
// ── Crawler 专用:schedule body 构建 ──
|
|
17
|
+
/**
 * Render the Markdown body of a crawler schedule document.
 *
 * The output is a level-1 heading followed by a blank line and a bullet list,
 * joined with "\n" (no trailing newline).
 *
 * @param source     Source key; used in the heading and the data-source bullet.
 * @param lastFetch  Previous fetch marker; any falsy value renders as "无记录".
 * @param nextFetch  Recommended next fetch date, rendered verbatim.
 * @param band       Frequency band; `intervalDays` and `label` are read.
 * @param recent7d   Article count for the last 7 days.
 * @param recent14d  Article count for the last 14 days.
 * @param recent30d  Article count for the last 30 days.
 * @returns The Markdown text.
 */
function buildCrawlScheduleBody(source, lastFetch, nextFetch, band, recent7d, recent14d, recent30d) {
    return [
        `# 爬虫/${source}`,
        "",
        `- 类型: crawler`,
        `- 数据源: ${source}`,
        `- 上次抓取: ${lastFetch || "无记录"}`,
        `- 推荐下次抓取: ${nextFetch}`,
        `- 抓取间隔: ${band.intervalDays}天`,
        `- 频率分档: ${band.label}`,
        `- 近7天文章数: ${recent7d}`,
        `- 近14天文章数: ${recent14d}`,
        `- 近30天文章数: ${recent30d}`,
    ].join("\n");
}
|
|
32
|
+
// ── 工具定义 ──
|
|
33
|
+
/**
 * MCP tool `yuque_crawl_schedule`.
 *
 * Reads the crawler KV dedup map for a source, counts articles seen in the
 * last 7 / 14 / 30 days, classifies the source into a frequency band and
 * recommends the next fetch date. When schedule slugs are configured for the
 * source and the mode is not "dry_run", the recommendation is written back to
 * the schedule document.
 */
export const crawlSchedule = {
    name: "yuque_crawl_schedule",
    description: "Analyze crawler recent fetch frequency from KV dedup data and recommend next fetch interval. 通过 config.json crawler.namespaces.{source}.schedule_slugs 定位配置文档。",
    inputSchema: {
        type: "object",
        properties: {
            source: { type: "string", description: "Source key, e.g. 'cnblogs'." },
            kv_namespace: { type: "string", description: "KV namespace for dedup data. Defaults to source." },
            mode: { type: "string", description: "Mode: 'analyze' (analyze + write back, default) | 'dry_run' (analyze only, no write)" },
            // NOTE(review): `raw` is advertised here but the handler below never reads it.
            raw: { type: "boolean", description: "Return raw full JSON (default false)" },
        },
        required: ["source"],
    },
    /**
     * Run the analysis.
     *
     * @param args Tool arguments: `source` (required), `kv_namespace`
     *             (falls back to `source` when falsy), `mode` ("analyze" by
     *             default, or "dry_run").
     * @returns An MCP result whose single text item is pretty-printed JSON.
     *          `isError` is set for a missing source, an unknown mode,
     *          disabled KV and an empty KV namespace.
     */
    async handler(args) {
        const invalidSource = check(requiredString(args?.source, "source"));
        if (invalidSource)
            return invalidSource;
        const source = args?.source;
        const kvNamespace = args?.kv_namespace || source;
        const mode = args?.mode ?? "analyze";
        // Fail closed on unknown modes: previously anything that was not exactly
        // "dry_run" (e.g. "dry-run", "dryRun", "") silently took the write path.
        if (mode !== "analyze" && mode !== "dry_run") {
            return {
                content: [{ type: "text", text: JSON.stringify({
                            error: "INVALID_MODE",
                            message: `mode 必须是 'analyze' 或 'dry_run' / mode must be 'analyze' or 'dry_run', got ${JSON.stringify(mode)}`,
                        }, null, 2) }],
                isError: true,
            };
        }
        const cfg = loadConfig();
        const enableKv = !!(cfg.kv?.enabled);
        if (!enableKv) {
            return {
                content: [{ type: "text", text: JSON.stringify({
                            error: "KV_DISABLED", message: "KV 功能未启用",
                        }, null, 2) }],
                isError: true,
            };
        }
        const kvMap = await loadKvMap("crawler", kvNamespace);
        const totalEntries = Object.keys(kvMap).length;
        if (totalEntries === 0) {
            return {
                content: [{ type: "text", text: JSON.stringify({
                            error: "NO_DATA",
                            message: `KV namespace '${kvNamespace}' 中没有数据`,
                            hint: "先用 yuque_crawl_save 抓取一些文章后再调用此工具",
                        }, null, 2) }],
                isError: true,
            };
        }
        const articles = parseArticles(kvMap);
        // Entries exist but none could be parsed into articles: report a
        // fixed fallback recommendation instead of an error.
        if (articles.length === 0) {
            return {
                content: [{ type: "text", text: JSON.stringify({
                            source, kv_namespace: kvNamespace, total_articles: totalEntries,
                            warning: "KV 中数据为旧格式(无时间戳),无法分析频率。",
                            fallback_recommendation: { strategy: "保守策略", interval: "每 7 天" },
                        }, null, 2) }],
            };
        }
        const now = new Date();
        const recent7d = countRecent(articles, 7, now);
        const recent14d = countRecent(articles, 14, now);
        const recent30d = countRecent(articles, 30, now);
        // Only the 7- and 14-day counts feed the banding; the 30-day count is reported only.
        const band = classifyBand(recent7d, recent14d);
        const hasScheduleSlugs = getScheduleSlugs("crawler", source).length > 0;
        const analysisMode = hasScheduleSlugs ? "schedule_book" : "kv_fallback";
        let lastFetch = null;
        if (hasScheduleSlugs) {
            const found = await findScheduleDoc("crawler", source, CRAWLER_SCHEDULE_SLUG);
            lastFetch = found.lastFetch;
        }
        // now + intervalDays, reduced to the UTC calendar date (YYYY-MM-DD).
        const nextFetchDate = new Date(now.getTime() + band.intervalDays * 24 * 60 * 60 * 1000);
        const nextFetch = nextFetchDate.toISOString().slice(0, 10);
        // Stays undefined (and is therefore omitted from the JSON) when nothing is written.
        let writeResult;
        if (analysisMode === "schedule_book" && mode !== "dry_run") {
            const title = `爬虫/${source}`;
            const body = buildCrawlScheduleBody(source, lastFetch, nextFetch, band, recent7d, recent14d, recent30d);
            const result = await upsertScheduleDoc("crawler", source, CRAWLER_SCHEDULE_SLUG, title, body);
            writeResult = {
                status: result.ok ? "updated" : "failed",
                slug: result.slug,
                error: result.error,
            };
        }
        return {
            content: [{
                    type: "text",
                    text: JSON.stringify({
                        source,
                        mode: analysisMode,
                        analysis: {
                            band: band.label,
                            intervalDays: band.intervalDays,
                            lastFetch,
                            nextFetch,
                            recent7dCount: recent7d,
                            recent14dCount: recent14d,
                            recent30dCount: recent30d,
                            totalArticles: totalEntries,
                        },
                        writeResult,
                    }, null, 2),
                }],
        };
    },
};
|
|
132
|
+
//# sourceMappingURL=schedule.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schedule.js","sourceRoot":"","sources":["../../src/crawler/schedule.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAEL,YAAY,EACZ,aAAa,EACb,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,iBAAiB,GAClB,MAAM,8BAA8B,CAAC;AAEtC,MAAM,qBAAqB,GAAG,kBAAkB,CAAC;AAEjD,oCAAoC;AAEpC,SAAS,sBAAsB,CAC7B,MAAc,EACd,SAAwB,EACxB,SAAiB,EACjB,IAA6C,EAC7C,QAAgB,EAChB,SAAiB,EACjB,SAAiB;IAEjB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,QAAQ,MAAM,EAAE,CAAC,CAAC;IAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC5B,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,EAAE,CAAC,CAAC;IAC/B,KAAK,CAAC,IAAI,CAAC,WAAW,SAAS,IAAI,KAAK,EAAE,CAAC,CAAC;IAC5C,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,EAAE,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IAC5C,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,IAAI,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,IAAI,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;IACtC,KAAK,CAAC,IAAI,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;IACtC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,aAAa;AAEb,MAAM,CAAC,MAAM,aAAa,GAAY;IACpC,IAAI,EAAE,sBAAsB;IAC5B,WAAW,EAAE,gKAAgK;IAE7K,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6BAA6B,EAAE;YACtE,YAAY,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,kDAAkD,EAAE;YACjG,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,sFAAsF,EAAE;YAC7H,GAAG,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,sCAAsC,EAAE;SAC9E;QACD,QAAQ,EAAE,CAAC,QAAQ,CAAC;KACrB;IAED,KAAK,CAAC,OAAO,CAAC,IAAI;QAChB,MAAM,GAAG,GAAG,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC1D,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QAEpB,MAAM,MAAM,GAAG,IAAI,EAAE,MAAgB,CAAC;QACtC,MAAM,WAAW,GAAI,IAAI,EAAE,YAAuB,IAAI,MAAM,CAAC;QAC7D,MAAM,IAAI,GAAI,IAAI,EAAE,IAAe,IAAI,SAAS,CAAC;QACjD,MAAM,GAAG,GAAG,UAAU,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAA
C,CAAC;QAErC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,UAAU;yBAC1C,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAE/C,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,SAAS;4BAChB,OAAO,EAAE,iBAAiB,WAAW,SAAS;4BAC9C,IAAI,EAAE,mCAAmC;yBAC1C,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,YAAY;4BAC/D,OAAO,EAAE,0BAA0B;4BACnC,uBAAuB,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE;yBACjE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;aACf,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,WAAW,CAAC,QAAQ,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC;QACjD,MAAM,SAAS,GAAG,WAAW,CAAC,QAAQ,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC;QACjD,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QAE/C,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QACxE,MAAM,YAAY,GAAG,gBAAgB,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,aAAa,CAAC;QAExE,IAAI,SAAS,GAAkB,IAAI,CAAC;QACpC,IAAI,gBAAgB,EAAE,CAAC;YACrB,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,qBAAqB,CAAC,CAAC;YAC9E,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAC9B,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,YAAY,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC;QACxF,MAAM,SAAS,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAE3D,IAAI,WAA0E,CAAC;QAC/E,IAAI,YAAY,KAAK,eAAe,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YAC3D,MAAM,KAAK,GAAG,MAAM,MAAM,EAAE,CAAC;YAC7B,M
AAM,IAAI,GAAG,sBAAsB,CAAC,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YACxG,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,qBAAqB,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YAC9F,WAAW,GAAG;gBACZ,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ;gBACxC,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;QACJ,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,MAAM;wBACN,IAAI,EAAE,YAAY;wBAClB,QAAQ,EAAE;4BACR,IAAI,EAAE,IAAI,CAAC,KAAK;4BAChB,YAAY,EAAE,IAAI,CAAC,YAAY;4BAC/B,SAAS;4BACT,SAAS;4BACT,aAAa,EAAE,QAAQ;4BACvB,cAAc,EAAE,SAAS;4BACzB,cAAc,EAAE,SAAS;4BACzB,aAAa,EAAE,YAAY;yBAC5B;wBACD,WAAW;qBACZ,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* doc/batch-get — 批量获取文档详情
|
|
3
|
+
*
|
|
4
|
+
* 并发 GET /api/v2/repos/:book_id/docs/:id
|
|
5
|
+
* 只读操作,不涉及写
|
|
6
|
+
*/
|
|
7
|
+
import { apiGet, isErrorResult } from "../common/api-client.js";
|
|
8
|
+
import { requiredString, check } from "../common/validate.js";
|
|
9
|
+
import { formatDoc, wrapResult } from "../common/format.js";
|
|
10
|
+
/**
 * MCP tool `yuque_batch_get_docs`.
 *
 * Fetches up to 20 documents of one repository concurrently via
 * GET /repos/:book_id/docs/:id and returns them keyed by id. Read-only.
 */
export const docBatchGet = {
    name: "yuque_batch_get_docs",
    description: "Batch get document details (concurrent GET, read-only, max 20). 详见 references/api/doc_api.md",
    inputSchema: {
        type: "object",
        properties: {
            book_id: {
                type: "string",
                description: "Repository ID (numeric) or namespace like group/book_slug (required, shared for all docs)",
            },
            ids: {
                type: "string",
                description: "Document IDs as JSON array, e.g. [123,456] or [\"slug-a\",\"slug-b\"] (required, max 20)",
            },
            raw: {
                type: "boolean",
                description: "Return raw full JSON (default false, returns trimmed fields)",
            },
        },
        required: ["book_id", "ids"],
    },
    /**
     * @param args Tool arguments: `book_id`, `ids` (a JSON-encoded array of
     *             1-20 ids, each a non-empty string or finite number) and
     *             optional `raw`.
     * @returns An MCP result whose text item is JSON with `total`, `ok` and,
     *          when non-empty, `docs` (keyed by stringified id) and `errors`.
     *          Validation failures return `isError: true`.
     */
    async handler(args) {
        const bookId = args?.book_id;
        const idsRaw = args?.ids;
        const raw = args?.raw;
        const v = check(requiredString(bookId, "book_id"), requiredString(idsRaw, "ids"));
        if (v)
            return v;
        // Shared shape for every validation failure raised below.
        const fail = (error) => ({
            content: [{ type: "text", text: JSON.stringify({
                        error,
                        hint: "zh/en",
                    }, null, 2) }],
            isError: true,
        });
        let ids;
        try {
            ids = JSON.parse(idsRaw);
        }
        catch {
            return fail("ids 必须是合法 JSON 数组 / ids must be a valid JSON array");
        }
        if (!Array.isArray(ids) || ids.length === 0) {
            return fail("ids 不能为空数组 / ids must be a non-empty array");
        }
        if (ids.length > 20) {
            return fail("ids 最多 20 个 / ids max 20");
        }
        // Each id becomes a URL path segment via String(id). Without this check an
        // object would be requested as "[object Object]", null as "null", and an
        // empty string would address the docs collection instead of one document.
        const isUsableId = (id) => (typeof id === "string" && id.trim() !== "") ||
            (typeof id === "number" && Number.isFinite(id));
        if (!ids.every(isUsableId)) {
            return fail("ids 的每一项必须是非空字符串或数字 / each id must be a non-empty string or a finite number");
        }
        // Concurrent fetch. book_id is interpolated as-is because it may be a
        // "group/book_slug" namespace containing "/"; only the doc id is encoded.
        const results = await Promise.all(ids.map((id) => apiGet(`/repos/${bookId}/docs/${encodeURIComponent(String(id))}`, {
            raw: raw ? "1" : "0",
        }, `Get doc ${id}`).then((data) => {
            if (isErrorResult(data))
                return { id, error: true, detail: data };
            return { id, ok: true, data };
        })));
        // Split successes from failures; successes are keyed by stringified id.
        const output = {};
        const errors = [];
        for (const r of results) {
            if (r.error) {
                errors.push({ id: r.id, detail: r.detail });
            }
            else if (r.ok && r.data) {
                // Unwrap a `{ data: ... }` envelope when present.
                const rawData = r.data?.data ?? r.data;
                output[String(r.id)] = raw ? rawData : wrapResult(r.data, formatDoc, false);
            }
        }
        const response = { total: ids.length, ok: Object.keys(output).length };
        if (Object.keys(output).length > 0)
            response.docs = output;
        if (errors.length > 0)
            response.errors = errors;
        return {
            content: [{ type: "text", text: JSON.stringify(response, null, 2) }],
        };
    },
};
|
|
100
|
+
//# sourceMappingURL=batch-get-docs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch-get-docs.js","sourceRoot":"","sources":["../../src/doc/batch-get-docs.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAG5D,MAAM,CAAC,MAAM,WAAW,GAAY;IAClC,IAAI,EAAE,sBAAsB;IAC5B,WAAW,EAAE,8FAA8F;IAE3G,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,OAAO,EAAE;gBACP,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,2FAA2F;aACzG;YACD,GAAG,EAAE;gBACH,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,0FAA0F;aACxG;YACD,GAAG,EAAE;gBACH,IAAI,EAAE,SAAS;gBACf,WAAW,EAAE,8DAA8D;aAC5E;SACF;QACD,QAAQ,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC;KAC7B;IAED,KAAK,CAAC,OAAO,CAAC,IAAI;QAChB,MAAM,MAAM,GAAG,IAAI,EAAE,OAAiB,CAAC;QACvC,MAAM,MAAM,GAAG,IAAI,EAAE,GAAa,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,EAAE,GAA0B,CAAC;QAE7C,KAAK;QACL,MAAM,CAAC,GAAG,KAAK,CACb,cAAc,CAAC,MAAM,EAAE,SAAS,CAAC,EACjC,cAAc,CAAC,MAAM,EAAE,KAAK,CAAC,CAC9B,CAAC;QACF,IAAI,CAAC;YAAE,OAAO,CAAC,CAAC;QAEhB,IAAI,GAAwB,CAAC;QAC7B,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,oDAAoD;4BAC3D,IAAI,EAAE,OAAO;yBACd,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5C,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,4CAA4C;4BACnD,IAAI,EAAE,OAAO;yBACd,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACpB,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;4BACtD,KAAK,EAAE,0BAA0B;4BACjC,IAAI,EAAE,OAAO;yBACd,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QAED,OAAO;QACP,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACb,MAAM,CAAC,UAAU,MAAM,SAAS,kBAAkB,
CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE;YAChE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;SACrB,EAAE,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAa,EAAE,EAAE;YACzC,IAAI,aAAa,CAAC,IAAI,CAAC;gBAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;YAClE,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChC,CAAC,CAAC,CACH,CACF,CAAC;QAEF,OAAO;QACP,MAAM,MAAM,GAA4B,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAoD,EAAE,CAAC;QAEnE,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAqB,EAAE,MAAM,EAAG,CAAS,CAAC,MAAM,EAAE,CAAC,CAAC;YAC1E,CAAC;iBAAM,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAI,CAAC,CAAC,IAAY,EAAE,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC;gBAChD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;YAC9E,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAA4B,EAAE,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC;QAChG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,QAAQ,CAAC,IAAI,GAAG,MAAM,CAAC;QAC3D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC;QAEhD,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;SAC9E,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* doc/copy-doc — 单文档跨知识库复制
|
|
3
|
+
*
|
|
4
|
+
* 两种模式:
|
|
5
|
+
* 1. Agent 传入 title/body/format/paths → 工具建目录+创建文档
|
|
6
|
+
* 2. Agent 传入 doc_id + source_book_id → 工具拉取源文档,返回原始内容给 Agent
|
|
7
|
+
* Agent 清洗后再调模式1创建。解决大文档(>30KB)通过 mcporter CLI 传 body 不稳定的问题。
|
|
8
|
+
*/
|
|
9
|
+
import type { McpTool } from "../common/types.js";
|
|
10
|
+
/**
 * MCP tool definition for copying a single document across knowledge bases.
 *
 * NOTE(review): the implementation is not visible from this declaration; per
 * the module header it either creates a document from caller-supplied
 * title/body/format/paths, or fetches a source document (doc_id +
 * source_book_id) and returns its raw content — confirm against copy-doc.js.
 */
export declare const docCopySingle: McpTool;
|