mcp-docs-scraper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +357 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +231 -0
- package/dist/server.js.map +1 -0
- package/dist/services/cache-manager.d.ts +100 -0
- package/dist/services/cache-manager.d.ts.map +1 -0
- package/dist/services/cache-manager.js +212 -0
- package/dist/services/cache-manager.js.map +1 -0
- package/dist/services/content-cleaner.d.ts +48 -0
- package/dist/services/content-cleaner.d.ts.map +1 -0
- package/dist/services/content-cleaner.js +295 -0
- package/dist/services/content-cleaner.js.map +1 -0
- package/dist/services/github-detector.d.ts +49 -0
- package/dist/services/github-detector.d.ts.map +1 -0
- package/dist/services/github-detector.js +276 -0
- package/dist/services/github-detector.js.map +1 -0
- package/dist/services/github-fetcher.d.ts +94 -0
- package/dist/services/github-fetcher.d.ts.map +1 -0
- package/dist/services/github-fetcher.js +393 -0
- package/dist/services/github-fetcher.js.map +1 -0
- package/dist/services/search-index.d.ts +106 -0
- package/dist/services/search-index.d.ts.map +1 -0
- package/dist/services/search-index.js +210 -0
- package/dist/services/search-index.js.map +1 -0
- package/dist/services/web-scraper.d.ts +88 -0
- package/dist/services/web-scraper.d.ts.map +1 -0
- package/dist/services/web-scraper.js +244 -0
- package/dist/services/web-scraper.js.map +1 -0
- package/dist/tools/clear-cache.d.ts +24 -0
- package/dist/tools/clear-cache.d.ts.map +1 -0
- package/dist/tools/clear-cache.js +29 -0
- package/dist/tools/clear-cache.js.map +1 -0
- package/dist/tools/detect-github.d.ts +21 -0
- package/dist/tools/detect-github.d.ts.map +1 -0
- package/dist/tools/detect-github.js +18 -0
- package/dist/tools/detect-github.js.map +1 -0
- package/dist/tools/get-content.d.ts +43 -0
- package/dist/tools/get-content.d.ts.map +1 -0
- package/dist/tools/get-content.js +84 -0
- package/dist/tools/get-content.js.map +1 -0
- package/dist/tools/get-tree.d.ts +31 -0
- package/dist/tools/get-tree.d.ts.map +1 -0
- package/dist/tools/get-tree.js +102 -0
- package/dist/tools/get-tree.js.map +1 -0
- package/dist/tools/index-docs.d.ts +63 -0
- package/dist/tools/index-docs.d.ts.map +1 -0
- package/dist/tools/index-docs.js +371 -0
- package/dist/tools/index-docs.js.map +1 -0
- package/dist/tools/index.d.ts +11 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +11 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/list-cached.d.ts +19 -0
- package/dist/tools/list-cached.d.ts.map +1 -0
- package/dist/tools/list-cached.js +20 -0
- package/dist/tools/list-cached.js.map +1 -0
- package/dist/tools/search-docs.d.ts +31 -0
- package/dist/tools/search-docs.d.ts.map +1 -0
- package/dist/tools/search-docs.js +64 -0
- package/dist/tools/search-docs.js.map +1 -0
- package/dist/types/cache.d.ts +53 -0
- package/dist/types/cache.d.ts.map +1 -0
- package/dist/types/cache.js +2 -0
- package/dist/types/cache.js.map +1 -0
- package/dist/types/errors.d.ts +102 -0
- package/dist/types/errors.d.ts.map +1 -0
- package/dist/types/errors.js +216 -0
- package/dist/types/errors.js.map +1 -0
- package/dist/types/index.d.ts +6 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/fs.d.ts +45 -0
- package/dist/utils/fs.d.ts.map +1 -0
- package/dist/utils/fs.js +113 -0
- package/dist/utils/fs.js.map +1 -0
- package/dist/utils/rate-limit.d.ts +55 -0
- package/dist/utils/rate-limit.d.ts.map +1 -0
- package/dist/utils/rate-limit.js +89 -0
- package/dist/utils/rate-limit.js.map +1 -0
- package/dist/utils/url.d.ts +69 -0
- package/dist/utils/url.d.ts.map +1 -0
- package/dist/utils/url.js +251 -0
- package/dist/utils/url.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect-github.js","sourceRoot":"","sources":["../../src/tools/detect-github.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,gBAAgB,GAEjB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAgBrD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IAEtB,+BAA+B;IAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,eAAe,CAAC,iCAAiC,EAAE,KAAK,CAAC,CAAC;IACtE,CAAC;IAED,gBAAgB;IAChB,OAAO,gBAAgB,CAAC,GAAG,CAAC,CAAC;AAC/B,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* get_docs_content tool - Retrieves actual content of specific doc files from cache.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Input parameters for get_docs_content tool.
|
|
6
|
+
*/
|
|
7
|
+
export interface GetDocsContentInput {
|
|
8
|
+
/** The docs ID from index_docs response; required (an empty value is rejected with a ValidationError) */
|
|
9
|
+
docs_id: string;
|
|
10
|
+
/** Non-empty array of file paths to fetch; leading slashes are stripped before the cache lookup */
|
|
11
|
+
paths: string[];
|
|
12
|
+
/** Output format (default: "markdown"). NOTE(review): the compiled implementation currently returns identical content for both values */
|
|
13
|
+
format?: "markdown" | "raw";
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Content information for a single file.
|
|
17
|
+
*/
|
|
18
|
+
export interface FileContent {
|
|
19
|
+
/** The file content as read from the cache */
|
|
20
|
+
content: string;
|
|
21
|
+
/** Extracted title: text of the first H1 ("# ") heading, left undefined when the content has none */
|
|
22
|
+
title?: string;
|
|
23
|
+
/** Markdown headings in document order for quick navigation, each keeping its "#" level prefix (e.g. "## Install") */
|
|
24
|
+
headings: string[];
|
|
25
|
+
/** Size of the content in bytes when encoded as UTF-8 */
|
|
26
|
+
size_bytes: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Output from get_docs_content tool.
|
|
30
|
+
*/
|
|
31
|
+
export interface GetDocsContentOutput {
|
|
32
|
+
/** The docs ID, echoed back from the input */
|
|
33
|
+
docs_id: string;
|
|
34
|
+
/** Content for each found path, keyed by the path exactly as it was requested */
|
|
35
|
+
contents: Record<string, FileContent>;
|
|
36
|
+
/** Requested paths (as given in the input) that don't exist in cache */
|
|
37
|
+
not_found: string[];
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Gets the content of specific files from cached documentation; resolves with the found contents plus the requested paths that were not found.
|
|
41
|
+
*/
|
|
42
|
+
export declare function getDocsContent(input: GetDocsContentInput): Promise<GetDocsContentOutput>;
|
|
43
|
+
//# sourceMappingURL=get-content.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"get-content.d.ts","sourceRoot":"","sources":["../../src/tools/get-content.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,mCAAmC;IACnC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wCAAwC;IACxC,MAAM,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,oBAAoB;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,kBAAkB;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IACtC,sCAAsC;IACtC,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AA0CD;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,mBAAmB,GACzB,OAAO,CAAC,oBAAoB,CAAC,CAqD/B"}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* get_docs_content tool - Retrieves actual content of specific doc files from cache.
|
|
3
|
+
*/
|
|
4
|
+
import { cacheManager } from "../services/cache-manager.js";
|
|
5
|
+
import { CacheNotFoundError, ValidationError } from "../types/errors.js";
|
|
6
|
+
/**
 * Extracts headings from markdown content.
 *
 * Each returned entry keeps its level marker (for example "## Install"), so
 * callers can tell heading levels apart.
 *
 * Both LF and CRLF line endings are supported. Lines inside fenced code
 * blocks (opened by three or more backticks or tildes) are skipped, so shell
 * or Python comments such as "# install deps" are not reported as headings.
 *
 * @param {string} content - Markdown source text.
 * @returns {string[]} Headings in document order, formatted as "<hashes> <text>".
 */
function extractHeadings(content) {
    const headings = [];
    // Marker of the code fence that is currently open (e.g. "```"), or null.
    let openFence = null;
    // Split on \r?\n: with a plain "\n" split, CRLF input leaves a trailing
    // "\r" on every line, which `.` cannot match, so no heading ever matched.
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
        if (fence) {
            const marker = fence[1];
            const rest = fence[2];
            if (openFence === null) {
                openFence = marker;
                continue;
            }
            // A fence closes only on the same character, at least as long as
            // the opener, with nothing but whitespace after it.
            const closesFence = marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                rest.trim() === "";
            if (closesFence) {
                openFence = null;
                continue;
            }
        }
        if (openFence !== null) {
            continue; // Inside a code block: "#" lines are code, not headings
        }
        // Match markdown headings (# to ######)
        const match = line.match(/^(#{1,6})\s+(.+)$/);
        if (match) {
            // Keep the hashes so the heading level stays visible to the caller
            headings.push(`${match[1]} ${match[2].trim()}`);
        }
    }
    return headings;
}
|
|
25
|
+
/**
 * Extracts the title from markdown content.
 *
 * The title is the text of the first H1 heading ("# Title"), without the
 * leading "#". Both LF and CRLF line endings are supported, and lines inside
 * fenced code blocks (opened by three or more backticks or tildes) are
 * ignored so a "# comment" in a code sample is never mistaken for the title.
 *
 * @param {string} content - Markdown source text.
 * @returns {string | undefined} The first H1 heading text, or undefined if none found.
 */
function extractTitle(content) {
    // Marker of the code fence that is currently open (e.g. "```"), or null.
    let openFence = null;
    // Split on \r?\n: with a plain "\n" split, CRLF input leaves a trailing
    // "\r" on every line, which `.` cannot match, so the H1 never matched.
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
        if (fence) {
            const marker = fence[1];
            const rest = fence[2];
            if (openFence === null) {
                openFence = marker;
                continue;
            }
            // A fence closes only on the same character, at least as long as
            // the opener, with nothing but whitespace after it.
            const closesFence = marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                rest.trim() === "";
            if (closesFence) {
                openFence = null;
                continue;
            }
        }
        if (openFence !== null) {
            continue; // Inside a code block: "#" lines are code, not headings
        }
        // Match H1 heading
        const match = line.match(/^#\s+(.+)$/);
        if (match) {
            return match[1].trim();
        }
    }
    return undefined;
}
|
|
40
|
+
/**
 * Gets the content of specific files from cached documentation.
 *
 * @param input - Tool input: `docs_id` (required), `paths` (required,
 *   non-empty array of strings) and the optional `format`.
 * @returns An object with the `docs_id`, a `contents` map keyed by each path
 *   exactly as requested (content, title, headings, UTF-8 size), and
 *   `not_found`, the requested paths that could not be served from the cache.
 * @throws {ValidationError} When `docs_id` is missing, `paths` is missing,
 *   not an array or empty, or any entry of `paths` is not a string.
 * @throws {CacheNotFoundError} When no cache entry exists for `docs_id`.
 */
export async function getDocsContent(input) {
    // `format` ("markdown" | "raw") is accepted for API compatibility, but the
    // cached content is returned unchanged for both values, so it is not read.
    const { docs_id, paths } = input;
    // Validate required parameters
    if (!docs_id) {
        throw new ValidationError("Missing required parameter: docs_id", "docs_id");
    }
    if (!paths || !Array.isArray(paths) || paths.length === 0) {
        throw new ValidationError("Missing required parameter: paths (must be a non-empty array of file paths)", "paths");
    }
    // Reject non-string entries up front; otherwise `path.replace` below would
    // fail with an opaque TypeError instead of a validation error.
    const invalidIndex = paths.findIndex((entry) => typeof entry !== "string");
    if (invalidIndex !== -1) {
        throw new ValidationError(`Invalid parameter: paths[${invalidIndex}] must be a string`, "paths");
    }
    // Find the cached docs entry to determine source type
    const meta = await cacheManager.findById(docs_id);
    if (!meta) {
        throw new CacheNotFoundError(docs_id);
    }
    const contents = {};
    const not_found = [];
    // Fetch each requested path, one at a time, preserving request order
    for (const path of paths) {
        // Normalize path (remove leading slash if present)
        const normalizedPath = path.replace(/^\/+/, "");
        // Defensive: never forward ".." segments to the cache lookup. Whether
        // cacheManager.getContent sanitizes paths is not visible from here, so
        // such paths are simply reported as not found.
        if (normalizedPath.split(/[\\/]/).includes("..")) {
            not_found.push(path);
            continue;
        }
        const content = await cacheManager.getContent(meta.source, docs_id, normalizedPath);
        if (content === null) {
            not_found.push(path);
            continue;
        }
        // Results are keyed by the path as requested, not the normalized one
        contents[path] = {
            content,
            title: extractTitle(content),
            headings: extractHeadings(content),
            size_bytes: new TextEncoder().encode(content).length,
        };
    }
    return {
        docs_id,
        contents,
        not_found,
    };
}
|
|
84
|
+
//# sourceMappingURL=get-content.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"get-content.js","sourceRoot":"","sources":["../../src/tools/get-content.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAwCzE;;;GAGG;AACH,SAAS,eAAe,CAAC,OAAe;IACtC,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,wCAAwC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC7B,oCAAoC;YACpC,QAAQ,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,mBAAmB;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QACvC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAA0B;IAE1B,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,GAAG,KAAK,CAAC;IAEtD,+BAA+B;IAC/B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,eAAe,CAAC,qCAAqC,EAAE,SAAS,CAAC,CAAC;IAC9E,CAAC;IAED,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,eAAe,CAAC,6EAA6E,EAAE,OAAO,CAAC,CAAC;IACpH,CAAC;IAED,sDAAsD;IACtD,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAElD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,IAAI,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,QAAQ,GAAgC,EAAE,CAAC;IACjD,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,4BAA4B;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,mDAAmD;QACnD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAEhD,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,UAAU,CAC3C,IAAI,CAAC,MAAM,EACX,OAAO,EACP,cAAc,CACf,CAAC;QAEF,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC
,CAAC;QACvB,CAAC;aAAM,CAAC;YACN,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;YAEpC,QAAQ,CAAC,IAAI,CAAC,GAAG;gBACf,OAAO,EAAE,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;gBAC7C,KAAK;gBACL,QAAQ;gBACR,UAAU,EAAE,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM;aACrD,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO;QACP,QAAQ;QACR,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* get_docs_tree tool - Retrieves the file tree for cached documentation.
|
|
3
|
+
*/
|
|
4
|
+
import type { DocsTreeNode } from "../types/cache.js";
|
|
5
|
+
/**
|
|
6
|
+
* Input parameters for get_docs_tree tool.
|
|
7
|
+
*/
|
|
8
|
+
export interface GetDocsTreeInput {
|
|
9
|
+
/** The docs ID from index_docs response */
|
|
10
|
+
docs_id: string;
|
|
11
|
+
/** Subtree path to filter (optional, default: root); leading and trailing slashes are ignored */
|
|
12
|
+
path?: string;
|
|
13
|
+
/** Maximum depth to return, where 1 means immediate children only (optional, default: unlimited; values <= 0 are ignored) */
|
|
14
|
+
max_depth?: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Output from get_docs_tree tool.
|
|
18
|
+
*/
|
|
19
|
+
export interface GetDocsTreeOutput {
|
|
20
|
+
/** The docs ID, echoed back from the input */
|
|
21
|
+
docs_id: string;
|
|
22
|
+
/** The path being returned: the requested path, or "/" when none was given */
|
|
23
|
+
path: string;
|
|
24
|
+
/** The tree structure: the full tree, the children of the folder at path, or otherwise the matched node itself as a single-element array */
|
|
25
|
+
tree: DocsTreeNode[];
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Gets the documentation tree for a cached docs entry, optionally narrowed to a subtree path and limited to a maximum depth.
|
|
29
|
+
*/
|
|
30
|
+
export declare function getDocsTree(input: GetDocsTreeInput): Promise<GetDocsTreeOutput>;
|
|
31
|
+
//# sourceMappingURL=get-tree.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"get-tree.d.ts","sourceRoot":"","sources":["../../src/tools/get-tree.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAItD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,uDAAuD;IACvD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,kBAAkB;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,IAAI,EAAE,YAAY,EAAE,CAAC;CACtB;AAoED;;GAEG;AACH,wBAAsB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAyCrF"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* get_docs_tree tool - Retrieves the file tree for cached documentation.
|
|
3
|
+
*/
|
|
4
|
+
import { cacheManager } from "../services/cache-manager.js";
|
|
5
|
+
import { CacheNotFoundError, ValidationError } from "../types/errors.js";
|
|
6
|
+
/**
 * Locates the node at `targetPath` inside a docs tree.
 *
 * Paths are compared with leading and trailing slashes removed. An empty
 * path selects the root and returns `tree` itself. For a matching folder
 * that has a children list, that list is returned; any other matching node
 * is returned wrapped in a single-element array.
 *
 * @param tree - Top-level nodes of the docs tree.
 * @param {string} targetPath - Path of the node to look up.
 * @returns The matching nodes, or null if no node has that path.
 */
function findSubtree(tree, targetPath) {
    const trimSlashes = (value) => value.replace(/^\/+|\/+$/g, "");
    const wanted = trimSlashes(targetPath);
    if (wanted === "") {
        return tree; // Empty path returns root
    }
    // Depth-first lookup that only descends into nodes whose path is a
    // directory prefix of the wanted path.
    const locate = (level) => {
        for (const entry of level) {
            const entryPath = trimSlashes(entry.path);
            if (entryPath === wanted) {
                const isFolderWithChildren = entry.type === "folder" && entry.children;
                return isFolderWithChildren ? entry.children : [entry];
            }
            const mayContainTarget = entry.children && wanted.startsWith(`${entryPath}/`);
            if (mayContainTarget) {
                const hit = locate(entry.children);
                if (hit) {
                    return hit;
                }
            }
        }
        return null;
    };
    return locate(tree);
}
|
|
40
|
+
/**
 * Returns a copy of the tree truncated to `maxDepth` levels.
 *
 * A depth of 1 keeps only the nodes of `tree` itself, 2 also keeps their
 * children, and so on. Folders at the cut-off level are copied with
 * `children` set to undefined; nodes that are not folders with a children
 * list are passed through by reference. The input tree is not mutated.
 *
 * @param tree - Nodes at the current level.
 * @param {number} maxDepth - Number of levels to keep; values <= 0 yield [].
 * @param {number} [currentDepth=1] - Level of `tree`, used by the recursion.
 * @returns The depth-limited nodes.
 */
function limitDepth(tree, maxDepth, currentDepth = 1) {
    if (maxDepth <= 0) {
        return [];
    }
    // Whether this level is the last one whose children may be listed
    const atCutOff = currentDepth >= maxDepth;
    return tree.map((entry) => {
        const isFolderWithChildren = entry.type === "folder" && entry.children;
        if (!isFolderWithChildren) {
            return entry;
        }
        const children = atCutOff
            ? undefined // Don't include children beyond max depth
            : limitDepth(entry.children, maxDepth, currentDepth + 1);
        return { ...entry, children };
    });
}
|
|
65
|
+
/**
 * Gets the documentation tree for a cached docs entry.
 *
 * @param input - Tool input: `docs_id` (required), optional `path` selecting
 *   a subtree, and optional `max_depth` limiting how many levels are returned
 *   (only applied when greater than 0).
 * @returns The `docs_id`, the path that was returned ("/" when none was
 *   requested) and the selected tree.
 * @throws {ValidationError} When `docs_id` is missing.
 * @throws {CacheNotFoundError} When no cache entry exists for `docs_id`.
 * @throws {Error} When `path` does not exist in the documentation tree.
 */
export async function getDocsTree(input) {
    const { docs_id, path = "", max_depth } = input;
    // Validate required parameters
    if (!docs_id) {
        throw new ValidationError("Missing required parameter: docs_id", "docs_id");
    }
    // Find the cached docs entry
    const meta = await cacheManager.findById(docs_id);
    if (!meta) {
        throw new CacheNotFoundError(docs_id);
    }
    // Select the whole tree, or the subtree when a path was requested
    const selected = path ? findSubtree(meta.tree, path) : meta.tree;
    if (path && !selected) {
        throw new Error(`Path not found in documentation tree: "${path}". ` +
            `Use get_docs_tree without a path to see the full tree.`);
    }
    // Apply max_depth if specified
    const shouldLimit = max_depth !== undefined && max_depth > 0;
    const tree = shouldLimit ? limitDepth(selected, max_depth) : selected;
    return { docs_id, path: path || "/", tree };
}
|
|
102
|
+
//# sourceMappingURL=get-tree.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"get-tree.js","sourceRoot":"","sources":["../../src/tools/get-tree.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AA0BzE;;;GAGG;AACH,SAAS,WAAW,CAAC,IAAoB,EAAE,UAAkB;IAC3D,mDAAmD;IACnD,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IAE5D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,IAAI,CAAC,CAAC,0BAA0B;IACzC,CAAC;IAED,yCAAyC;IACzC,SAAS,MAAM,CAAC,KAAqB;QACnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,qCAAqC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAErD,IAAI,QAAQ,KAAK,cAAc,EAAE,CAAC;gBAChC,wBAAwB;gBACxB,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC5C,OAAO,IAAI,CAAC,QAAQ,CAAC;gBACvB,CAAC;gBACD,sDAAsD;gBACtD,OAAO,CAAC,IAAI,CAAC,CAAC;YAChB,CAAC;YAED,8CAA8C;YAC9C,IAAI,cAAc,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACrC,IAAI,MAAM;oBAAE,OAAO,MAAM,CAAC;YAC5B,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,IAAoB,EAAE,QAAgB,EAAE,YAAY,GAAG,CAAC;IAC1E,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACvB,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5C,IAAI,YAAY,IAAI,QAAQ,EAAE,CAAC;gBAC7B,0CAA0C;gBAC1C,OAAO;oBACL,GAAG,IAAI;oBACP,QAAQ,EAAE,SAAS;iBACpB,CAAC;YACJ,CAAC;YACD,OAAO;gBACL,GAAG,IAAI;gBACP,QAAQ,EAAE,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,YAAY,GAAG,CAAC,CAAC;aAChE,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,KAAuB;IACvD,MAAM,EAAE,OAAO,EAAE,IAAI,GAAG,EAAE,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IAEhD,+BAA+B;IAC/B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,eAAe,CAAC,qCAAqC,EAAE,SAAS,CAAC,CAAC;IAC9E,CAAC;IAED,6BAA6B;IAC7B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAElD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,IAAI,kBAAkB
,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,8CAA8C;IAC9C,IAAI,IAAoB,CAAC;IAEzB,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,0CAA0C,IAAI,KAAK;gBACjD,wDAAwD,CAC3D,CAAC;QACJ,CAAC;QACD,IAAI,GAAG,OAAO,CAAC;IACjB,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED,+BAA+B;IAC/B,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAC7C,IAAI,GAAG,UAAU,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrC,CAAC;IAED,OAAO;QACL,OAAO;QACP,IAAI,EAAE,IAAI,IAAI,GAAG;QACjB,IAAI;KACL,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* index_docs tool - Fetches and caches documentation from GitHub repositories or websites.
|
|
3
|
+
*/
|
|
4
|
+
import type { DocsTreeNode } from "../types/cache.js";
|
|
5
|
+
/**
|
|
6
|
+
* Input parameters for index_docs tool.
|
|
7
|
+
*/
|
|
8
|
+
export interface IndexDocsInput {
|
|
9
|
+
/** URL of the GitHub repo or docs website to index */
|
|
10
|
+
url: string;
|
|
11
|
+
/** Detection method (default: "github" for now). NOTE(review): the default cannot be verified from this declaration file; confirm against the implementation */
|
|
12
|
+
type?: "github" | "scrape" | "auto";
|
|
13
|
+
/** Crawl depth for scraping (not used for GitHub) */
|
|
14
|
+
depth?: number;
|
|
15
|
+
/** URL patterns to include */
|
|
16
|
+
include_patterns?: string[];
|
|
17
|
+
/** URL patterns to exclude */
|
|
18
|
+
exclude_patterns?: string[];
|
|
19
|
+
/** When true, ignore the cache and re-fetch */
|
|
20
|
+
force_refresh?: boolean;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Output from index_docs tool.
|
|
24
|
+
*/
|
|
25
|
+
export interface IndexDocsOutput {
|
|
26
|
+
/** Unique cache ID for this docs set; this is the docs_id expected by get_docs_content and get_docs_tree */
|
|
27
|
+
id: string;
|
|
28
|
+
/** Source type. Note the spelling: the output uses "scraped" while IndexDocsInput.type uses "scrape" */
|
|
29
|
+
source: "github" | "scraped";
|
|
30
|
+
/** Repository in "owner/repo" format (if GitHub) */
|
|
31
|
+
repo?: string;
|
|
32
|
+
/** Base URL (if scraped) */
|
|
33
|
+
base_url?: string;
|
|
34
|
+
/** Top-level tree structure */
|
|
35
|
+
tree: DocsTreeNode[];
|
|
36
|
+
/** Indexing statistics (indexed_at is presumably a timestamp string; TODO confirm its format against the implementation) */
|
|
37
|
+
stats: {
|
|
38
|
+
pages: number;
|
|
39
|
+
total_size_bytes: number;
|
|
40
|
+
indexed_at: string;
|
|
41
|
+
};
|
|
42
|
+
/** How the source was detected (for auto mode) */
|
|
43
|
+
detection_method?: string;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Parses a GitHub URL to extract owner and repo, plus the optional branch and path; the return type allows null (presumably for URLs that cannot be parsed; confirm against the implementation).
|
|
47
|
+
* Supports formats:
|
|
48
|
+
* - https://github.com/owner/repo
|
|
49
|
+
* - https://github.com/owner/repo/tree/branch/path
|
|
50
|
+
* - github.com/owner/repo
|
|
51
|
+
*/
|
|
52
|
+
export declare function parseGitHubUrl(url: string): {
|
|
53
|
+
owner: string;
|
|
54
|
+
repo: string;
|
|
55
|
+
branch?: string;
|
|
56
|
+
path?: string;
|
|
57
|
+
} | null;
|
|
58
|
+
/**
|
|
59
|
+
* Main index_docs implementation.
|
|
60
|
+
* Supports GitHub URLs and website scraping; resolves with the cache ID, tree and statistics described by IndexDocsOutput.
|
|
61
|
+
*/
|
|
62
|
+
export declare function indexDocs(input: IndexDocsInput): Promise<IndexDocsOutput>;
|
|
63
|
+
//# sourceMappingURL=index-docs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-docs.d.ts","sourceRoot":"","sources":["../../src/tools/index-docs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAuBtD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sCAAsC;IACtC,GAAG,EAAE,MAAM,CAAC;IACZ,mDAAmD;IACnD,IAAI,CAAC,EAAE,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpC,qDAAqD;IACrD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,8BAA8B;IAC9B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,8BAA8B;IAC9B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,6BAA6B;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,wCAAwC;IACxC,EAAE,EAAE,MAAM,CAAC;IACX,kBAAkB;IAClB,MAAM,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC7B,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,4BAA4B;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,IAAI,EAAE,YAAY,EAAE,CAAC;IACrB,0BAA0B;IAC1B,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,gBAAgB,EAAE,MAAM,CAAC;QACzB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,kDAAkD;IAClD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG;IAC3C,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,IAAI,CA2CP;AA6TD;;;GAGG;AACH,wBAAsB,SAAS,CAC7B,KAAK,EAAE,cAAc,GACpB,OAAO,CAAC,eAAe,CAAC,CAyG1B"}
|