mcp-docs-scraper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +357 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +231 -0
- package/dist/server.js.map +1 -0
- package/dist/services/cache-manager.d.ts +100 -0
- package/dist/services/cache-manager.d.ts.map +1 -0
- package/dist/services/cache-manager.js +212 -0
- package/dist/services/cache-manager.js.map +1 -0
- package/dist/services/content-cleaner.d.ts +48 -0
- package/dist/services/content-cleaner.d.ts.map +1 -0
- package/dist/services/content-cleaner.js +295 -0
- package/dist/services/content-cleaner.js.map +1 -0
- package/dist/services/github-detector.d.ts +49 -0
- package/dist/services/github-detector.d.ts.map +1 -0
- package/dist/services/github-detector.js +276 -0
- package/dist/services/github-detector.js.map +1 -0
- package/dist/services/github-fetcher.d.ts +94 -0
- package/dist/services/github-fetcher.d.ts.map +1 -0
- package/dist/services/github-fetcher.js +393 -0
- package/dist/services/github-fetcher.js.map +1 -0
- package/dist/services/search-index.d.ts +106 -0
- package/dist/services/search-index.d.ts.map +1 -0
- package/dist/services/search-index.js +210 -0
- package/dist/services/search-index.js.map +1 -0
- package/dist/services/web-scraper.d.ts +88 -0
- package/dist/services/web-scraper.d.ts.map +1 -0
- package/dist/services/web-scraper.js +244 -0
- package/dist/services/web-scraper.js.map +1 -0
- package/dist/tools/clear-cache.d.ts +24 -0
- package/dist/tools/clear-cache.d.ts.map +1 -0
- package/dist/tools/clear-cache.js +29 -0
- package/dist/tools/clear-cache.js.map +1 -0
- package/dist/tools/detect-github.d.ts +21 -0
- package/dist/tools/detect-github.d.ts.map +1 -0
- package/dist/tools/detect-github.js +18 -0
- package/dist/tools/detect-github.js.map +1 -0
- package/dist/tools/get-content.d.ts +43 -0
- package/dist/tools/get-content.d.ts.map +1 -0
- package/dist/tools/get-content.js +84 -0
- package/dist/tools/get-content.js.map +1 -0
- package/dist/tools/get-tree.d.ts +31 -0
- package/dist/tools/get-tree.d.ts.map +1 -0
- package/dist/tools/get-tree.js +102 -0
- package/dist/tools/get-tree.js.map +1 -0
- package/dist/tools/index-docs.d.ts +63 -0
- package/dist/tools/index-docs.d.ts.map +1 -0
- package/dist/tools/index-docs.js +371 -0
- package/dist/tools/index-docs.js.map +1 -0
- package/dist/tools/index.d.ts +11 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +11 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/list-cached.d.ts +19 -0
- package/dist/tools/list-cached.d.ts.map +1 -0
- package/dist/tools/list-cached.js +20 -0
- package/dist/tools/list-cached.js.map +1 -0
- package/dist/tools/search-docs.d.ts +31 -0
- package/dist/tools/search-docs.d.ts.map +1 -0
- package/dist/tools/search-docs.js +64 -0
- package/dist/tools/search-docs.js.map +1 -0
- package/dist/types/cache.d.ts +53 -0
- package/dist/types/cache.d.ts.map +1 -0
- package/dist/types/cache.js +2 -0
- package/dist/types/cache.js.map +1 -0
- package/dist/types/errors.d.ts +102 -0
- package/dist/types/errors.d.ts.map +1 -0
- package/dist/types/errors.js +216 -0
- package/dist/types/errors.js.map +1 -0
- package/dist/types/index.d.ts +6 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/fs.d.ts +45 -0
- package/dist/utils/fs.d.ts.map +1 -0
- package/dist/utils/fs.js +113 -0
- package/dist/utils/fs.js.map +1 -0
- package/dist/utils/rate-limit.d.ts +55 -0
- package/dist/utils/rate-limit.d.ts.map +1 -0
- package/dist/utils/rate-limit.js +89 -0
- package/dist/utils/rate-limit.js.map +1 -0
- package/dist/utils/url.d.ts +69 -0
- package/dist/utils/url.d.ts.map +1 -0
- package/dist/utils/url.js +251 -0
- package/dist/utils/url.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitHub Detector Service - Detects GitHub repository from documentation websites.
|
|
3
|
+
*
|
|
4
|
+
* Detection strategies (in order of confidence):
|
|
5
|
+
* 1. Direct GitHub URL → high confidence
|
|
6
|
+
* 2. github.io pattern → high confidence
|
|
7
|
+
* 3. "Edit on GitHub" links → high confidence
|
|
8
|
+
* 4. Meta tags with repo info → medium confidence
|
|
9
|
+
* 5. GitHub links in page content → medium confidence (3+ links to one repo), otherwise low
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Result of GitHub repository detection, as resolved by detectGitHubRepo.
* When nothing is detected only `found`, `confidence` and `detection_method` are set.
|
|
13
|
+
*/
|
|
14
|
+
export interface GitHubDetectionResult {
|
|
15
|
+
/** Whether a GitHub repository was found; false when the page could not be fetched or no strategy matched */
|
|
16
|
+
found: boolean;
|
|
17
|
+
/** Repository in "owner/repo" format; omitted when `found` is false */
|
|
18
|
+
repo?: string;
|
|
19
|
+
/**
 * Path reported for the matched GitHub URL (for edit links: the URL path
 * segments that follow owner/repo, joined with "/"). Only the direct-URL
 * and edit-link strategies set it, and it may still be undefined.
 */
|
|
20
|
+
docs_path?: string;
|
|
21
|
+
/**
 * Confidence level of the detection:
 * "high" for a direct GitHub URL, a github.io host or an edit link;
 * "medium" for a meta tag or 3+ links to the same repo;
 * "low" for fewer links and for every not-found result.
 */
|
|
22
|
+
confidence: "high" | "medium" | "low";
|
|
23
|
+
/**
 * How the repo was detected (for debugging). Values produced by the detector:
 * "direct_github_url", "github_io_project", "github_io_user",
 * "edit_on_github_link", "meta_tag", "github_links_found_<count>",
 * "fetch_failed", "no_github_found".
 */
|
|
24
|
+
detection_method?: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Options for GitHub detection. Both fields are optional; they only affect
* the page fetch, which is skipped when the URL itself identifies the repo.
|
|
28
|
+
*/
|
|
29
|
+
export interface DetectionOptions {
|
|
30
|
+
/** Timeout for fetching the page (ms); the detector defaults to 10000 */
|
|
31
|
+
timeout?: number;
|
|
32
|
+
/** Value sent as the User-Agent request header; defaults to "mcp-docs-scraper/1.0 (github-detector)" */
|
|
33
|
+
userAgent?: string;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Detects GitHub repository from a documentation website URL.
* The URL itself is checked first (GitHub URL or *.github.io host); only when
* that fails is the page fetched and its HTML scanned for edit links, meta
* tags and plain GitHub links, in that order.
* A failed or non-OK page fetch is not thrown: the promise resolves with
* `found: false` and `detection_method: "fetch_failed"`.
|
|
37
|
+
*
|
|
38
|
+
* @param url The URL to analyze
|
|
39
|
+
* @param options Detection options (timeout and user agent for the page fetch)
|
|
40
|
+
* @returns Detection result with repo info and confidence
|
|
41
|
+
*/
|
|
42
|
+
export declare function detectGitHubRepo(url: string, options?: DetectionOptions): Promise<GitHubDetectionResult>;
|
|
43
|
+
/**
|
|
44
|
+
* Convenience object whose `detect` member is the detectGitHubRepo function itself.
|
|
45
|
+
*/
|
|
46
|
+
export declare const githubDetector: {
|
|
47
|
+
detect: typeof detectGitHubRepo;
|
|
48
|
+
};
|
|
49
|
+
//# sourceMappingURL=github-detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-detector.d.ts","sourceRoot":"","sources":["../../src/services/github-detector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,4CAA4C;IAC5C,KAAK,EAAE,OAAO,CAAC;IACf,wCAAwC;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AA+OD;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,qBAAqB,CAAC,CA2ChC;AAED;;GAEG;AACH,eAAO,MAAM,cAAc;;CAE1B,CAAC"}
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitHub Detector Service - Detects GitHub repository from documentation websites.
|
|
3
|
+
*
|
|
4
|
+
* Detection strategies (in order of confidence):
|
|
5
|
+
* 1. Direct GitHub URL → high confidence
|
|
6
|
+
* 2. github.io pattern → high confidence
|
|
7
|
+
* 3. "Edit on GitHub" links → high confidence
|
|
8
|
+
* 4. Meta tags with repo info → medium confidence
|
|
9
|
+
* 5. GitHub links in page content → medium confidence (3+ links to one repo), otherwise low
|
|
10
|
+
*/
|
|
11
|
+
import { parseGitHubUrl } from "../tools/index-docs.js";
|
|
12
|
+
/** Default request settings used by detectGitHubRepo when the caller omits an option. */
const DEFAULT_OPTIONS = {
|
|
13
|
+
timeout: 10000, // milliseconds; handed to AbortSignal.timeout() in fetchPage
|
|
14
|
+
userAgent: "mcp-docs-scraper/1.0 (github-detector)", // sent as the User-Agent header
|
|
15
|
+
};
|
|
16
|
+
/**
|
|
17
|
+
* Patterns for detecting GitHub links in HTML.
*
* NOTE(review): nothing in this module reads this constant; detectFromEditLinks
* and detectFromLinks declare their own patterns (the second entry here is
* identical to the one in detectFromLinks). Both regexes carry the `g` flag,
* so they would keep `lastIndex` state between calls if they were ever reused.
|
|
18
|
+
*/
|
|
19
|
+
const GITHUB_LINK_PATTERNS = [
|
|
20
|
+
// Edit on GitHub links (highest priority): href to github.com/<a>/<b>[/...] whose link text mentions edit/view/source and github
|
|
21
|
+
/href=["']([^"']*github\.com\/[^"'\/]+\/[^"'\/]+(?:\/[^"']*)?)["'][^>]*>(?:[^<]*(?:edit|view|source)[^<]*github|github[^<]*(?:edit|view|source))/gi,
|
|
22
|
+
// General GitHub repo links: captures the repo URL, the owner and the repo name
|
|
23
|
+
/href=["'](https?:\/\/github\.com\/([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_.-]+))(?:\/[^"']*)?["']/gi,
|
|
24
|
+
];
|
|
25
|
+
/**
 * Extracts owner/repo (plus any trailing path) from a GitHub URL.
 *
 * Recognised hosts:
 * - `<owner>.github.io`: the repo is the first path segment, or
 *   `<owner>.github.io` itself when the path is empty.
 * - `github.com` / `www.github.com`: `/<owner>/<repo>[/<path>]`, with a
 *   trailing `.git` stripped from the repo name.
 *
 * The hostname is compared exactly instead of by substring, so look-alike
 * hosts (`notgithub.com`, `github.com.example.org`) and non-repository
 * subdomains (`gist.github.com`) are rejected rather than having their path
 * misread as owner/repo.
 *
 * @param url Absolute URL to inspect
 * @returns `{ owner, repo }` for github.io hosts, `{ owner, repo, path }` for
 *          github.com hosts (`path` is undefined when nothing follows the
 *          repo), or null when the URL cannot be parsed or does not point at
 *          a GitHub repository
 */
function extractRepoFromUrl(url) {
    const PAGES_SUFFIX = ".github.io";
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Relative or malformed URLs cannot identify a repository.
        return null;
    }
    const hostname = parsed.hostname.toLowerCase();
    const segments = parsed.pathname.split("/").filter(Boolean);
    // GitHub Pages: the owner is encoded in the subdomain.
    if (hostname.endsWith(PAGES_SUFFIX)) {
        const owner = hostname.slice(0, -PAGES_SUFFIX.length);
        if (!owner) {
            return null;
        }
        return { owner, repo: segments[0] || `${owner}${PAGES_SUFFIX}` };
    }
    if (hostname !== "github.com" && hostname !== "www.github.com") {
        return null;
    }
    // A repository URL needs at least /<owner>/<repo>.
    if (segments.length < 2) {
        return null;
    }
    const [owner, rawRepo, ...rest] = segments;
    return {
        owner,
        repo: rawRepo.replace(/\.git$/, ""),
        path: rest.length > 0 ? rest.join("/") : undefined,
    };
}
|
|
56
|
+
/**
 * Tries to identify the repository from the URL alone, without any network
 * access.
 *
 * A URL accepted by parseGitHubUrl is reported as "direct_github_url".
 * Otherwise a `*.github.io` host is mapped to `<owner>/<first path segment>`
 * ("github_io_project") or, when the path is empty, to
 * `<owner>/<owner>.github.io` ("github_io_user").
 *
 * @param url The URL to analyze
 * @returns A high-confidence detection result, or null when the URL is
 *          neither form (or cannot be parsed)
 */
function detectFromUrl(url) {
    const direct = parseGitHubUrl(url);
    if (direct) {
        return {
            found: true,
            repo: `${direct.owner}/${direct.repo}`,
            docs_path: direct.path,
            confidence: "high",
            detection_method: "direct_github_url",
        };
    }
    let hostname;
    let pathname;
    try {
        ({ hostname, pathname } = new URL(url));
    }
    catch {
        // Invalid URL: nothing can be derived from it.
        return null;
    }
    if (!hostname.endsWith(".github.io")) {
        return null;
    }
    const account = hostname.replace(".github.io", "");
    const [firstSegment] = pathname.split("/").filter(Boolean);
    // A path segment means a project page; an empty path means the user/org site.
    const isProjectSite = firstSegment !== undefined;
    return {
        found: true,
        repo: isProjectSite ? `${account}/${firstSegment}` : `${account}/${account}.github.io`,
        confidence: "high",
        detection_method: isProjectSite ? "github_io_project" : "github_io_user",
    };
}
|
|
104
|
+
/**
 * Extracts the GitHub repo from "Edit on GitHub" style links or from
 * `editUrl` / `edit_uri` JSON settings embedded in the HTML.
 *
 * Patterns are tried in the order listed. Within one pattern every match is
 * tried until one parses as a repository URL, so an unusable first hit (for
 * example a link to a bare account page) no longer hides a valid link
 * further down the page.
 *
 * @param html Raw HTML of the page
 * @returns A high-confidence detection result ("edit_on_github_link"), or
 *          null when no usable link is found
 */
function detectFromEditLinks(html) {
    // The `g` flag is required by matchAll; each call gets fresh literals, so
    // no lastIndex state leaks between invocations.
    const editPatterns = [
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*edit[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*view[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*source[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*github[^<]*source/gi,
        /"editUrl":\s*"([^"]*github\.com\/[^"]+)"/gi,
        /"edit_uri":\s*"([^"]*github\.com\/[^"]+)"/gi,
    ];
    for (const pattern of editPatterns) {
        for (const match of html.matchAll(pattern)) {
            const repoInfo = extractRepoFromUrl(match[1]);
            if (repoInfo) {
                return {
                    found: true,
                    repo: `${repoInfo.owner}/${repoInfo.repo}`,
                    docs_path: repoInfo.path,
                    confidence: "high",
                    detection_method: "edit_on_github_link",
                };
            }
        }
    }
    return null;
}
|
|
136
|
+
/**
 * Picks the GitHub repository that the page links to most often.
 *
 * Every `href` pointing at `https://github.com/<owner>/<repo>` (optionally
 * followed by a sub-path, query string or fragment) is counted per
 * `owner/repo`; a trailing `.git` is stripped from the repo name. Links whose
 * first path segment is a GitHub site route (`github.com/sponsors/<user>`,
 * `github.com/marketplace/...`, ...) are ignored, because that segment is not
 * an account name. The same names are still skipped as repo names, as before.
 *
 * @param html Raw HTML of the page
 * @returns The most-linked repo (first seen wins ties) with "medium"
 *          confidence for 3+ links and "low" otherwise, or null when no
 *          repository link is found
 */
function detectFromLinks(html) {
    const NON_REPO_NAMES = new Set(["issues", "pulls", "discussions", "sponsors", "marketplace"]);
    const linkPattern = /href=["'](https?:\/\/github\.com\/([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_.-]+))(?:[\/?#][^"']*)?["']/gi;
    const linkCounts = new Map();
    for (const [, , owner, rawRepo] of html.matchAll(linkPattern)) {
        const repo = rawRepo.replace(/\.git$/, "");
        // Skip site routes in either position and names emptied by the .git strip.
        if (!repo || NON_REPO_NAMES.has(owner.toLowerCase()) || NON_REPO_NAMES.has(repo)) {
            continue;
        }
        const key = `${owner}/${repo}`;
        linkCounts.set(key, (linkCounts.get(key) || 0) + 1);
    }
    // Find the most frequently linked repo; strict ">" keeps the first seen on ties.
    let bestRepo = "";
    let maxCount = 0;
    for (const [repo, count] of linkCounts) {
        if (count > maxCount) {
            maxCount = count;
            bestRepo = repo;
        }
    }
    if (!bestRepo) {
        return null;
    }
    return {
        found: true,
        repo: bestRepo,
        confidence: maxCount >= 3 ? "medium" : "low",
        detection_method: `github_links_found_${maxCount}`,
    };
}
|
|
176
|
+
/**
 * Extracts the GitHub repo from meta tags.
 *
 * Looks at, in order: `og:url` (property before content), `github:repo`, and
 * `og:url` (content before property). Only the first occurrence of each tag
 * is considered. `og:url` values must be GitHub URLs. A `github:repo` value
 * may be either a GitHub URL or the bare `owner/repo` shorthand; the
 * shorthand used to be discarded because it does not parse as a URL.
 *
 * @param html Raw HTML of the page
 * @returns A medium-confidence detection result ("meta_tag"), or null when
 *          no tag yields a repository
 */
function detectFromMetaTags(html) {
    const SHORTHAND = /^([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_.-]+)$/;
    // Single exec per pattern, so the `g` flag (and lastIndex resets) are not needed.
    const metaSources = [
        { pattern: /<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+github\.com[^"']+)["']/i, allowShorthand: false },
        { pattern: /<meta[^>]+name=["']github:repo["'][^>]+content=["']([^"']+)["']/i, allowShorthand: true },
        { pattern: /<meta[^>]+content=["']([^"']+github\.com[^"']+)["'][^>]+property=["']og:url["']/i, allowShorthand: false },
    ];
    for (const { pattern, allowShorthand } of metaSources) {
        const match = pattern.exec(html);
        if (!match) {
            continue;
        }
        const shorthand = allowShorthand ? SHORTHAND.exec(match[1].trim()) : null;
        const repoInfo = shorthand
            ? { owner: shorthand[1], repo: shorthand[2].replace(/\.git$/, "") }
            : extractRepoFromUrl(match[1]);
        if (repoInfo && repoInfo.repo) {
            return {
                found: true,
                repo: `${repoInfo.owner}/${repoInfo.repo}`,
                confidence: "medium",
                detection_method: "meta_tag",
            };
        }
    }
    return null;
}
|
|
203
|
+
/**
 * Downloads a page and hands back its body as text.
 *
 * Redirects are followed and the request is aborted after `options.timeout`
 * milliseconds. This is best-effort: a non-OK status, a timeout, or any
 * other error while fetching or reading the body yields null instead of
 * throwing.
 *
 * @param url     Address of the page to download
 * @param options Object providing `userAgent` and `timeout`
 * @returns The response body, or null on any failure
 */
async function fetchPage(url, options) {
    try {
        const { userAgent, timeout } = options;
        const requestInit = {
            headers: {
                "User-Agent": userAgent,
                Accept: "text/html,application/xhtml+xml",
            },
            redirect: "follow",
            signal: AbortSignal.timeout(timeout),
        };
        const response = await fetch(url, requestInit);
        return response.ok ? await response.text() : null;
    }
    catch {
        return null;
    }
}
|
|
225
|
+
/**
 * Detects GitHub repository from a documentation website URL.
 *
 * Order of evaluation:
 * 1. The URL itself (GitHub URL or github.io host) - no network access.
 * 2. The fetched page's "Edit on GitHub" style links.
 * 3. The fetched page's meta tags.
 * 4. The most frequently linked GitHub repository on the page.
 *
 * Options set to `undefined` or `null` fall back to the defaults; previously
 * an explicit `timeout: undefined` replaced the default and made the page
 * fetch fail.
 *
 * @param url The URL to analyze
 * @param options Detection options
 * @returns Detection result with repo info and confidence. A page that
 *          cannot be fetched resolves to `found: false` with
 *          `detection_method: "fetch_failed"`; a page with no usable hints
 *          resolves to `found: false` with `"no_github_found"`.
 */
export async function detectGitHubRepo(url, options = {}) {
    const opts = {
        ...DEFAULT_OPTIONS,
        ...options,
        timeout: options?.timeout ?? DEFAULT_OPTIONS.timeout,
        userAgent: options?.userAgent ?? DEFAULT_OPTIONS.userAgent,
    };
    // Strategy 1: the URL itself is a GitHub or github.io URL.
    const urlResult = detectFromUrl(url);
    if (urlResult) {
        return urlResult;
    }
    // Remaining strategies need the page content.
    const html = await fetchPage(url, opts);
    if (!html) {
        return {
            found: false,
            confidence: "low",
            detection_method: "fetch_failed",
        };
    }
    // Strategies 2-4, from most to least reliable.
    const contentStrategies = [detectFromEditLinks, detectFromMetaTags, detectFromLinks];
    for (const strategy of contentStrategies) {
        const result = strategy(html);
        if (result) {
            return result;
        }
    }
    // No GitHub repo found
    return {
        found: false,
        confidence: "low",
        detection_method: "no_github_found",
    };
}
|
|
270
|
+
/**
|
|
271
|
+
* Convenience object exposing detectGitHubRepo under the name `detect`.
|
|
272
|
+
*/
|
|
273
|
+
export const githubDetector = {
|
|
274
|
+
detect: detectGitHubRepo,
|
|
275
|
+
};
|
|
276
|
+
//# sourceMappingURL=github-detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-detector.js","sourceRoot":"","sources":["../../src/services/github-detector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AA4BxD,MAAM,eAAe,GAA+B;IAClD,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,wCAAwC;CACpD,CAAC;AAEF;;GAEG;AACH,MAAM,oBAAoB,GAAG;IAC3B,0CAA0C;IAC1C,mJAAmJ;IACnJ,4BAA4B;IAC5B,2FAA2F;CAC5F,CAAC;AAEF;;GAEG;AACH,SAAS,kBAAkB,CAAC,GAAW;IACrC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACtF,OAAO,IAAI,CAAC;QACd,CAAC;QAED,wBAAwB;QACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YACxD,4DAA4D;YAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC7D,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,GAAG,KAAK,YAAY,CAAC;YAClD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACzB,CAAC;QAED,yBAAyB;QACzB,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAChD,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC7E,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,GAAW;IAChC,qCAAqC;IACrC,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,UAAU,EAAE,CAAC;QACf,OAAO;YACL,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,GAAG,UAAU,CAAC,KAAK,IAAI,UAAU,CAAC,IAAI,EAAE;YAC9C,SAAS,EAAE,UAAU,CAAC,IAAI;YAC1B,UAAU,EAAE,MAAM;YAClB,gBAAgB,EAAE,mBAAmB;SACtC,CAAC;IACJ,CAAC;IAED,8BAA8B;IAC9B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;Y
AC3C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAE7D,gEAAgE;YAChE,yDAAyD;YACzD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,qDAAqD;gBACrD,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,KAAK,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE;oBAChC,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,mBAAmB;iBACtC,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,0DAA0D;gBAC1D,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,KAAK,IAAI,KAAK,YAAY;oBACnC,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,gBAAgB;iBACnC,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,cAAc;IAChB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,wCAAwC;IACxC,MAAM,YAAY,GAAG;QACnB,6EAA6E;QAC7E,6EAA6E;QAC7E,+EAA+E;QAC/E,wEAAwE;QACxE,4CAA4C;QAC5C,6CAA6C;KAC9C,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE;oBAC1C,SAAS,EAAE,QAAQ,CAAC,IAAI;oBACxB,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,qBAAqB;iBACxC,CAAC;YACJ,CAAC;QACH,CAAC;QACD,wBAAwB;QACxB,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAY;IACnC,wBAAwB;IACxB,MAAM,WAAW,GAAG,2FAA2F,CAAC;IAChH,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IAExC,IAAI,KAAK,CAAC;IACV,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAE5C,gCAAgC;QAChC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACjF,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;QAC/B,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,KAAK,C
AAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,KAAK,EAAE,CAAC;QAClC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,GAAG,KAAK,CAAC;YACjB,QAAQ,GAAG,IAAI,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO;YACL,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK;YAC5C,gBAAgB,EAAE,sBAAsB,QAAQ,EAAE;SACnD,CAAC;IACJ,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,sDAAsD;IACtD,MAAM,YAAY,GAAG;QACnB,mFAAmF;QACnF,mEAAmE;QACnE,mFAAmF;KACpF,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE;oBAC1C,UAAU,EAAE,QAAQ;oBACpB,gBAAgB,EAAE,UAAU;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;QACD,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAAmC;IACvE,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,OAAO,CAAC,SAAS;gBAC/B,MAAM,EAAE,iCAAiC;aAC1C;YACD,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC;SAC7C,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAW,EACX,UAA4B,EAAE;IAE9B,MAAM,IAAI,GAA+B,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAE5E,yDAAyD;IACzD,MAAM,SAAS,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,mDAAmD;IACnD,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IACxC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,UAAU,EAAE,KAAK;YACjB,gBAAgB,EAAE,cAA
c;SACjC,CAAC;IACJ,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC7C,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,iCAAiC;IACjC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,wCAAwC;IACxC,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,uBAAuB;IACvB,OAAO;QACL,KAAK,EAAE,KAAK;QACZ,UAAU,EAAE,KAAK;QACjB,gBAAgB,EAAE,iBAAiB;KACpC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG;IAC5B,MAAM,EAAE,gBAAgB;CACzB,CAAC"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import type { DocsTreeNode } from "../types/index.js";
|
|
2
|
+
/**
|
|
3
|
+
* Result of fetching a repository tree; this is the value fetchRepoTree resolves with.
|
|
4
|
+
*/
|
|
5
|
+
export interface FetchTreeResult {
|
|
6
|
+
/** The repository in "owner/repo" format */
|
|
7
|
+
repo: string;
|
|
8
|
+
/** The branch that was used */
|
|
9
|
+
branch: string;
|
|
10
|
+
/** The hierarchical file tree, as DocsTreeNode entries (type imported from ../types) */
|
|
11
|
+
tree: DocsTreeNode[];
|
|
12
|
+
/** Total number of files found */
|
|
13
|
+
fileCount: number;
|
|
14
|
+
/** Total size of all files in bytes */
|
|
15
|
+
totalSize: number;
|
|
16
|
+
/** Whether the tree was truncated due to size limits (NOTE(review): the limit is not visible in this declaration) */
|
|
17
|
+
truncated: boolean;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Options for tree fetching; accepted as the optional second argument of fetchRepoTree.
* Every field is optional.
|
|
21
|
+
*/
|
|
22
|
+
export interface FetchTreeOptions {
|
|
23
|
+
/** Starting path within the repo (default: root) */
|
|
24
|
+
path?: string;
|
|
25
|
+
/** Branch to use (default: auto-detect main/master) */
|
|
26
|
+
branch?: string;
|
|
27
|
+
/** File extensions to include (default: all markdown; NOTE(review): the exact default list lives in the implementation) */
|
|
28
|
+
extensions?: string[];
|
|
29
|
+
/** Maximum depth to traverse (default: 10) */
|
|
30
|
+
maxDepth?: number;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Parses "owner/repo" format into separate parts.
*
* @param repoString Repository in "owner/repo" format
* @returns Object with the `owner` and `repo` parts
* NOTE(review): the return type has no null/undefined variant, so malformed
* input presumably throws - confirm against the implementation.
|
|
34
|
+
*/
|
|
35
|
+
export declare function parseRepoString(repoString: string): {
|
|
36
|
+
owner: string;
|
|
37
|
+
repo: string;
|
|
38
|
+
};
|
|
39
|
+
/**
|
|
40
|
+
* Checks if GitHub authentication is configured.
*
* @returns true when authentication is configured, false otherwise
* NOTE(review): where the credentials come from is not visible in this declaration.
|
|
41
|
+
*/
|
|
42
|
+
export declare function isAuthenticated(): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Fetches the file tree from a GitHub repository.
|
|
45
|
+
* Uses the Git Trees API for efficiency (1 request for entire tree).
|
|
46
|
+
*
|
|
47
|
+
* @param repoString Repository in "owner/repo" format
|
|
48
|
+
* @param options Fetch options (start path, branch, extensions, max depth); may be omitted
|
|
49
|
+
* @returns Promise of the tree structure with metadata (repo, branch, file count, total size, truncation flag)
|
|
50
|
+
*/
|
|
51
|
+
export declare function fetchRepoTree(repoString: string, options?: FetchTreeOptions): Promise<FetchTreeResult>;
|
|
52
|
+
/**
|
|
53
|
+
* Result of fetching file content; resolved by fetchFileContent and used for
* the entries of the `files` array resolved by fetchMultipleFiles.
|
|
54
|
+
*/
|
|
55
|
+
export interface FetchContentResult {
|
|
56
|
+
/** The file path */
|
|
57
|
+
path: string;
|
|
58
|
+
/** The file content, as a string */
|
|
59
|
+
content: string;
|
|
60
|
+
/** Size in bytes */
|
|
61
|
+
size: number;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Fetches raw file content from GitHub via raw.githubusercontent.com.
|
|
65
|
+
* This endpoint has no rate limits (served from CDN).
* NOTE(review): the rate-limit statement is the original author's; it cannot be checked from this declaration.
|
|
66
|
+
*
|
|
67
|
+
* @param repoString Repository in "owner/repo" format
|
|
68
|
+
* @param branch Branch name
|
|
69
|
+
* @param filePath Path to the file within the repo
|
|
70
|
+
* @returns Promise of the file's path, content and size, or of null if the file is not found
|
|
71
|
+
*/
|
|
72
|
+
export declare function fetchFileContent(repoString: string, branch: string, filePath: string): Promise<FetchContentResult | null>;
|
|
73
|
+
/**
|
|
74
|
+
* Fetches multiple files from a repository.
|
|
75
|
+
* Handles 404s gracefully by skipping missing files.
|
|
76
|
+
*
|
|
77
|
+
* @param repoString Repository in "owner/repo" format
|
|
78
|
+
* @param branch Branch name
|
|
79
|
+
* @param filePaths Array of file paths to fetch
|
|
80
|
+
* @returns Promise of an object with `files` (successfully fetched files) and `notFound` (paths that were not found)
|
|
81
|
+
*/
|
|
82
|
+
export declare function fetchMultipleFiles(repoString: string, branch: string, filePaths: string[]): Promise<{
|
|
83
|
+
files: FetchContentResult[];
|
|
84
|
+
notFound: string[];
|
|
85
|
+
}>;
|
|
86
|
+
/**
|
|
87
|
+
* Gets the current GitHub API rate limit status.
*
* @returns The status as a string (NOTE(review): the format is not visible in this declaration)
|
|
88
|
+
*/
|
|
89
|
+
export declare function getRateLimitStatus(): string;
|
|
90
|
+
/**
|
|
91
|
+
* Gets the raw rate limit info.
*
* @returns The RateLimitInfo object declared in ../utils/rate-limit.js, or null
* (NOTE(review): presumably null until a rate-limited response has been seen - confirm)
|
|
92
|
+
*/
|
|
93
|
+
export declare function getRateLimitInfo(): import("../utils/rate-limit.js").RateLimitInfo | null;
|
|
94
|
+
//# sourceMappingURL=github-fetcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-fetcher.d.ts","sourceRoot":"","sources":["../../src/services/github-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAiCtD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,4CAA4C;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,IAAI,EAAE,YAAY,EAAE,CAAC;IACrB,kCAAkC;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,8CAA8C;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAID;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd,CAQA;AAoBD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAEzC;AA8SD;;;;;;;GAOG;AACH,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CAqC1B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,uBAAuB;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;GAQG;AACH,wBAAsB,gBAAgB,CACpC,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC,CA6BpC;AAED;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CACtC,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EAAE,GAClB,OAAO,CAAC;IACT,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB,CAAC,CA2BD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CAE3C;AAED;;GAEG;AACH,wBAAgB,gBAAgB,0DAE/B"}
|