mcp-docs-scraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +357 -0
  3. package/dist/index.d.ts +3 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +20 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/server.d.ts +6 -0
  8. package/dist/server.d.ts.map +1 -0
  9. package/dist/server.js +231 -0
  10. package/dist/server.js.map +1 -0
  11. package/dist/services/cache-manager.d.ts +100 -0
  12. package/dist/services/cache-manager.d.ts.map +1 -0
  13. package/dist/services/cache-manager.js +212 -0
  14. package/dist/services/cache-manager.js.map +1 -0
  15. package/dist/services/content-cleaner.d.ts +48 -0
  16. package/dist/services/content-cleaner.d.ts.map +1 -0
  17. package/dist/services/content-cleaner.js +295 -0
  18. package/dist/services/content-cleaner.js.map +1 -0
  19. package/dist/services/github-detector.d.ts +49 -0
  20. package/dist/services/github-detector.d.ts.map +1 -0
  21. package/dist/services/github-detector.js +276 -0
  22. package/dist/services/github-detector.js.map +1 -0
  23. package/dist/services/github-fetcher.d.ts +94 -0
  24. package/dist/services/github-fetcher.d.ts.map +1 -0
  25. package/dist/services/github-fetcher.js +393 -0
  26. package/dist/services/github-fetcher.js.map +1 -0
  27. package/dist/services/search-index.d.ts +106 -0
  28. package/dist/services/search-index.d.ts.map +1 -0
  29. package/dist/services/search-index.js +210 -0
  30. package/dist/services/search-index.js.map +1 -0
  31. package/dist/services/web-scraper.d.ts +88 -0
  32. package/dist/services/web-scraper.d.ts.map +1 -0
  33. package/dist/services/web-scraper.js +244 -0
  34. package/dist/services/web-scraper.js.map +1 -0
  35. package/dist/tools/clear-cache.d.ts +24 -0
  36. package/dist/tools/clear-cache.d.ts.map +1 -0
  37. package/dist/tools/clear-cache.js +29 -0
  38. package/dist/tools/clear-cache.js.map +1 -0
  39. package/dist/tools/detect-github.d.ts +21 -0
  40. package/dist/tools/detect-github.d.ts.map +1 -0
  41. package/dist/tools/detect-github.js +18 -0
  42. package/dist/tools/detect-github.js.map +1 -0
  43. package/dist/tools/get-content.d.ts +43 -0
  44. package/dist/tools/get-content.d.ts.map +1 -0
  45. package/dist/tools/get-content.js +84 -0
  46. package/dist/tools/get-content.js.map +1 -0
  47. package/dist/tools/get-tree.d.ts +31 -0
  48. package/dist/tools/get-tree.d.ts.map +1 -0
  49. package/dist/tools/get-tree.js +102 -0
  50. package/dist/tools/get-tree.js.map +1 -0
  51. package/dist/tools/index-docs.d.ts +63 -0
  52. package/dist/tools/index-docs.d.ts.map +1 -0
  53. package/dist/tools/index-docs.js +371 -0
  54. package/dist/tools/index-docs.js.map +1 -0
  55. package/dist/tools/index.d.ts +11 -0
  56. package/dist/tools/index.d.ts.map +1 -0
  57. package/dist/tools/index.js +11 -0
  58. package/dist/tools/index.js.map +1 -0
  59. package/dist/tools/list-cached.d.ts +19 -0
  60. package/dist/tools/list-cached.d.ts.map +1 -0
  61. package/dist/tools/list-cached.js +20 -0
  62. package/dist/tools/list-cached.js.map +1 -0
  63. package/dist/tools/search-docs.d.ts +31 -0
  64. package/dist/tools/search-docs.d.ts.map +1 -0
  65. package/dist/tools/search-docs.js +64 -0
  66. package/dist/tools/search-docs.js.map +1 -0
  67. package/dist/types/cache.d.ts +53 -0
  68. package/dist/types/cache.d.ts.map +1 -0
  69. package/dist/types/cache.js +2 -0
  70. package/dist/types/cache.js.map +1 -0
  71. package/dist/types/errors.d.ts +102 -0
  72. package/dist/types/errors.d.ts.map +1 -0
  73. package/dist/types/errors.js +216 -0
  74. package/dist/types/errors.js.map +1 -0
  75. package/dist/types/index.d.ts +6 -0
  76. package/dist/types/index.d.ts.map +1 -0
  77. package/dist/types/index.js +5 -0
  78. package/dist/types/index.js.map +1 -0
  79. package/dist/utils/fs.d.ts +45 -0
  80. package/dist/utils/fs.d.ts.map +1 -0
  81. package/dist/utils/fs.js +113 -0
  82. package/dist/utils/fs.js.map +1 -0
  83. package/dist/utils/rate-limit.d.ts +55 -0
  84. package/dist/utils/rate-limit.d.ts.map +1 -0
  85. package/dist/utils/rate-limit.js +89 -0
  86. package/dist/utils/rate-limit.js.map +1 -0
  87. package/dist/utils/url.d.ts +69 -0
  88. package/dist/utils/url.d.ts.map +1 -0
  89. package/dist/utils/url.js +251 -0
  90. package/dist/utils/url.js.map +1 -0
  91. package/package.json +58 -0
@@ -0,0 +1,49 @@
1
+ /**
2
+ * GitHub Detector Service - Detects GitHub repository from documentation websites.
3
+ *
4
+ * Detection strategies (in order of confidence):
5
+ * 1. Direct GitHub URL → high confidence
6
+ * 2. github.io pattern → high confidence
7
+ * 3. "Edit on GitHub" links → high confidence
8
+ * 4. Meta tags with repo info → medium confidence
9
+ * 5. GitHub links in page content → medium confidence (3+ links to the same repo), otherwise low
10
+ */
11
+ /**
12
+ * Result of GitHub repository detection, returned for both successful and unsuccessful attempts.
13
+ */
14
+ export interface GitHubDetectionResult {
15
+ /** Whether a GitHub repository was found */
16
+ found: boolean;
17
+ /** Repository in "owner/repo" format; absent when `found` is false */
18
+ repo?: string;
19
+ /** Path within the repo where docs are located; only populated by direct-URL and edit-link detection */
20
+ docs_path?: string;
21
+ /** Confidence level of the detection; "low" is also reported when nothing was found */
22
+ confidence: "high" | "medium" | "low";
23
+ /** How the repo was detected, e.g. "direct_github_url" or "meta_tag" (for debugging) */
24
+ detection_method?: string;
25
+ }
26
+ /**
27
+ * Options for GitHub detection; every field is optional and falls back to a built-in default.
28
+ */
29
+ export interface DetectionOptions {
30
+ /** Timeout for fetching the page (ms); defaults to 10000 */
31
+ timeout?: number;
32
+ /** User agent for requests; defaults to "mcp-docs-scraper/1.0 (github-detector)" */
33
+ userAgent?: string;
34
+ }
35
+ /**
36
+ * Detects the GitHub repository behind a documentation website URL.
37
+ *
38
+ * @param url The URL to analyze (inspected directly first; the page is fetched only if that is inconclusive)
39
+ * @param options Detection options (fetch timeout and user agent)
40
+ * @returns Detection result with repo info and confidence; `found` is false when the page cannot be fetched or no repository is detected
41
+ */
42
+ export declare function detectGitHubRepo(url: string, options?: DetectionOptions): Promise<GitHubDetectionResult>;
43
+ /**
44
+ * Convenience object whose `detect` member is `detectGitHubRepo`.
45
+ */
46
+ export declare const githubDetector: {
47
+ detect: typeof detectGitHubRepo;
48
+ };
49
+ //# sourceMappingURL=github-detector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github-detector.d.ts","sourceRoot":"","sources":["../../src/services/github-detector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,4CAA4C;IAC5C,KAAK,EAAE,OAAO,CAAC;IACf,wCAAwC;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AA+OD;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,qBAAqB,CAAC,CA2ChC;AAED;;GAEG;AACH,eAAO,MAAM,cAAc;;CAE1B,CAAC"}
@@ -0,0 +1,276 @@
1
+ /**
2
+ * GitHub Detector Service - Detects GitHub repository from documentation websites.
3
+ *
4
+ * Detection strategies (in order of confidence):
5
+ * 1. Direct GitHub URL → high confidence
6
+ * 2. github.io pattern → high confidence
7
+ * 3. "Edit on GitHub" links → high confidence
8
+ * 4. Meta tags with repo info → medium confidence
9
+ * 5. GitHub links in page content → medium confidence (3+ links to the same repo), otherwise low
10
+ */
11
+ import { parseGitHubUrl } from "../tools/index-docs.js";
/**
 * Fallback values merged underneath caller-supplied detection options.
 */
const DEFAULT_OPTIONS = {
    // Page fetch timeout, in milliseconds.
    timeout: 10000,
    // Value sent in the User-Agent request header.
    userAgent: "mcp-docs-scraper/1.0 (github-detector)",
};
/**
 * Regular expressions that locate GitHub links inside raw HTML.
 *
 * Index 0 targets anchors whose text mentions edit/view/source together with
 * "github"; index 1 targets any https://github.com/<owner>/<repo> href.
 *
 * NOTE(review): nothing in this module references this constant; the detection
 * functions declare their own patterns. Both expressions carry the `g` flag,
 * so any future user must reset `lastIndex` between inputs.
 */
const GITHUB_LINK_PATTERNS = [
    /href=["']([^"']*github\.com\/[^"'\/]+\/[^"'\/]+(?:\/[^"']*)?)["'][^>]*>(?:[^<]*(?:edit|view|source)[^<]*github|github[^<]*(?:edit|view|source))/gi, // edit-style links
    /href=["'](https?:\/\/github\.com\/([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_.-]+))(?:\/[^"']*)?["']/gi, // plain repository links
];
/**
 * Extracts the owner, repository and optional in-repo path from a GitHub URL.
 *
 * Only these hosts are accepted:
 *   - `github.com` / `www.github.com`  → `/<owner>/<repo>[/<path>]`
 *   - `<owner>.github.io`              → first path segment is taken as the
 *     repo, falling back to `<owner>.github.io` when the path is empty
 *
 * The host is compared exactly (not by substring), so look-alike hosts such as
 * `notgithub.com` or `github.com.evil.example`, other GitHub sub-sites such as
 * `gist.github.com`, and the bare `github.io` host are all rejected.
 *
 * For github.com URLs a leading web-UI prefix (`blob|edit|tree|raw/<ref>/`) is
 * removed from the path so that it is relative to the repository root.
 * The ref is assumed to be a single segment; branch names containing "/"
 * cannot be told apart from directories without querying GitHub.
 *
 * @param url Absolute URL to inspect
 * @returns `{ owner, repo, path? }`, or `null` when `url` is not an absolute
 *          URL, is not on an accepted host, or lacks an owner/repo pair
 */
function extractRepoFromUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Relative or malformed input.
        return null;
    }
    // URL normalizes the hostname to lower case, so exact comparison is safe.
    const host = parsed.hostname;
    const segments = parsed.pathname.split("/").filter(Boolean);
    const pagesSuffix = ".github.io";
    if (host.endsWith(pagesSuffix) && host.length > pagesSuffix.length) {
        // Strip the suffix from the END only (replace() would hit the first occurrence).
        const owner = host.slice(0, -pagesSuffix.length);
        const repo = segments[0] || `${owner}.github.io`;
        return { owner, repo };
    }
    if (host !== "github.com" && host !== "www.github.com") {
        return null;
    }
    if (segments.length < 2) {
        return null;
    }
    const owner = segments[0];
    const repo = segments[1].replace(/\.git$/, "");
    let rest = segments.slice(2);
    // "/edit/main/docs/x.md" → "docs/x.md": drop the UI action and the ref.
    if (rest.length >= 2 && ["blob", "edit", "tree", "raw"].includes(rest[0])) {
        rest = rest.slice(2);
    }
    const path = rest.length > 0 ? rest.join("/") : undefined;
    return { owner, repo, path };
}
/**
 * Tries to identify the repository from the URL alone, without any network access.
 *
 * Two cases are recognized:
 *   1. `parseGitHubUrl` accepts the URL → "direct_github_url"
 *   2. the host ends with `.github.io`  → "github_io_project" when the path has
 *      at least one segment (taken as the repo name), otherwise
 *      "github_io_user" (repo `<owner>.github.io`)
 *
 * @param url The URL to inspect
 * @returns A high-confidence detection result, or `null` when neither case applies
 */
function detectFromUrl(url) {
    // parseGitHubUrl is imported; it is used here as returning a falsy value or
    // an object exposing `owner`, `repo` and `path`.
    const direct = parseGitHubUrl(url);
    if (direct) {
        return {
            found: true,
            repo: `${direct.owner}/${direct.repo}`,
            docs_path: direct.path,
            confidence: "high",
            detection_method: "direct_github_url",
        };
    }
    let target;
    try {
        target = new URL(url);
    }
    catch {
        // Not an absolute URL, so nothing can be derived from it.
        return null;
    }
    if (!target.hostname.endsWith(".github.io")) {
        return null;
    }
    const owner = target.hostname.replace(".github.io", "");
    const [firstSegment] = target.pathname.split("/").filter(Boolean);
    // A non-empty path suggests a project site (owner.github.io/repo-name);
    // an empty one is a user/org site hosted in owner/owner.github.io.
    const isProjectSite = firstSegment !== undefined;
    return {
        found: true,
        repo: isProjectSite ? `${owner}/${firstSegment}` : `${owner}/${owner}.github.io`,
        confidence: "high",
        detection_method: isProjectSite ? "github_io_project" : "github_io_user",
    };
}
/**
 * Searches HTML for an "Edit on GitHub"-style reference and derives the repository from it.
 *
 * The patterns cover anchors whose text reads "edit/view/source ... on ... github"
 * or "github ... source", plus `"editUrl"` / `"edit_uri"` JSON properties embedded
 * in the page. Patterns are tried in order; only the first match of each pattern
 * is examined.
 *
 * @param html Raw page markup
 * @returns A high-confidence result including `docs_path`, or `null` if no pattern
 *          yields a URL that `extractRepoFromUrl` accepts
 */
function detectFromEditLinks(html) {
    const candidates = [
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*edit[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*view[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*source[^<]*on[^<]*github/gi,
        /href=["']([^"']*github\.com\/[^"']+)["'][^>]*>[^<]*github[^<]*source/gi,
        /"editUrl":\s*"([^"]*github\.com\/[^"]+)"/gi,
        /"edit_uri":\s*"([^"]*github\.com\/[^"]+)"/gi,
    ];
    for (const regex of candidates) {
        const hit = regex.exec(html);
        // Global regexes remember their position; rewind after each use.
        regex.lastIndex = 0;
        if (!hit) {
            continue;
        }
        const info = extractRepoFromUrl(hit[1]);
        if (!info) {
            continue;
        }
        return {
            found: true,
            repo: `${info.owner}/${info.repo}`,
            docs_path: info.path,
            confidence: "high",
            detection_method: "edit_on_github_link",
        };
    }
    return null;
}
/**
 * Derives the repository from ordinary `https://github.com/<owner>/<repo>` links in HTML.
 *
 * Every matching href is tallied per repository and the most frequently linked
 * one wins (the earliest-seen repository wins ties). Confidence is "medium"
 * when the winner is linked at least three times, otherwise "low".
 *
 * Links are ignored when either the owner or the repo segment is a GitHub site
 * section rather than an account/repository (e.g. `github.com/sponsors/alice`,
 * `github.com/marketplace/actions`). Repositories are tallied case-insensitively,
 * because GitHub treats `Acme/Widgets` and `acme/widgets` as the same
 * repository; the first spelling encountered is the one reported.
 *
 * @param html Raw page markup
 * @returns A detection result, or `null` when no repository link is present
 */
function detectFromLinks(html) {
    const linkPattern = /href=["'](https?:\/\/github\.com\/([a-zA-Z0-9_-]+)\/([a-zA-Z0-9_.-]+))(?:\/[^"']*)?["']/gi;
    // Top-level github.com sections; these show up in the OWNER position of a URL.
    const siteSections = new Set([
        "issues", "pulls", "discussions", "sponsors", "marketplace",
        "orgs", "topics", "features", "apps", "settings",
        "login", "explore", "collections", "about", "pricing",
    ]);
    // lower-cased "owner/repo" → { name: first spelling seen, count }
    const tallies = new Map();
    for (const link of html.matchAll(linkPattern)) {
        const owner = link[2];
        const repo = link[3].replace(/\.git$/, "");
        if (siteSections.has(owner.toLowerCase()) || siteSections.has(repo.toLowerCase())) {
            continue;
        }
        const name = `${owner}/${repo}`;
        const key = name.toLowerCase();
        const tally = tallies.get(key);
        if (tally) {
            tally.count += 1;
        }
        else {
            tallies.set(key, { name, count: 1 });
        }
    }
    // Map iteration follows insertion order, so a strict ">" keeps the earliest on ties.
    let best = null;
    for (const tally of tallies.values()) {
        if (best === null || tally.count > best.count) {
            best = tally;
        }
    }
    if (best === null) {
        return null;
    }
    return {
        found: true,
        repo: best.name,
        confidence: best.count >= 3 ? "medium" : "low",
        detection_method: `github_links_found_${best.count}`,
    };
}
/**
 * Derives the repository from `<meta>` tags in the page head.
 *
 * Recognized tags: `og:url` (with `property` before or after `content`) whose
 * content mentions github.com, and `name="github:repo"`. Patterns are tried in
 * order and only the first match of each is examined.
 *
 * NOTE(review): the captured value goes through `extractRepoFromUrl`, which
 * needs an absolute URL; a bare "owner/repo" value in `github:repo` therefore
 * yields no result — confirm whether that shorthand should be supported.
 *
 * @param html Raw page markup
 * @returns A medium-confidence result, or `null` when no tag yields a repository
 */
function detectFromMetaTags(html) {
    const tagPatterns = [
        /<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+github\.com[^"']+)["']/gi,
        /<meta[^>]+name=["']github:repo["'][^>]+content=["']([^"']+)["']/gi,
        /<meta[^>]+content=["']([^"']+github\.com[^"']+)["'][^>]+property=["']og:url["']/gi,
    ];
    for (const tagPattern of tagPatterns) {
        const found = tagPattern.exec(html);
        // Rewind the global regex regardless of the outcome.
        tagPattern.lastIndex = 0;
        const parsedRepo = found ? extractRepoFromUrl(found[1]) : null;
        if (parsedRepo) {
            const { owner, repo } = parsedRepo;
            return {
                found: true,
                repo: `${owner}/${repo}`,
                confidence: "medium",
                detection_method: "meta_tag",
            };
        }
    }
    return null;
}
/**
 * Downloads a page and resolves with its body text.
 *
 * Redirects are followed and the request is aborted after `options.timeout`
 * milliseconds. Failures never propagate: a non-2xx status, a network error,
 * or a timeout all resolve to `null`.
 *
 * @param url Address of the page to download
 * @param options Object providing `userAgent` and `timeout`
 * @returns The response body as text, or `null` on any failure
 */
async function fetchPage(url, options) {
    try {
        const res = await fetch(url, {
            headers: {
                "User-Agent": options.userAgent,
                Accept: "text/html,application/xhtml+xml",
            },
            redirect: "follow",
            signal: AbortSignal.timeout(options.timeout),
        });
        return res.ok ? await res.text() : null;
    }
    catch {
        // Best-effort by design: callers treat null as "could not fetch".
        return null;
    }
}
/**
 * Detects the GitHub repository behind a documentation website URL.
 *
 * The URL itself is examined first (direct GitHub URL or github.io host). Only
 * if that is inconclusive is the page fetched, after which the analyzers run in
 * this order: edit links, meta tags, general GitHub links. The first analyzer
 * to produce a result wins.
 *
 * @param url The URL to analyze
 * @param options Detection options; omitted fields fall back to DEFAULT_OPTIONS
 * @returns Detection result with repo info and confidence. When the page cannot
 *          be fetched (or its body is empty) the result is `found: false` with
 *          method "fetch_failed"; when nothing matches, "no_github_found".
 */
export async function detectGitHubRepo(url, options = {}) {
    const settings = { ...DEFAULT_OPTIONS, ...options };
    // Cheap path: no network needed when the URL already identifies the repo.
    const fromUrl = detectFromUrl(url);
    if (fromUrl) {
        return fromUrl;
    }
    const html = await fetchPage(url, settings);
    if (!html) {
        return {
            found: false,
            confidence: "low",
            detection_method: "fetch_failed",
        };
    }
    // Content analyzers, ordered from most to least reliable.
    const analyzers = [detectFromEditLinks, detectFromMetaTags, detectFromLinks];
    for (const analyze of analyzers) {
        const outcome = analyze(html);
        if (outcome) {
            return outcome;
        }
    }
    return {
        found: false,
        confidence: "low",
        detection_method: "no_github_found",
    };
}
/**
 * Object-style entry point: `githubDetector.detect` is `detectGitHubRepo`.
 */
export const githubDetector = { detect: detectGitHubRepo };
276
+ //# sourceMappingURL=github-detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github-detector.js","sourceRoot":"","sources":["../../src/services/github-detector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AA4BxD,MAAM,eAAe,GAA+B;IAClD,OAAO,EAAE,KAAK;IACd,SAAS,EAAE,wCAAwC;CACpD,CAAC;AAEF;;GAEG;AACH,MAAM,oBAAoB,GAAG;IAC3B,0CAA0C;IAC1C,mJAAmJ;IACnJ,4BAA4B;IAC5B,2FAA2F;CAC5F,CAAC;AAEF;;GAEG;AACH,SAAS,kBAAkB,CAAC,GAAW;IACrC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACtF,OAAO,IAAI,CAAC;QACd,CAAC;QAED,wBAAwB;QACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YACxD,4DAA4D;YAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC7D,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,GAAG,KAAK,YAAY,CAAC;YAClD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACzB,CAAC;QAED,yBAAyB;QACzB,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAChD,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC7E,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,GAAW;IAChC,qCAAqC;IACrC,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,UAAU,EAAE,CAAC;QACf,OAAO;YACL,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,GAAG,UAAU,CAAC,KAAK,IAAI,UAAU,CAAC,IAAI,EAAE;YAC9C,SAAS,EAAE,UAAU,CAAC,IAAI;YAC1B,UAAU,EAAE,MAAM;YAClB,gBAAgB,EAAE,mBAAmB;SACtC,CAAC;IACJ,CAAC;IAED,8BAA8B;IAC9B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC
;YAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAE7D,gEAAgE;YAChE,yDAAyD;YACzD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,qDAAqD;gBACrD,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,KAAK,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE;oBAChC,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,mBAAmB;iBACtC,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,0DAA0D;gBAC1D,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,KAAK,IAAI,KAAK,YAAY;oBACnC,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,gBAAgB;iBACnC,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,cAAc;IAChB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,wCAAwC;IACxC,MAAM,YAAY,GAAG;QACnB,6EAA6E;QAC7E,6EAA6E;QAC7E,+EAA+E;QAC/E,wEAAwE;QACxE,4CAA4C;QAC5C,6CAA6C;KAC9C,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE;oBAC1C,SAAS,EAAE,QAAQ,CAAC,IAAI;oBACxB,UAAU,EAAE,MAAM;oBAClB,gBAAgB,EAAE,qBAAqB;iBACxC,CAAC;YACJ,CAAC;QACH,CAAC;QACD,wBAAwB;QACxB,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAY;IACnC,wBAAwB;IACxB,MAAM,WAAW,GAAG,2FAA2F,CAAC;IAChH,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IAExC,IAAI,KAAK,CAAC;IACV,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAE5C,gCAAgC;QAChC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACjF,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;QAC/B,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,KAAK
,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,KAAK,EAAE,CAAC;QAClC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,GAAG,KAAK,CAAC;YACjB,QAAQ,GAAG,IAAI,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO;YACL,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK;YAC5C,gBAAgB,EAAE,sBAAsB,QAAQ,EAAE;SACnD,CAAC;IACJ,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,sDAAsD;IACtD,MAAM,YAAY,GAAG;QACnB,mFAAmF;QACnF,mEAAmE;QACnE,mFAAmF;KACpF,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,IAAI,EAAE,GAAG,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,IAAI,EAAE;oBAC1C,UAAU,EAAE,QAAQ;oBACpB,gBAAgB,EAAE,UAAU;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;QACD,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAAmC;IACvE,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,OAAO,CAAC,SAAS;gBAC/B,MAAM,EAAE,iCAAiC;aAC1C;YACD,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC;SAC7C,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAW,EACX,UAA4B,EAAE;IAE9B,MAAM,IAAI,GAA+B,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAE5E,yDAAyD;IACzD,MAAM,SAAS,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,mDAAmD;IACnD,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IACxC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,UAAU,EAAE,KAAK;YACjB,gBAAgB,EAAE,c
AAc;SACjC,CAAC;IACJ,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC7C,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,iCAAiC;IACjC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,wCAAwC;IACxC,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,uBAAuB;IACvB,OAAO;QACL,KAAK,EAAE,KAAK;QACZ,UAAU,EAAE,KAAK;QACjB,gBAAgB,EAAE,iBAAiB;KACpC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG;IAC5B,MAAM,EAAE,gBAAgB;CACzB,CAAC"}
@@ -0,0 +1,94 @@
1
+ import type { DocsTreeNode } from "../types/index.js";
2
+ /**
3
+ * Result of fetching a repository tree.
4
+ */
5
+ export interface FetchTreeResult {
6
+ /** The repository in "owner/repo" format */
7
+ repo: string;
8
+ /** The branch that was used (presumably the requested one, or the auto-detected default when none was given) */
9
+ branch: string;
10
+ /** The hierarchical file tree */
11
+ tree: DocsTreeNode[];
12
+ /** Total number of files found */
13
+ fileCount: number;
14
+ /** Total size of all files in bytes */
15
+ totalSize: number;
16
+ /** Whether the tree was truncated due to size limits */
17
+ truncated: boolean;
18
+ }
19
+ /**
20
+ * Options for tree fetching; every field is optional and has a documented default.
21
+ */
22
+ export interface FetchTreeOptions {
23
+ /** Starting path within the repo (default: repository root) */
24
+ path?: string;
25
+ /** Branch to use (default: auto-detect main/master) */
26
+ branch?: string;
27
+ /** File extensions to include (default: all markdown) */
28
+ extensions?: string[];
29
+ /** Maximum depth to traverse (default: 10) */
30
+ maxDepth?: number;
31
+ }
32
+ /**
33
+ * Parses a repository string in "owner/repo" format into separate `owner` and `repo` parts.
34
+ */
35
+ export declare function parseRepoString(repoString: string): {
36
+ owner: string;
37
+ repo: string;
38
+ };
39
+ /**
40
+ * Reports (synchronously) whether GitHub authentication is configured.
41
+ */
42
+ export declare function isAuthenticated(): boolean;
43
+ /**
44
+ * Fetches the file tree from a GitHub repository.
45
+ * Uses the Git Trees API for efficiency (1 request for entire tree).
46
+ *
47
+ * @param repoString Repository in "owner/repo" format
48
+ * @param options Fetch options (start path, branch, extensions, max depth)
49
+ * @returns Promise resolving to the tree structure with metadata (a `FetchTreeResult`)
50
+ */
51
+ export declare function fetchRepoTree(repoString: string, options?: FetchTreeOptions): Promise<FetchTreeResult>;
52
+ /**
53
+ * Result of fetching the content of a single file.
54
+ */
55
+ export interface FetchContentResult {
56
+ /** The file path */
57
+ path: string;
58
+ /** The file content, as a string */
59
+ content: string;
60
+ /** Size in bytes */
61
+ size: number;
62
+ }
63
+ /**
64
+ * Fetches raw file content from GitHub via raw.githubusercontent.com.
65
+ * NOTE(review): described as having no rate limits (served from CDN) — confirm; this is separate from the REST API quota.
66
+ *
67
+ * @param repoString Repository in "owner/repo" format
68
+ * @param branch Branch name
69
+ * @param filePath Path to the file within the repo
70
+ * @returns Promise resolving to the file content, or null if not found
71
+ */
72
+ export declare function fetchFileContent(repoString: string, branch: string, filePath: string): Promise<FetchContentResult | null>;
73
+ /**
74
+ * Fetches multiple files from a repository.
75
+ * Handles 404s gracefully by skipping missing files.
76
+ *
77
+ * @param repoString Repository in "owner/repo" format
78
+ * @param branch Branch name
79
+ * @param filePaths Array of file paths to fetch
80
+ * @returns Promise resolving to an object with `files` (successfully fetched files) and `notFound` (paths that were not found)
81
+ */
82
+ export declare function fetchMultipleFiles(repoString: string, branch: string, filePaths: string[]): Promise<{
83
+ files: FetchContentResult[];
84
+ notFound: string[];
85
+ }>;
86
+ /**
87
+ * Gets the current GitHub API rate limit status as a string.
88
+ */
89
+ export declare function getRateLimitStatus(): string;
90
+ /**
91
+ * Gets the raw rate limit info, or null (presumably when none has been recorded yet — verify against the implementation).
92
+ */
93
+ export declare function getRateLimitInfo(): import("../utils/rate-limit.js").RateLimitInfo | null;
94
+ //# sourceMappingURL=github-fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github-fetcher.d.ts","sourceRoot":"","sources":["../../src/services/github-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAiCtD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,4CAA4C;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,IAAI,EAAE,YAAY,EAAE,CAAC;IACrB,kCAAkC;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,8CAA8C;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAID;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd,CAQA;AAoBD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAEzC;AA8SD;;;;;;;GAOG;AACH,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CAqC1B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,uBAAuB;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;GAQG;AACH,wBAAsB,gBAAgB,CACpC,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC,CA6BpC;AAED;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CACtC,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EAAE,GAClB,OAAO,CAAC;IACT,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB,CAAC,CA2BD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CAE3C;AAED;;GAEG;AACH,wBAAgB,gBAAgB,0DAE/B"}