mcp-docs-scraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +357 -0
  3. package/dist/index.d.ts +3 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +20 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/server.d.ts +6 -0
  8. package/dist/server.d.ts.map +1 -0
  9. package/dist/server.js +231 -0
  10. package/dist/server.js.map +1 -0
  11. package/dist/services/cache-manager.d.ts +100 -0
  12. package/dist/services/cache-manager.d.ts.map +1 -0
  13. package/dist/services/cache-manager.js +212 -0
  14. package/dist/services/cache-manager.js.map +1 -0
  15. package/dist/services/content-cleaner.d.ts +48 -0
  16. package/dist/services/content-cleaner.d.ts.map +1 -0
  17. package/dist/services/content-cleaner.js +295 -0
  18. package/dist/services/content-cleaner.js.map +1 -0
  19. package/dist/services/github-detector.d.ts +49 -0
  20. package/dist/services/github-detector.d.ts.map +1 -0
  21. package/dist/services/github-detector.js +276 -0
  22. package/dist/services/github-detector.js.map +1 -0
  23. package/dist/services/github-fetcher.d.ts +94 -0
  24. package/dist/services/github-fetcher.d.ts.map +1 -0
  25. package/dist/services/github-fetcher.js +393 -0
  26. package/dist/services/github-fetcher.js.map +1 -0
  27. package/dist/services/search-index.d.ts +106 -0
  28. package/dist/services/search-index.d.ts.map +1 -0
  29. package/dist/services/search-index.js +210 -0
  30. package/dist/services/search-index.js.map +1 -0
  31. package/dist/services/web-scraper.d.ts +88 -0
  32. package/dist/services/web-scraper.d.ts.map +1 -0
  33. package/dist/services/web-scraper.js +244 -0
  34. package/dist/services/web-scraper.js.map +1 -0
  35. package/dist/tools/clear-cache.d.ts +24 -0
  36. package/dist/tools/clear-cache.d.ts.map +1 -0
  37. package/dist/tools/clear-cache.js +29 -0
  38. package/dist/tools/clear-cache.js.map +1 -0
  39. package/dist/tools/detect-github.d.ts +21 -0
  40. package/dist/tools/detect-github.d.ts.map +1 -0
  41. package/dist/tools/detect-github.js +18 -0
  42. package/dist/tools/detect-github.js.map +1 -0
  43. package/dist/tools/get-content.d.ts +43 -0
  44. package/dist/tools/get-content.d.ts.map +1 -0
  45. package/dist/tools/get-content.js +84 -0
  46. package/dist/tools/get-content.js.map +1 -0
  47. package/dist/tools/get-tree.d.ts +31 -0
  48. package/dist/tools/get-tree.d.ts.map +1 -0
  49. package/dist/tools/get-tree.js +102 -0
  50. package/dist/tools/get-tree.js.map +1 -0
  51. package/dist/tools/index-docs.d.ts +63 -0
  52. package/dist/tools/index-docs.d.ts.map +1 -0
  53. package/dist/tools/index-docs.js +371 -0
  54. package/dist/tools/index-docs.js.map +1 -0
  55. package/dist/tools/index.d.ts +11 -0
  56. package/dist/tools/index.d.ts.map +1 -0
  57. package/dist/tools/index.js +11 -0
  58. package/dist/tools/index.js.map +1 -0
  59. package/dist/tools/list-cached.d.ts +19 -0
  60. package/dist/tools/list-cached.d.ts.map +1 -0
  61. package/dist/tools/list-cached.js +20 -0
  62. package/dist/tools/list-cached.js.map +1 -0
  63. package/dist/tools/search-docs.d.ts +31 -0
  64. package/dist/tools/search-docs.d.ts.map +1 -0
  65. package/dist/tools/search-docs.js +64 -0
  66. package/dist/tools/search-docs.js.map +1 -0
  67. package/dist/types/cache.d.ts +53 -0
  68. package/dist/types/cache.d.ts.map +1 -0
  69. package/dist/types/cache.js +2 -0
  70. package/dist/types/cache.js.map +1 -0
  71. package/dist/types/errors.d.ts +102 -0
  72. package/dist/types/errors.d.ts.map +1 -0
  73. package/dist/types/errors.js +216 -0
  74. package/dist/types/errors.js.map +1 -0
  75. package/dist/types/index.d.ts +6 -0
  76. package/dist/types/index.d.ts.map +1 -0
  77. package/dist/types/index.js +5 -0
  78. package/dist/types/index.js.map +1 -0
  79. package/dist/utils/fs.d.ts +45 -0
  80. package/dist/utils/fs.d.ts.map +1 -0
  81. package/dist/utils/fs.js +113 -0
  82. package/dist/utils/fs.js.map +1 -0
  83. package/dist/utils/rate-limit.d.ts +55 -0
  84. package/dist/utils/rate-limit.d.ts.map +1 -0
  85. package/dist/utils/rate-limit.js +89 -0
  86. package/dist/utils/rate-limit.js.map +1 -0
  87. package/dist/utils/url.d.ts +69 -0
  88. package/dist/utils/url.d.ts.map +1 -0
  89. package/dist/utils/url.js +251 -0
  90. package/dist/utils/url.js.map +1 -0
  91. package/package.json +58 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rate-limit.js","sourceRoot":"","sources":["../../src/utils/rate-limit.ts"],"names":[],"mappings":"AAAA;;GAEG;AAaH;;GAEG;AACH,MAAM,OAAO,gBAAgB;IACnB,IAAI,GAAyB,IAAI,CAAC;IAE1C;;OAEG;IACH,iBAAiB,CAAC,OAAgB;QAChC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QAE/C,IAAI,KAAK,IAAI,SAAS,IAAI,KAAK,EAAE,CAAC;YAChC,IAAI,CAAC,IAAI,GAAG;gBACV,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC;gBAC1B,SAAS,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;gBAClC,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC;gBAC1B,SAAS,EAAE,IAAI,IAAI,EAAE;aACtB,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,OAAO;QACL,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,YAAoB,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,CAAC;IAClC,CAAC;IAED;;;OAGG;IACH,iBAAiB;QACf,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACzB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,gBAAgB;QAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,GAAG,KAAK,CAAC,CAAC;QAEtE,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;YACvB,OAAO,mCAAmC,iBAAiB,WAAW,CAAC;QACzE,CAAC;QAED,IAAI,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC;YACjB,OAAO,mBAAmB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,yBAAyB,iBAAiB,WAAW,CAAC;QACxH,CAAC;QAED,OAAO,eAAe,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,YAAY,CAAC;IAC3E,CAAC;IAED;;;OAGG;IACH,YAAY;QACV,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,SAAS,CAAC;QACjC,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;IAC1C,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,IA
AI,gBAAgB,EAAE,CAAC"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * URL utilities for web scraping.
3
+ *
4
+ * Provides URL normalization, domain extraction, and link filtering.
5
+ */
6
+ /**
7
+ * Normalizes a URL: drops the fragment and known tracking params, sorts the remaining query params, and strips a trailing slash from non-root paths.
8
+ *
9
+ * @param url The URL to normalize
10
+ * @returns Normalized URL string, or the input unchanged if it cannot be parsed as a URL
11
+ */
12
+ export declare function normalizeUrl(url: string): string;
13
+ /**
14
+ * Extracts the hostname (host without the port) from a URL.
15
+ *
16
+ * @param url The URL to extract the hostname from
17
+ * @returns Hostname string, or null if the URL cannot be parsed
18
+ */
19
+ export declare function extractDomain(url: string): string | null;
20
+ /**
21
+ * Checks if two URLs have the same hostname, ignoring a leading "www." prefix.
22
+ *
23
+ * @param url1 First URL
24
+ * @param url2 Second URL
25
+ * @returns True if the hostnames match; false if they differ or either URL has no parsable hostname
26
+ */
27
+ export declare function isSameDomain(url1: string, url2: string): boolean;
28
+ /**
29
+ * Resolves a relative, protocol-relative ("//host/path"), or absolute href against a base URL.
30
+ *
31
+ * @param href The href to resolve (may be relative or absolute)
32
+ * @param baseUrl The base URL to resolve against
33
+ * @returns Absolute URL string, or null if the href cannot be resolved to a valid URL
34
+ */
35
+ export declare function resolveUrl(href: string, baseUrl: string): string | null;
36
+ /**
37
+ * Checks if a URL should be crawled: it must be http(s), on the same domain as the base URL, and not match a known non-content path or file extension.
38
+ *
39
+ * @param url The URL to check
40
+ * @param baseUrl The base URL of the crawl
41
+ * @returns Object with isValid boolean and, when invalid, a reason: "non-http protocol", "external domain", "non-content path", or "invalid URL"
42
+ */
43
+ export declare function shouldCrawl(url: string, baseUrl: string): {
44
+ isValid: boolean;
45
+ reason?: string;
46
+ };
47
+ /**
48
+ * Extracts the crawlable links from an HTML document by scanning its href attributes.
49
+ *
50
+ * @param html The HTML content
51
+ * @param baseUrl The base URL for resolving relative links and for the same-domain check
52
+ * @returns Array of unique, normalized absolute URLs that pass shouldCrawl, in document order
53
+ */
54
+ export declare function extractLinks(html: string, baseUrl: string): string[];
55
+ /**
56
+ * Gets the path depth of a URL (number of non-empty path segments).
57
+ *
58
+ * @param url The URL to analyze
59
+ * @returns Number of path segments, or 0 if the URL cannot be parsed
60
+ */
61
+ export declare function getPathDepth(url: string): number;
62
+ /**
63
+ * Converts a URL's path (plus a short hash of its query string, if any) to a safe filename for caching.
64
+ *
65
+ * @param url The URL to convert
66
+ * @returns Safe filename string ending in ".md" ("index.md" for an empty path, "page.md" if the URL cannot be parsed)
67
+ */
68
+ export declare function urlToFilename(url: string): string;
69
+ //# sourceMappingURL=url.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAmBH;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAgChD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAOxD;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAWhE;AAED;;;;;;GAMG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAYvE;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CACzB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,GACd;IAAE,OAAO,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,CAwCvC;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAoCpE;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAQhD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA+BjD"}
@@ -0,0 +1,251 @@
1
+ /**
2
+ * URL utilities for web scraping.
3
+ *
4
+ * Provides URL normalization, domain extraction, and link filtering.
5
+ */
6
/**
 * Query parameters that only carry tracking/attribution data and are
 * stripped during normalization.
 */
const TRACKING_PARAMS = [
  "utm_source",
  "utm_medium",
  "utm_campaign",
  "utm_term",
  "utm_content",
  "ref",
  "source",
  "fbclid",
  "gclid",
  "msclkid",
  "_ga",
];

/**
 * Matches exactly one RFC 3986 "unreserved" character. These are the only
 * characters whose percent-encoded and literal forms are interchangeable.
 */
const UNRESERVED_CHAR = /^[A-Za-z0-9\-._~]$/;

/**
 * Normalizes a URL so that equivalent links compare equal.
 *
 * Steps: drop the fragment, drop tracking parameters, sort the remaining
 * query parameters, strip a trailing slash from non-root paths, and decode
 * percent-escapes of unreserved characters in the path.
 *
 * @param url The URL to normalize
 * @returns Normalized URL string, or the input unchanged if it is not a valid URL
 */
export function normalizeUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable: hand the input back untouched rather than failing.
    return url;
  }

  // Remove fragment
  parsed.hash = "";

  // Remove tracking parameters
  for (const param of TRACKING_PARAMS) {
    parsed.searchParams.delete(param);
  }

  // Sort remaining search params so parameter order does not matter
  parsed.searchParams.sort();

  // Remove trailing slash except for the root path
  if (parsed.pathname.length > 1 && parsed.pathname.endsWith("/")) {
    parsed.pathname = parsed.pathname.slice(0, -1);
  }

  // Decode only escapes of unreserved characters (e.g. %7E -> ~). Decoding
  // the whole path would turn %2F into a real separator or %25 into a bare
  // "%", producing a URL that points at a different resource.
  parsed.pathname = parsed.pathname.replace(/%[0-9a-f]{2}/gi, (escape) => {
    const char = String.fromCharCode(Number.parseInt(escape.slice(1), 16));
    return UNRESERVED_CHAR.test(char) ? char : escape;
  });

  return parsed.href;
}
57
/**
 * Returns the hostname portion of a URL (no port, no credentials).
 *
 * @param url The URL to inspect
 * @returns The hostname, or null when the input cannot be parsed as a URL
 */
export function extractDomain(url) {
  let hostname = null;
  try {
    ({ hostname } = new URL(url));
  } catch {
    hostname = null;
  }
  return hostname;
}
72
/**
 * Tells whether two URLs point at the same host, treating "www.example.com"
 * and "example.com" as equal.
 *
 * @param url1 First URL
 * @param url2 Second URL
 * @returns True when both hostnames are available and match; false otherwise
 */
export function isSameDomain(url1, url2) {
  const [hostA, hostB] = [url1, url2].map((candidate) => extractDomain(candidate));

  if (hostA && hostB) {
    // Ignore a leading "www." so both spellings of a site compare equal.
    const stripWww = (host) => host.replace(/^www\./, "");
    return stripWww(hostA) === stripWww(hostB);
  }

  return false;
}
89
/**
 * Turns an href found on a page into an absolute URL.
 *
 * @param href The href to resolve (relative, protocol-relative, or absolute)
 * @param baseUrl The URL of the page the href was found on
 * @returns Absolute URL string, or null when no valid URL can be formed
 */
export function resolveUrl(href, baseUrl) {
  try {
    // "//host/path" inherits the scheme of the base URL.
    const isProtocolRelative = href.startsWith("//");
    const resolved = isProtocolRelative
      ? new URL(`${new URL(baseUrl).protocol}${href}`)
      : new URL(href, baseUrl);
    return resolved.href;
  } catch {
    return null;
  }
}
109
/**
 * Pathname patterns for pages and assets that never hold documentation
 * content: API/auth endpoints, account pages, and binary or static files.
 */
const NON_CONTENT_PATH_PATTERNS = [
  /\/api\//i,
  /\/auth\//i,
  /\/login/i,
  /\/logout/i,
  /\/signup/i,
  /\/register/i,
  /\/admin/i,
  /\/cdn-cgi\//i,
  /\.(pdf|zip|tar|gz|exe|dmg|pkg|deb|rpm)$/i,
  /\.(png|jpg|jpeg|gif|svg|ico|webp)$/i,
  /\.(css|js|json|xml|rss|atom)$/i,
  /\.(mp3|mp4|avi|mov|wmv|flv|webm)$/i,
];

/**
 * Decides whether a URL is worth crawling.
 *
 * A URL is accepted only when it uses http or https, lives on the same
 * domain as the crawl's base URL, and its path matches none of the
 * non-content patterns.
 *
 * @param url The URL to check
 * @param baseUrl The base URL of the crawl
 * @returns `{ isValid: true }`, or `{ isValid: false, reason }` naming the failed check
 */
export function shouldCrawl(url, baseUrl) {
  const reject = (reason) => ({ isValid: false, reason });

  try {
    const { protocol, pathname } = new URL(url);

    if (protocol !== "http:" && protocol !== "https:") {
      return reject("non-http protocol");
    }

    if (!isSameDomain(url, baseUrl)) {
      return reject("external domain");
    }

    const isNonContent = NON_CONTENT_PATH_PATTERNS.some((pattern) =>
      pattern.test(pathname),
    );
    if (isNonContent) {
      return reject("non-content path");
    }

    return { isValid: true };
  } catch {
    return reject("invalid URL");
  }
}
153
/**
 * Extracts the crawlable links from an HTML document.
 *
 * Uses a regex scan over quoted `href` attributes instead of a DOM parse,
 * which is enough for link discovery. Each href is resolved against
 * `baseUrl`, normalized, filtered through `shouldCrawl`, and de-duplicated.
 *
 * @param html The HTML content
 * @param baseUrl The base URL for resolving relative links
 * @returns Unique, normalized absolute URLs in order of first appearance
 */
export function extractLinks(html, baseUrl) {
  // Insertion-ordered, so it serves as both the "seen" set and the result.
  const links = new Set();

  // - The lookbehind rejects attributes that merely end in "href",
  //   such as data-href or xlink:href.
  // - The value must close with the quote it opened with, so an href may
  //   contain the other quote character.
  const hrefPattern = /(?<![\w:-])href\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;

  for (const match of html.matchAll(hrefPattern)) {
    // Attribute values are HTML-escaped: "&amp;" in markup means "&" in the
    // URL. Without decoding, query strings come out as "?a=1&amp;b=2".
    const href = (match[1] ?? match[2])
      .trim()
      .replace(/&(?:amp|#38|#x26);/gi, "&");

    // Skip empty values, same-page anchors, and non-navigational schemes.
    if (
      href === "" ||
      href.startsWith("#") ||
      href.startsWith("javascript:") ||
      href.startsWith("mailto:") ||
      href.startsWith("tel:")
    ) {
      continue;
    }

    const resolved = resolveUrl(href, baseUrl);
    if (!resolved) {
      continue;
    }

    const normalized = normalizeUrl(resolved);
    if (shouldCrawl(normalized, baseUrl).isValid) {
      links.add(normalized);
    }
  }

  return [...links];
}
189
/**
 * Counts how many non-empty segments a URL's path contains.
 *
 * @param url The URL to analyze
 * @returns Number of path segments; 0 for the root path or an unparsable URL
 */
export function getPathDepth(url) {
  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    return 0;
  }

  let depth = 0;
  for (const segment of pathname.split("/")) {
    // Leading, trailing, and doubled slashes yield empty segments; skip them.
    if (segment) {
      depth += 1;
    }
  }
  return depth;
}
205
/**
 * Maximum length of a generated filename before the ".md" extension is added.
 */
const MAX_FILENAME_STEM_LENGTH = 200;

/**
 * Converts a URL to a safe filename for caching.
 *
 * The name is built from the URL path. When the URL has a query string, a
 * short hash of it is appended so pages that differ only by query get
 * different files. Over-long names are shortened by trimming the path part,
 * never the hash, so that distinction survives truncation.
 *
 * @param url The URL to convert
 * @returns Safe filename ending in ".md" ("index.md" for an empty path,
 *          "page.md" if the URL cannot be parsed)
 */
export function urlToFilename(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return "page.md";
  }

  // The hash is base-36 (digits and lowercase letters), so the sanitizing
  // below leaves the suffix intact.
  const suffix = parsed.search ? `_${simpleHash(parsed.search)}` : "";

  let filename = `${parsed.pathname}${suffix}`
    .replace(/^\//, "") // Remove leading slash
    .replace(/\//g, "_") // Replace slashes with underscores
    .replace(/[^a-zA-Z0-9_.-]/g, "_") // Replace special chars
    .replace(/_+/g, "_"); // Collapse multiple underscores

  if (filename.length > MAX_FILENAME_STEM_LENGTH) {
    // Trim the path portion only; cutting the tail would drop the query
    // hash and make URLs that differ only by query share one cache file.
    filename = suffix
      ? `${filename
          .slice(0, MAX_FILENAME_STEM_LENGTH - suffix.length)
          .replace(/_+$/, "")}${suffix}`
      : filename.slice(0, MAX_FILENAME_STEM_LENGTH);
  }

  // Ensure it ends with .md for markdown files
  if (!filename.endsWith(".md")) {
    filename = `${filename || "index"}.md`;
  }

  return filename;
}

/**
 * Simple non-cryptographic string hash (the classic `hash * 31 + charCode`
 * scheme kept within 32 bits), rendered in base 36.
 *
 * @param str The string to hash
 * @returns Short lowercase alphanumeric hash string
 */
function simpleHash(str) {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // (hash << 5) - hash === hash * 31; `| 0` wraps to a 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
251
+ //# sourceMappingURL=url.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,eAAe,GAAG;IACtB,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,aAAa;IACb,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,SAAS;IACT,KAAK;CACN,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,kBAAkB;QAClB,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QAEjB,6BAA6B;QAC7B,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;YACpC,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;QAED,+CAA+C;QAC/C,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;QAE3B,yDAAyD;QACzD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAChE,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QAED,4DAA4D;QAC5D,IAAI,CAAC;YACH,MAAM,CAAC,QAAQ,GAAG,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACxD,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;QACjC,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC;IACrB,CAAC;IAAC,MAAM,CAAC;QACP,iCAAiC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,OAAO,MAAM,CAAC,QAAQ,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,IAAY;IACrD,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,MAAM,SAAS,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACzD,OAAO,SAAS,CAAC,OAAO,CAAC,KAAK,SAAS,CAAC,OAAO,CAAC,CAAC;AACnD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY,EAAE,OAAe;IACtD,IAAI,CAAC;QACH,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YAC9B,OAAO,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC;QACjD,CAAC;QAED,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAA
C;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CACzB,GAAW,EACX,OAAe;IAEf,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,kBAAkB;QAClB,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,mBAAmB,EAAE,CAAC;QACzD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC;YAChC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,iBAAiB,EAAE,CAAC;QACvD,CAAC;QAED,gCAAgC;QAChC,MAAM,YAAY,GAAG;YACnB,UAAU;YACV,WAAW;YACX,UAAU;YACV,WAAW;YACX,WAAW;YACX,aAAa;YACb,UAAU;YACV,cAAc;YACd,0CAA0C;YAC1C,qCAAqC;YACrC,gCAAgC;YAChC,oCAAoC;SACrC,CAAC;QAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACnC,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAClC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC;YACxD,CAAC;QACH,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC3B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;IACnD,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,0CAA0C;IAC1C,2DAA2D;IAC3D,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,wCAAwC;QACxC,IACE,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YACpB,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAC9B,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAC1B,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,EACvB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC3C,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YAE1C,qCAAqC;YACrC,MAAM,EAAE,OAAO,EAAE,GAAG,WAAW,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACrD,IAAI,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;gBACrB,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,GAAW;I
ACtC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC5D,OAAO,QAAQ,CAAC,MAAM,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,uBAAuB;QACvB,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAE/B,oCAAoC;QACpC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACvC,QAAQ,IAAI,IAAI,IAAI,EAAE,CAAC;QACzB,CAAC;QAED,wBAAwB;QACxB,QAAQ,GAAG,QAAQ;aAChB,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,uBAAuB;aAC1C,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,mCAAmC;aACvD,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC,wBAAwB;aACzD,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,gCAAgC;aACpD,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,eAAe;QAEjC,6CAA6C;QAC7C,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,QAAQ,GAAG,QAAQ,IAAI,OAAO,CAAC;YAC/B,QAAQ,IAAI,KAAK,CAAC;QACpB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,2BAA2B;IACjD,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;AACrC,CAAC"}
package/package.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "name": "mcp-docs-scraper",
3
+ "version": "0.1.0",
4
+ "description": "An MCP server that gives coding agents fast, efficient access to library documentation",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "mcp-docs-scraper": "dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "keywords": [
16
+ "mcp",
17
+ "model-context-protocol",
18
+ "documentation",
19
+ "scraper",
20
+ "ai",
21
+ "llm",
22
+ "claude",
23
+ "cursor",
24
+ "github",
25
+ "web-scraper"
26
+ ],
27
+ "author": "Kuba Kwiecien <kwiscion@gmail.com>",
28
+ "license": "MIT",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/kwiscion/mcp-docs-scraper.git"
32
+ },
33
+ "homepage": "https://github.com/kwiscion/mcp-docs-scraper#readme",
34
+ "bugs": {
35
+ "url": "https://github.com/kwiscion/mcp-docs-scraper/issues"
36
+ },
37
+ "engines": {
38
+ "node": ">=22.0.0"
39
+ },
40
+ "dependencies": {
41
+ "@modelcontextprotocol/sdk": "^1.25.0",
42
+ "cheerio": "^1.1.2",
43
+ "minisearch": "^7.2.0",
44
+ "turndown": "^7.2.2",
45
+ "zod": "^4.3.5"
46
+ },
47
+ "devDependencies": {
48
+ "@types/node": "^22.0.0",
49
+ "@types/turndown": "^5.0.6",
50
+ "tsx": "^4.21.0",
51
+ "typescript": "^5.9.0"
52
+ },
53
+ "scripts": {
54
+ "build": "tsc",
55
+ "start": "node dist/index.js",
56
+ "dev": "tsx src/index.ts"
57
+ }
58
+ }