@arabold/docs-mcp-server 1.34.0 → 1.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +249 -60
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -33,6 +33,7 @@ import mime from "mime";
|
|
|
33
33
|
import { HeaderGenerator } from "header-generator";
|
|
34
34
|
import fs$1 from "node:fs/promises";
|
|
35
35
|
import axios from "axios";
|
|
36
|
+
import { MarkItDown } from "markitdown-ts";
|
|
36
37
|
import { VirtualConsole, JSDOM } from "jsdom";
|
|
37
38
|
import psl from "psl";
|
|
38
39
|
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
|
@@ -945,6 +946,10 @@ const DEFAULT_CONFIG = {
|
|
|
945
946
|
childLimit: 3,
|
|
946
947
|
precedingSiblingsLimit: 1,
|
|
947
948
|
subsequentSiblingsLimit: 2
|
|
949
|
+
},
|
|
950
|
+
document: {
|
|
951
|
+
maxSize: 10 * 1024 * 1024
|
|
952
|
+
// 10MB max size for PDF/Office documents
|
|
948
953
|
}
|
|
949
954
|
};
|
|
950
955
|
const AppConfigSchema = z.object({
|
|
@@ -1018,7 +1023,10 @@ const AppConfigSchema = z.object({
|
|
|
1018
1023
|
childLimit: z.coerce.number().int().default(DEFAULT_CONFIG.assembly.childLimit),
|
|
1019
1024
|
precedingSiblingsLimit: z.coerce.number().int().default(DEFAULT_CONFIG.assembly.precedingSiblingsLimit),
|
|
1020
1025
|
subsequentSiblingsLimit: z.coerce.number().int().default(DEFAULT_CONFIG.assembly.subsequentSiblingsLimit)
|
|
1021
|
-
}).default(DEFAULT_CONFIG.assembly)
|
|
1026
|
+
}).default(DEFAULT_CONFIG.assembly),
|
|
1027
|
+
document: z.object({
|
|
1028
|
+
maxSize: z.coerce.number().int().default(DEFAULT_CONFIG.document.maxSize)
|
|
1029
|
+
}).default(DEFAULT_CONFIG.document)
|
|
1022
1030
|
});
|
|
1023
1031
|
const defaults = AppConfigSchema.parse({});
|
|
1024
1032
|
const configMappings = [
|
|
@@ -2370,6 +2378,31 @@ class MimeTypeUtils {
|
|
|
2370
2378
|
static isJson(mimeType) {
|
|
2371
2379
|
return mimeType === "application/json" || mimeType === "text/json" || mimeType === "text/x-json";
|
|
2372
2380
|
}
|
|
2381
|
+
/**
|
|
2382
|
+
* Checks if a MIME type represents PDF content.
|
|
2383
|
+
*/
|
|
2384
|
+
static isPdf(mimeType) {
|
|
2385
|
+
return mimeType === "application/pdf";
|
|
2386
|
+
}
|
|
2387
|
+
/**
|
|
2388
|
+
* Checks if a MIME type represents an Office document (DOCX, XLSX, PPTX).
|
|
2389
|
+
*/
|
|
2390
|
+
static isOfficeDocument(mimeType) {
|
|
2391
|
+
return mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation";
|
|
2392
|
+
}
|
|
2393
|
+
/**
|
|
2394
|
+
* Checks if a MIME type represents a Jupyter Notebook.
|
|
2395
|
+
*/
|
|
2396
|
+
static isJupyterNotebook(mimeType) {
|
|
2397
|
+
return mimeType === "application/x-ipynb+json";
|
|
2398
|
+
}
|
|
2399
|
+
/**
|
|
2400
|
+
* Checks if a MIME type represents a document that can be processed
|
|
2401
|
+
* by the DocumentPipeline (PDF, Office docs, Jupyter notebooks).
|
|
2402
|
+
*/
|
|
2403
|
+
static isSupportedDocument(mimeType) {
|
|
2404
|
+
return MimeTypeUtils.isPdf(mimeType) || MimeTypeUtils.isOfficeDocument(mimeType) || MimeTypeUtils.isJupyterNotebook(mimeType);
|
|
2405
|
+
}
|
|
2373
2406
|
/**
|
|
2374
2407
|
* Checks if a MIME type represents source code that should be wrapped in code blocks.
|
|
2375
2408
|
*/
|
|
@@ -3098,7 +3131,9 @@ function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
|
3098
3131
|
try {
|
|
3099
3132
|
const parsedUrl = new URL(url);
|
|
3100
3133
|
const finalOptions = { ...defaultNormalizerOptions, ...options };
|
|
3101
|
-
const normalized = new URL(
|
|
3134
|
+
const normalized = new URL(url);
|
|
3135
|
+
normalized.search = "";
|
|
3136
|
+
normalized.hash = "";
|
|
3102
3137
|
if (finalOptions.removeIndex) {
|
|
3103
3138
|
normalized.pathname = normalized.pathname.replace(
|
|
3104
3139
|
/\/index\.(html|htm|asp|php|jsp)$/i,
|
|
@@ -3110,13 +3145,13 @@ function normalizeUrl(url, options = defaultNormalizerOptions) {
|
|
|
3110
3145
|
}
|
|
3111
3146
|
const preservedHash = !finalOptions.removeHash ? parsedUrl.hash : "";
|
|
3112
3147
|
const preservedSearch = !finalOptions.removeQuery ? parsedUrl.search : "";
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
result += preservedSearch;
|
|
3148
|
+
if (!finalOptions.removeQuery) {
|
|
3149
|
+
normalized.search = preservedSearch;
|
|
3116
3150
|
}
|
|
3117
|
-
if (
|
|
3118
|
-
|
|
3151
|
+
if (!finalOptions.removeHash) {
|
|
3152
|
+
normalized.hash = preservedHash;
|
|
3119
3153
|
}
|
|
3154
|
+
let result = normalized.href;
|
|
3120
3155
|
if (finalOptions.ignoreCase) {
|
|
3121
3156
|
result = result.toLowerCase();
|
|
3122
3157
|
}
|
|
@@ -3790,6 +3825,181 @@ ${"```"}`;
|
|
|
3790
3825
|
return window2.document;
|
|
3791
3826
|
}
|
|
3792
3827
|
}
|
|
3828
|
+
class BasePipeline {
|
|
3829
|
+
/**
|
|
3830
|
+
* Determines if this pipeline can process content with the given MIME type.
|
|
3831
|
+
* Must be implemented by derived classes.
|
|
3832
|
+
*/
|
|
3833
|
+
canProcess(_mimeType, _content) {
|
|
3834
|
+
throw new Error("Method not implemented.");
|
|
3835
|
+
}
|
|
3836
|
+
/**
|
|
3837
|
+
* Processes the raw content through the pipeline.
|
|
3838
|
+
* Must be implemented by derived classes.
|
|
3839
|
+
*/
|
|
3840
|
+
async process(_rawContent, _options, _fetcher) {
|
|
3841
|
+
throw new Error("Method not implemented.");
|
|
3842
|
+
}
|
|
3843
|
+
/**
|
|
3844
|
+
* Cleanup resources used by this pipeline.
|
|
3845
|
+
* Default implementation does nothing - override in derived classes as needed.
|
|
3846
|
+
*/
|
|
3847
|
+
async close() {
|
|
3848
|
+
}
|
|
3849
|
+
/**
|
|
3850
|
+
* Executes a middleware stack on the given context.
|
|
3851
|
+
* This is a utility method used by derived pipeline classes.
|
|
3852
|
+
*
|
|
3853
|
+
* @param middleware - The middleware stack to execute
|
|
3854
|
+
* @param context - The context to process
|
|
3855
|
+
*/
|
|
3856
|
+
async executeMiddlewareStack(middleware, context) {
|
|
3857
|
+
let index = -1;
|
|
3858
|
+
const dispatch = async (i) => {
|
|
3859
|
+
if (i <= index) throw new Error("next() called multiple times");
|
|
3860
|
+
index = i;
|
|
3861
|
+
const mw = middleware[i];
|
|
3862
|
+
if (!mw) return;
|
|
3863
|
+
await mw.process(context, dispatch.bind(null, i + 1));
|
|
3864
|
+
};
|
|
3865
|
+
try {
|
|
3866
|
+
await dispatch(0);
|
|
3867
|
+
} catch (error) {
|
|
3868
|
+
context.errors.push(error instanceof Error ? error : new Error(String(error)));
|
|
3869
|
+
}
|
|
3870
|
+
}
|
|
3871
|
+
}
|
|
3872
|
+
class DocumentPipeline extends BasePipeline {
|
|
3873
|
+
markitdown;
|
|
3874
|
+
splitter;
|
|
3875
|
+
maxSize;
|
|
3876
|
+
constructor(config) {
|
|
3877
|
+
super();
|
|
3878
|
+
this.markitdown = new MarkItDown();
|
|
3879
|
+
this.maxSize = config.document.maxSize;
|
|
3880
|
+
const semanticSplitter = new SemanticMarkdownSplitter(
|
|
3881
|
+
config.splitter.preferredChunkSize,
|
|
3882
|
+
config.splitter.maxChunkSize
|
|
3883
|
+
);
|
|
3884
|
+
this.splitter = new GreedySplitter(
|
|
3885
|
+
semanticSplitter,
|
|
3886
|
+
config.splitter.minChunkSize,
|
|
3887
|
+
config.splitter.preferredChunkSize,
|
|
3888
|
+
config.splitter.maxChunkSize
|
|
3889
|
+
);
|
|
3890
|
+
}
|
|
3891
|
+
canProcess(mimeType) {
|
|
3892
|
+
return MimeTypeUtils.isSupportedDocument(mimeType);
|
|
3893
|
+
}
|
|
3894
|
+
async process(rawContent, _options) {
|
|
3895
|
+
const buffer = Buffer.isBuffer(rawContent.content) ? rawContent.content : Buffer.from(rawContent.content);
|
|
3896
|
+
if (buffer.length > this.maxSize) {
|
|
3897
|
+
logger.warn(
|
|
3898
|
+
`Document exceeds size limit (${buffer.length} > ${this.maxSize}): ${rawContent.source}`
|
|
3899
|
+
);
|
|
3900
|
+
return {
|
|
3901
|
+
title: null,
|
|
3902
|
+
contentType: rawContent.mimeType,
|
|
3903
|
+
textContent: null,
|
|
3904
|
+
links: [],
|
|
3905
|
+
errors: [new Error(`Document exceeds maximum size of ${this.maxSize} bytes`)],
|
|
3906
|
+
chunks: []
|
|
3907
|
+
};
|
|
3908
|
+
}
|
|
3909
|
+
const extension = this.extractExtension(rawContent.source);
|
|
3910
|
+
if (!extension) {
|
|
3911
|
+
logger.warn(`Could not determine file extension: ${rawContent.source}`);
|
|
3912
|
+
return {
|
|
3913
|
+
title: null,
|
|
3914
|
+
contentType: rawContent.mimeType,
|
|
3915
|
+
textContent: null,
|
|
3916
|
+
links: [],
|
|
3917
|
+
errors: [new Error("Could not determine file extension for document")],
|
|
3918
|
+
chunks: []
|
|
3919
|
+
};
|
|
3920
|
+
}
|
|
3921
|
+
try {
|
|
3922
|
+
const result = await this.markitdown.convertBuffer(buffer, {
|
|
3923
|
+
file_extension: `.${extension}`
|
|
3924
|
+
});
|
|
3925
|
+
if (!result?.markdown) {
|
|
3926
|
+
logger.warn(`No content extracted from document: ${rawContent.source}`);
|
|
3927
|
+
return {
|
|
3928
|
+
title: null,
|
|
3929
|
+
contentType: rawContent.mimeType,
|
|
3930
|
+
textContent: null,
|
|
3931
|
+
links: [],
|
|
3932
|
+
errors: [],
|
|
3933
|
+
chunks: []
|
|
3934
|
+
};
|
|
3935
|
+
}
|
|
3936
|
+
const title = result.title || this.extractFilename(rawContent.source);
|
|
3937
|
+
let markdown = result.markdown;
|
|
3938
|
+
if (extension === "xlsx") {
|
|
3939
|
+
markdown = this.promoteTableHeaders(markdown);
|
|
3940
|
+
}
|
|
3941
|
+
const chunks = await this.splitter.splitText(markdown, "text/markdown");
|
|
3942
|
+
return {
|
|
3943
|
+
title,
|
|
3944
|
+
contentType: "text/markdown",
|
|
3945
|
+
// Output is always markdown
|
|
3946
|
+
textContent: markdown,
|
|
3947
|
+
links: [],
|
|
3948
|
+
// Documents don't have extractable links
|
|
3949
|
+
errors: [],
|
|
3950
|
+
chunks
|
|
3951
|
+
};
|
|
3952
|
+
} catch (error) {
|
|
3953
|
+
const errorName = error instanceof Error ? error.name : "UnknownError";
|
|
3954
|
+
const safeMessage = `Failed to convert document: ${errorName}`;
|
|
3955
|
+
logger.warn(`${safeMessage} for ${rawContent.source}`);
|
|
3956
|
+
return {
|
|
3957
|
+
title: null,
|
|
3958
|
+
contentType: rawContent.mimeType,
|
|
3959
|
+
textContent: null,
|
|
3960
|
+
links: [],
|
|
3961
|
+
errors: [new Error(safeMessage)],
|
|
3962
|
+
chunks: []
|
|
3963
|
+
};
|
|
3964
|
+
}
|
|
3965
|
+
}
|
|
3966
|
+
extractExtension(source) {
|
|
3967
|
+
try {
|
|
3968
|
+
const url = new URL(source);
|
|
3969
|
+
return this.getExtensionFromPath(url.pathname);
|
|
3970
|
+
} catch {
|
|
3971
|
+
return this.getExtensionFromPath(source);
|
|
3972
|
+
}
|
|
3973
|
+
}
|
|
3974
|
+
getExtensionFromPath(pathStr) {
|
|
3975
|
+
const lastSlash = pathStr.lastIndexOf("/");
|
|
3976
|
+
const filename = lastSlash >= 0 ? pathStr.substring(lastSlash + 1) : pathStr;
|
|
3977
|
+
const lastDot = filename.lastIndexOf(".");
|
|
3978
|
+
if (lastDot > 0) {
|
|
3979
|
+
return filename.substring(lastDot + 1).toLowerCase();
|
|
3980
|
+
}
|
|
3981
|
+
return null;
|
|
3982
|
+
}
|
|
3983
|
+
/**
|
|
3984
|
+
* Post-processes Markdown to fix empty table headers generated by sheet-to-html conversions.
|
|
3985
|
+
* Detects tables where the header row is empty and promotes the first data row to be the header.
|
|
3986
|
+
*/
|
|
3987
|
+
promoteTableHeaders(markdown) {
|
|
3988
|
+
const emptyHeaderPattern = /^\|(?:\s*\|)+\s*$\r?\n^(\|(?:\s*:?-+:?\s*\|)+)\s*$\r?\n^(\|.*\|)\s*$/gm;
|
|
3989
|
+
return markdown.replace(emptyHeaderPattern, "$2\n$1");
|
|
3990
|
+
}
|
|
3991
|
+
extractFilename(source) {
|
|
3992
|
+
try {
|
|
3993
|
+
const url = new URL(source);
|
|
3994
|
+
const pathname = url.pathname;
|
|
3995
|
+
const lastSlash = pathname.lastIndexOf("/");
|
|
3996
|
+
return pathname.substring(lastSlash + 1) || null;
|
|
3997
|
+
} catch {
|
|
3998
|
+
const lastSlash = source.lastIndexOf("/");
|
|
3999
|
+
return source.substring(lastSlash + 1) || null;
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
4002
|
+
}
|
|
3793
4003
|
class HtmlCheerioParserMiddleware {
|
|
3794
4004
|
async process(context, next) {
|
|
3795
4005
|
try {
|
|
@@ -5194,50 +5404,6 @@ function convertToString(content, charset) {
|
|
|
5194
5404
|
}
|
|
5195
5405
|
}
|
|
5196
5406
|
}
|
|
5197
|
-
class BasePipeline {
|
|
5198
|
-
/**
|
|
5199
|
-
* Determines if this pipeline can process content with the given MIME type.
|
|
5200
|
-
* Must be implemented by derived classes.
|
|
5201
|
-
*/
|
|
5202
|
-
canProcess(_mimeType, _content) {
|
|
5203
|
-
throw new Error("Method not implemented.");
|
|
5204
|
-
}
|
|
5205
|
-
/**
|
|
5206
|
-
* Processes the raw content through the pipeline.
|
|
5207
|
-
* Must be implemented by derived classes.
|
|
5208
|
-
*/
|
|
5209
|
-
async process(_rawContent, _options, _fetcher) {
|
|
5210
|
-
throw new Error("Method not implemented.");
|
|
5211
|
-
}
|
|
5212
|
-
/**
|
|
5213
|
-
* Cleanup resources used by this pipeline.
|
|
5214
|
-
* Default implementation does nothing - override in derived classes as needed.
|
|
5215
|
-
*/
|
|
5216
|
-
async close() {
|
|
5217
|
-
}
|
|
5218
|
-
/**
|
|
5219
|
-
* Executes a middleware stack on the given context.
|
|
5220
|
-
* This is a utility method used by derived pipeline classes.
|
|
5221
|
-
*
|
|
5222
|
-
* @param middleware - The middleware stack to execute
|
|
5223
|
-
* @param context - The context to process
|
|
5224
|
-
*/
|
|
5225
|
-
async executeMiddlewareStack(middleware, context) {
|
|
5226
|
-
let index = -1;
|
|
5227
|
-
const dispatch = async (i) => {
|
|
5228
|
-
if (i <= index) throw new Error("next() called multiple times");
|
|
5229
|
-
index = i;
|
|
5230
|
-
const mw = middleware[i];
|
|
5231
|
-
if (!mw) return;
|
|
5232
|
-
await mw.process(context, dispatch.bind(null, i + 1));
|
|
5233
|
-
};
|
|
5234
|
-
try {
|
|
5235
|
-
await dispatch(0);
|
|
5236
|
-
} catch (error) {
|
|
5237
|
-
context.errors.push(error instanceof Error ? error : new Error(String(error)));
|
|
5238
|
-
}
|
|
5239
|
-
}
|
|
5240
|
-
}
|
|
5241
5407
|
class HtmlPipeline extends BasePipeline {
|
|
5242
5408
|
playwrightMiddleware;
|
|
5243
5409
|
standardMiddleware;
|
|
@@ -7067,7 +7233,7 @@ class TextPipeline extends BasePipeline {
|
|
|
7067
7233
|
let PipelineFactory$1 = class PipelineFactory {
|
|
7068
7234
|
/**
|
|
7069
7235
|
* Creates the standard set of content pipelines used by all scraper strategies.
|
|
7070
|
-
* Includes HTML, Markdown, JSON, source code, and text processing capabilities.
|
|
7236
|
+
* Includes HTML, Markdown, JSON, source code, document, and text processing capabilities.
|
|
7071
7237
|
* Each pipeline now handles both preprocessing and content-specific splitting.
|
|
7072
7238
|
* TextPipeline is placed last as the universal fallback for unknown content types.
|
|
7073
7239
|
*
|
|
@@ -7077,6 +7243,8 @@ let PipelineFactory$1 = class PipelineFactory {
|
|
|
7077
7243
|
return [
|
|
7078
7244
|
new JsonPipeline(appConfig),
|
|
7079
7245
|
new SourceCodePipeline(appConfig),
|
|
7246
|
+
new DocumentPipeline(appConfig),
|
|
7247
|
+
// PDF, Office docs, Jupyter notebooks
|
|
7080
7248
|
new HtmlPipeline(appConfig),
|
|
7081
7249
|
new MarkdownPipeline(appConfig),
|
|
7082
7250
|
new TextPipeline(appConfig)
|
|
@@ -11246,7 +11414,7 @@ const Layout = ({
|
|
|
11246
11414
|
children,
|
|
11247
11415
|
eventClientConfig
|
|
11248
11416
|
}) => {
|
|
11249
|
-
const versionString = version || "1.34.0";
|
|
11417
|
+
const versionString = version || "1.35.0";
|
|
11250
11418
|
const versionInitializer = `versionUpdate({ currentVersion: ${`'${versionString}'`} })`;
|
|
11251
11419
|
return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
|
|
11252
11420
|
/* @__PURE__ */ jsxs("head", { children: [
|
|
@@ -12692,7 +12860,8 @@ function registerNewJobRoutes(server, scrapeTool, scraperConfig) {
|
|
|
12692
12860
|
reply.type("text/html");
|
|
12693
12861
|
try {
|
|
12694
12862
|
let parsePatterns = function(input) {
|
|
12695
|
-
if (
|
|
12863
|
+
if (input === void 0) return void 0;
|
|
12864
|
+
if (input.trim() === "") return [];
|
|
12696
12865
|
return input.split(/\n|,/).map((s) => s.trim()).filter((s) => s.length > 0);
|
|
12697
12866
|
}, parseHeaders2 = function(input) {
|
|
12698
12867
|
if (!input) return void 0;
|
|
@@ -13044,7 +13213,7 @@ const LibrarySearchCard = ({ library }) => {
|
|
|
13044
13213
|
] });
|
|
13045
13214
|
};
|
|
13046
13215
|
const SearchResultItem = async ({ result }) => {
|
|
13047
|
-
const isMarkdown = result.mimeType ? MimeTypeUtils.isMarkdown(result.mimeType) : true;
|
|
13216
|
+
const isMarkdown = result.mimeType ? MimeTypeUtils.isMarkdown(result.mimeType) || MimeTypeUtils.isSupportedDocument(result.mimeType) : true;
|
|
13048
13217
|
const jsdom = createJSDOM("");
|
|
13049
13218
|
const purifier = DOMPurify(jsdom.window);
|
|
13050
13219
|
let contentElement;
|
|
@@ -13586,7 +13755,7 @@ class AppServer {
|
|
|
13586
13755
|
try {
|
|
13587
13756
|
if (telemetry.isEnabled()) {
|
|
13588
13757
|
telemetry.setGlobalContext({
|
|
13589
|
-
appVersion: "1.34.0",
|
|
13758
|
+
appVersion: "1.35.0",
|
|
13590
13759
|
appPlatform: process.platform,
|
|
13591
13760
|
appNodeVersion: process.version,
|
|
13592
13761
|
appServicesEnabled: this.getActiveServicesList(),
|
|
@@ -14810,8 +14979,12 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
14810
14979
|
".tsv",
|
|
14811
14980
|
".log"
|
|
14812
14981
|
];
|
|
14982
|
+
const documentExtensions = [".pdf", ".docx", ".xlsx", ".pptx", ".ipynb"];
|
|
14813
14983
|
const pathLower = path2.toLowerCase();
|
|
14814
14984
|
const hasTextExtension = textExtensions.some((ext) => pathLower.endsWith(ext));
|
|
14985
|
+
const hasDocumentExtension = documentExtensions.some(
|
|
14986
|
+
(ext) => pathLower.endsWith(ext)
|
|
14987
|
+
);
|
|
14815
14988
|
const hasCompoundExtension = pathLower.includes(".env.") || pathLower.endsWith(".env") || pathLower.includes(".config.") || pathLower.includes(".lock");
|
|
14816
14989
|
const fileName = path2.split("/").pop() || "";
|
|
14817
14990
|
const fileNameLower = fileName.toLowerCase();
|
|
@@ -14845,7 +15018,7 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
14845
15018
|
}
|
|
14846
15019
|
return fileNameLower === name || fileNameLower.startsWith(`${name}.`);
|
|
14847
15020
|
});
|
|
14848
|
-
if (hasTextExtension || hasCompoundExtension || isCommonTextFile) {
|
|
15021
|
+
if (hasTextExtension || hasDocumentExtension || hasCompoundExtension || isCommonTextFile) {
|
|
14849
15022
|
return shouldIncludeUrl(path2, options.includePatterns, options.excludePatterns);
|
|
14850
15023
|
}
|
|
14851
15024
|
const mimeType = mime.getType(path2);
|
|
@@ -14982,7 +15155,23 @@ class LocalFileStrategy extends BaseScraperStrategy {
|
|
|
14982
15155
|
}
|
|
14983
15156
|
if (stats.isDirectory()) {
|
|
14984
15157
|
const contents = await fs$1.readdir(filePath);
|
|
14985
|
-
const links = contents.map((name) =>
|
|
15158
|
+
const links = contents.map((name) => {
|
|
15159
|
+
const url = new URL(`file://${path.join(filePath, name)}`);
|
|
15160
|
+
if (url.hostname !== "") {
|
|
15161
|
+
url.pathname = `/${url.hostname}${url.pathname}`;
|
|
15162
|
+
url.hostname = "";
|
|
15163
|
+
}
|
|
15164
|
+
return url.href;
|
|
15165
|
+
}).filter((url) => {
|
|
15166
|
+
const allowed = this.shouldProcessUrl(url, options);
|
|
15167
|
+
if (!allowed) {
|
|
15168
|
+
logger.debug(`Skipping out-of-scope link: ${url}`);
|
|
15169
|
+
}
|
|
15170
|
+
return allowed;
|
|
15171
|
+
});
|
|
15172
|
+
logger.debug(
|
|
15173
|
+
`Found ${links.length} files in ${filePath} (from ${contents.length} entries)`
|
|
15174
|
+
);
|
|
14986
15175
|
return { url: item.url, links, status: FetchStatus.SUCCESS };
|
|
14987
15176
|
}
|
|
14988
15177
|
const rawContent = await this.fileFetcher.fetch(item.url, {
|
|
@@ -17216,7 +17405,7 @@ function createCli(argv) {
|
|
|
17216
17405
|
let globalEventBus = null;
|
|
17217
17406
|
let globalTelemetryService = null;
|
|
17218
17407
|
const commandStartTimes = /* @__PURE__ */ new Map();
|
|
17219
|
-
const cli = yargs(hideBin(argv)).scriptName("docs-mcp-server").strict().usage("Usage: $0 <command> [options]").version("1.34.0").option("verbose", {
|
|
17408
|
+
const cli = yargs(hideBin(argv)).scriptName("docs-mcp-server").strict().usage("Usage: $0 <command> [options]").version("1.35.0").option("verbose", {
|
|
17220
17409
|
type: "boolean",
|
|
17221
17410
|
description: "Enable verbose (debug) logging",
|
|
17222
17411
|
default: false
|
|
@@ -17272,7 +17461,7 @@ function createCli(argv) {
|
|
|
17272
17461
|
if (shouldEnableTelemetry() && telemetry.isEnabled()) {
|
|
17273
17462
|
const commandName = argv2._[0]?.toString() || "default";
|
|
17274
17463
|
telemetry.setGlobalContext({
|
|
17275
|
-
appVersion: "1.34.0",
|
|
17464
|
+
appVersion: "1.35.0",
|
|
17276
17465
|
appPlatform: process.platform,
|
|
17277
17466
|
appNodeVersion: process.version,
|
|
17278
17467
|
appInterface: "cli",
|