@arabold/docs-mcp-server 1.25.1 → 1.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +94 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -709,7 +709,7 @@ function extractProtocol(urlOrPath) {
|
|
|
709
709
|
}
|
|
710
710
|
}
|
|
711
711
|
const name = "@arabold/docs-mcp-server";
|
|
712
|
-
const version = "1.25.
|
|
712
|
+
const version = "1.25.1";
|
|
713
713
|
const description = "MCP server for fetching and searching documentation";
|
|
714
714
|
const type = "module";
|
|
715
715
|
const bin = { "docs-mcp-server": "dist/index.js" };
|
|
@@ -4814,6 +4814,98 @@ class MarkdownMetadataExtractorMiddleware {
|
|
|
4814
4814
|
await next();
|
|
4815
4815
|
}
|
|
4816
4816
|
}
|
|
4817
|
+
/**
 * Pipeline middleware that rewrites URLs inside an already-parsed HTML document.
 *
 * It reads `context.dom` (used here as a Cheerio-style selector function) and
 * `context.source` (used as the base URL for resolving relative references).
 * Failures are recorded on `context.errors` instead of being thrown, and
 * `next()` is always invoked exactly once.
 */
class HtmlNormalizationMiddleware {
  /**
   * Normalizes image sources and links in `context.dom`, then continues the pipeline.
   *
   * @param context Pipeline context; this method reads `dom` and `source` and
   *   appends to `errors` when normalization throws.
   * @param next Continuation that runs the remainder of the pipeline.
   * @returns {Promise<void>} Resolves once `next()` has resolved.
   */
  async process(context, next) {
    // Nothing to rewrite without a parsed DOM; pass straight through.
    if (!context.dom) {
      logger.debug(
        `Skipping HTML normalization for ${context.source} - no DOM available`
      );
      await next();
      return;
    }
    try {
      logger.debug(`Normalizing HTML URLs and links for ${context.source}`);
      const $ = context.dom;
      const baseUrl = context.source;
      this.normalizeImageUrls($, baseUrl);
      this.normalizeLinks($, baseUrl);
      logger.debug(`Successfully normalized HTML content for ${context.source}`);
    } catch (error) {
      // Best effort: record the failure and keep the pipeline running.
      logger.error(`❌ Failed to normalize HTML for ${context.source}: ${error}`);
      context.errors.push(
        error instanceof Error ? error : new Error(`HTML normalization failed: ${String(error)}`)
      );
    }
    await next();
  }
  /**
   * Normalizes image URLs by converting relative URLs to absolute URLs.
   *
   * A `src` that already parses as an absolute URL (any scheme) is left
   * untouched; an `<img>` without a `src` is skipped.
   *
   * @param $ Selector function for the document (Cheerio-style API assumed).
   * @param {string} baseUrl URL that relative sources are resolved against.
   */
  normalizeImageUrls($, baseUrl) {
    $("img").each((_index, element) => {
      const $img = $(element);
      const src = $img.attr("src");
      if (!src) return;
      try {
        // Parses without a base only when `src` is already absolute.
        new URL(src);
      } catch {
        try {
          const absoluteUrl = new URL(src, baseUrl).href;
          $img.attr("src", absoluteUrl);
          logger.debug(`Converted relative image URL: ${src} → ${absoluteUrl}`);
        } catch (error) {
          // Unresolvable (e.g. invalid base): keep the original attribute.
          logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
        }
      }
    });
  }
  /**
   * Normalizes links by:
   * - Converting relative URLs to absolute URLs
   * - Unwrapping anchor links (preserving their content)
   * - Unwrapping non-HTTP links (preserving their content)
   * - Unwrapping links that have no (or a blank) href, or that cannot be resolved
   *
   * @param $ Selector function for the document (Cheerio-style API assumed).
   * @param {string} baseUrl URL that relative hrefs are resolved against.
   */
  normalizeLinks($, baseUrl) {
    $("a").each((_index, element) => {
      const $link = $(element);
      const rawHref = $link.attr("href");
      // Whitespace around an href is not significant, so classify the trimmed
      // value. Otherwise " #top" would miss the fragment check below and be
      // rewritten into an absolute self-link instead of being unwrapped.
      const href = typeof rawHref === "string" ? rawHref.trim() : rawHref;
      if (!href) {
        this.unwrapElement($, $link);
        return;
      }
      if (href.startsWith("#")) {
        logger.debug(`Removing anchor link: ${href}`);
        this.unwrapElement($, $link);
        return;
      }
      try {
        // Succeeds only for absolute URLs; those are kept when HTTP(S).
        const url = new URL(href);
        if (url.protocol !== "http:" && url.protocol !== "https:") {
          logger.debug(`Removing non-HTTP link: ${href}`);
          this.unwrapElement($, $link);
          return;
        }
      } catch {
        try {
          const absoluteUrl = new URL(href, baseUrl).href;
          $link.attr("href", absoluteUrl);
          logger.debug(`Converted relative link URL: ${href} → ${absoluteUrl}`);
        } catch (error) {
          logger.debug(`Failed to resolve relative link URL: ${href} - ${error}`);
          this.unwrapElement($, $link);
        }
      }
    });
  }
  /**
   * Unwraps an element by replacing it with its own child nodes.
   *
   * The existing nodes are moved rather than serialized to HTML and parsed
   * again. This preserves nested elements exactly and keeps node identity, so
   * descendants already selected by an enclosing `.each()` loop remain attached
   * to the document when their turn comes.
   *
   * @param _$ Unused; kept so existing call sites stay valid.
   * @param $element Wrapped element to unwrap (must provide `contents()` and
   *   `replaceWith()`).
   */
  unwrapElement(_$, $element) {
    $element.replaceWith($element.contents());
  }
}
|
|
4817
4909
|
function detectCharsetFromHtml(htmlContent) {
|
|
4818
4910
|
const charsetMatch = htmlContent.match(
|
|
4819
4911
|
/<meta\s+charset\s*=\s*["']?([^"'>\s]+)["']?[^>]*>/i
|
|
@@ -4937,6 +5029,7 @@ class HtmlPipeline extends BasePipeline {
|
|
|
4937
5029
|
new HtmlMetadataExtractorMiddleware(),
|
|
4938
5030
|
new HtmlLinkExtractorMiddleware(),
|
|
4939
5031
|
new HtmlSanitizerMiddleware(),
|
|
5032
|
+
new HtmlNormalizationMiddleware(),
|
|
4940
5033
|
new HtmlToMarkdownMiddleware()
|
|
4941
5034
|
];
|
|
4942
5035
|
const semanticSplitter = new SemanticMarkdownSplitter(
|