@arabold/docs-mcp-server 1.12.4 → 1.14.0

@@ -10,6 +10,7 @@ import { VirtualConsole, JSDOM } from "jsdom";
  import { chromium } from "playwright";
  import { gfm } from "@joplin/turndown-plugin-gfm";
  import TurndownService from "turndown";
+ import { TextDecoder } from "node:util";
  import { URL as URL$1, fileURLToPath } from "node:url";
  import * as semver from "semver";
  import semver__default from "semver";
@@ -168,6 +169,49 @@ const FETCHER_BASE_DELAY = 1e3;
  const SPLITTER_MIN_CHUNK_SIZE = 500;
  const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
  const SPLITTER_MAX_CHUNK_SIZE = 5e3;
+ const EMBEDDING_BATCH_SIZE = 100;
+ class MimeTypeUtils {
+   /**
+    * Parses a Content-Type header string into its MIME type and charset.
+    * @param contentTypeHeader The Content-Type header string (e.g., "text/html; charset=utf-8").
+    * @returns A ParsedContentType object, or a default if parsing fails.
+    */
+   static parseContentType(contentTypeHeader) {
+     if (!contentTypeHeader) {
+       return { mimeType: "application/octet-stream" };
+     }
+     const parts = contentTypeHeader.split(";").map((part) => part.trim());
+     const mimeType = parts[0].toLowerCase();
+     let charset;
+     for (let i = 1; i < parts.length; i++) {
+       const param = parts[i];
+       if (param.toLowerCase().startsWith("charset=")) {
+         charset = param.substring("charset=".length).toLowerCase();
+         break;
+       }
+     }
+     return { mimeType, charset };
+   }
+   /**
+    * Checks if a MIME type represents HTML content.
+    */
+   static isHtml(mimeType) {
+     return mimeType === "text/html" || mimeType === "application/xhtml+xml";
+   }
+   /**
+    * Checks if a MIME type represents Markdown content.
+    */
+   static isMarkdown(mimeType) {
+     return mimeType === "text/markdown" || mimeType === "text/x-markdown";
+   }
+   /**
+    * Checks if a MIME type represents plain text content.
+    */
+   static isText(mimeType) {
+     return mimeType.startsWith("text/");
+   }
+   // Extend with more helpers as needed (isJson, isXml, isPdf, etc.)
+ }
  class FingerprintGenerator {
    headerGenerator;
    /**
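
For orientation, the new MimeTypeUtils helper normalizes a raw Content-Type header into a lower-cased MIME type plus an optional charset. A minimal usage sketch (illustrative only, not part of the package):

    // Sketch: how the added helper behaves for a typical header.
    const parsed = MimeTypeUtils.parseContentType("text/html; charset=ISO-8859-1");
    // parsed -> { mimeType: "text/html", charset: "iso-8859-1" }
    MimeTypeUtils.isHtml(parsed.mimeType);        // true
    MimeTypeUtils.parseContentType(undefined);    // { mimeType: "application/octet-stream" }
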
@@ -245,11 +289,15 @@ class HttpFetcher {
        maxRedirects: followRedirects ? 5 : 0
      };
      const response = await axios.get(source, config);
+     const contentTypeHeader = response.headers["content-type"];
+     const { mimeType, charset } = MimeTypeUtils.parseContentType(contentTypeHeader);
+     const contentEncoding = response.headers["content-encoding"];
      return {
        content: response.data,
-       mimeType: response.headers["content-type"] || "application/octet-stream",
-       source,
-       encoding: response.headers["content-encoding"]
+       mimeType,
+       charset,
+       encoding: contentEncoding,
+       source
      };
    } catch (error) {
      const axiosError = error;
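
With this change the fetch result carries the parsed MIME type and charset as separate fields instead of the raw header string. A hypothetical caller (sketch only; the URL is made up, the option name is taken from the diff above):

    const result = await new HttpFetcher().fetch("https://example.com/docs", { followRedirects: true });
    // result.mimeType -> e.g. "text/html" (lower-cased, parameters stripped)
    // result.charset  -> e.g. "utf-8", or undefined when the header has no charset
    // result.encoding -> the Content-Encoding header, e.g. "gzip", if any
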
@@ -322,55 +370,11 @@ class FileFetcher {
      }
    }
  }
- class ContentProcessingPipeline {
-   middleware;
-   /**
-    * Creates an instance of ContentProcessingPipeline.
-    * @param middleware An array of middleware instances to execute in order.
-    */
-   constructor(middleware) {
-     this.middleware = middleware;
-   }
-   /**
-    * Executes the middleware pipeline with the given initial context.
-    * @param initialContext The starting context for the pipeline.
-    * @returns A promise that resolves with the final context after all middleware have executed.
-    */
-   async run(initialContext) {
-     let index = -1;
-     const dispatch = async (i) => {
-       if (i <= index) {
-         throw new Error("next() called multiple times");
-       }
-       index = i;
-       const mw = this.middleware[i];
-       if (!mw) {
-         return;
-       }
-       const next = dispatch.bind(null, i + 1);
-       try {
-         await mw.process(initialContext, next);
-       } catch (error) {
-         initialContext.errors.push(
-           error instanceof Error ? error : new Error(String(error))
-         );
-         logger.warn(`Error in middleware pipeline: ${error}`);
-       }
-     };
-     await dispatch(0);
-     return initialContext;
-   }
- }
  class HtmlCheerioParserMiddleware {
    async process(context, next) {
-     if (!context.contentType.startsWith("text/html")) {
-       await next();
-       return;
-     }
-     const htmlString = typeof context.content === "string" ? context.content : Buffer.from(context.content).toString("utf-8");
      try {
        logger.debug(`Parsing HTML content with Cheerio from ${context.source}`);
-       const $ = cheerio.load(htmlString);
+       const $ = cheerio.load(context.content);
        context.dom = $;
        await next();
      } catch (error) {
@@ -403,17 +407,15 @@ function createJSDOM(html, options) {
  class HtmlLinkExtractorMiddleware {
    /**
     * Processes the context to extract links from the sanitized HTML body.
-    * @param context The current processing context.
+    * @param context The current middleware context.
     * @param next Function to call the next middleware.
     */
    async process(context, next) {
      const $ = context.dom;
      if (!$) {
-       if (context.contentType.startsWith("text/html")) {
-         logger.warn(
-           `Skipping ${this.constructor.name}: context.dom is missing for HTML content. Ensure HtmlCheerioParserMiddleware runs before this.`
-         );
-       }
+       logger.warn(
+         `Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware runs before this.`
+       );
        await next();
        return;
      }
@@ -460,11 +462,9 @@ class HtmlMetadataExtractorMiddleware {
    async process(context, next) {
      const $ = context.dom;
      if (!$) {
-       if (context.contentType.startsWith("text/html")) {
-         logger.warn(
-           `Skipping ${this.constructor.name}: context.dom is missing for HTML content. Ensure HtmlCheerioParserMiddleware runs before this.`
-         );
-       }
+       logger.warn(
+         `Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware runs before this.`
+       );
        await next();
        return;
      }
@@ -526,10 +526,6 @@ class HtmlPlaywrightMiddleware {
      }
    }
    async process(context, next) {
-     if (!context.contentType.startsWith("text/html")) {
-       await next();
-       return;
-     }
      const scrapeMode = context.options?.scrapeMode ?? ScrapeMode.Auto;
      const shouldRunPlaywright = scrapeMode === ScrapeMode.Playwright || scrapeMode === ScrapeMode.Auto;
      if (!shouldRunPlaywright) {
@@ -552,7 +548,7 @@ class HtmlPlaywrightMiddleware {
        if (route.request().url() === context.source) {
          return route.fulfill({
            status: 200,
-           contentType: context.contentType,
+           contentType: "text/html",
            body: context.content
          });
        }
655
651
  async process(context, next) {
656
652
  const $ = context.dom;
657
653
  if (!$) {
658
- if (context.contentType.startsWith("text/html")) {
659
- logger.warn(
660
- `Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware runs before this.`
661
- );
662
- }
654
+ logger.warn(
655
+ `Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware runs before this.`
656
+ );
663
657
  await next();
664
658
  return;
665
659
  }
@@ -769,11 +763,9 @@ ${text.replace(/^\n+|\n+$/g, "")}
    async process(context, next) {
      const $ = context.dom;
      if (!$) {
-       if (context.contentType.startsWith("text/html")) {
-         logger.warn(
-           `Skipping ${this.constructor.name}: context.dom is missing for HTML content. Ensure HtmlCheerioParserMiddleware ran correctly.`
-         );
-       }
+       logger.warn(
+         `Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware ran correctly.`
+       );
        await next();
        return;
      }
@@ -785,10 +777,8 @@ ${text.replace(/^\n+|\n+$/g, "")}
        const warnMsg = `HTML to Markdown conversion resulted in empty content for ${context.source}.`;
        logger.warn(warnMsg);
        context.content = "";
-       context.contentType = "text/markdown";
      } else {
        context.content = markdown;
-       context.contentType = "text/markdown";
        logger.debug(`Successfully converted HTML to Markdown for ${context.source}`);
      }
    } catch (error) {
@@ -809,10 +799,8 @@ class MarkdownLinkExtractorMiddleware {
     * @param next Function to call the next middleware.
     */
    async process(context, next) {
-     if (context.contentType === "text/markdown") {
-       if (!Array.isArray(context.links)) {
-         context.links = [];
-       }
+     if (!Array.isArray(context.links)) {
+       context.links = [];
      }
      await next();
    }
@@ -824,31 +812,153 @@ class MarkdownMetadataExtractorMiddleware {
     * @param next Function to call the next middleware.
     */
    async process(context, next) {
-     if (context.contentType === "text/markdown" || context.contentType === "text/plain") {
-       try {
-         const textContent = typeof context.content === "string" ? context.content : Buffer.from(context.content).toString("utf-8");
-         if (typeof context.content !== "string") {
-           context.content = textContent;
-         }
-         let title = "Untitled";
-         if (context.contentType === "text/markdown") {
-           const match = textContent.match(/^#\s+(.*)$/m);
-           if (match?.[1]) {
-             title = match[1].trim();
-           }
-         }
-         context.metadata.title = title;
-       } catch (error) {
-         context.errors.push(
-           new Error(
-             `Failed to extract metadata from Markdown: ${error instanceof Error ? error.message : String(error)}`
-           )
-         );
+     try {
+       let title = "Untitled";
+       const match = context.content.match(/^#\s+(.*)$/m);
+       if (match?.[1]) {
+         title = match[1].trim();
        }
+       context.metadata.title = title;
+     } catch (error) {
+       context.errors.push(
+         new Error(
+           `Failed to extract metadata from Markdown: ${error instanceof Error ? error.message : String(error)}`
+         )
+       );
      }
      await next();
    }
  }
+ function convertToString(content, charset) {
+   if (Buffer.isBuffer(content)) {
+     const decoder = new TextDecoder(charset || "utf-8");
+     return decoder.decode(content);
+   }
+   return content;
+ }
+ class BasePipeline {
+   /**
+    * Determines if this pipeline can process the given content.
+    * Must be implemented by derived classes.
+    */
+   canProcess(_rawContent) {
+     throw new Error("Method not implemented.");
+   }
+   /**
+    * Processes the raw content through the pipeline.
+    * Must be implemented by derived classes.
+    */
+   async process(_rawContent, _options, _fetcher) {
+     throw new Error("Method not implemented.");
+   }
+   /**
+    * Executes a middleware stack on the given context.
+    * This is a utility method used by derived pipeline classes.
+    *
+    * @param middleware - The middleware stack to execute
+    * @param context - The context to process
+    */
+   async executeMiddlewareStack(middleware, context) {
+     let index = -1;
+     const dispatch = async (i) => {
+       if (i <= index) throw new Error("next() called multiple times");
+       index = i;
+       const mw = middleware[i];
+       if (!mw) return;
+       await mw.process(context, dispatch.bind(null, i + 1));
+     };
+     try {
+       await dispatch(0);
+     } catch (error) {
+       context.errors.push(error instanceof Error ? error : new Error(String(error)));
+     }
+   }
+   /**
+    * Cleans up resources when the pipeline is no longer needed.
+    * Default implementation does nothing.
+    */
+   async close() {
+   }
+ }
+ class HtmlPipeline extends BasePipeline {
+   playwrightMiddleware;
+   standardMiddleware;
+   constructor() {
+     super();
+     this.playwrightMiddleware = new HtmlPlaywrightMiddleware();
+     this.standardMiddleware = [
+       new HtmlCheerioParserMiddleware(),
+       new HtmlMetadataExtractorMiddleware(),
+       new HtmlLinkExtractorMiddleware(),
+       new HtmlSanitizerMiddleware(),
+       new HtmlToMarkdownMiddleware()
+     ];
+   }
+   canProcess(rawContent) {
+     return MimeTypeUtils.isHtml(rawContent.mimeType);
+   }
+   async process(rawContent, options, fetcher) {
+     const contentString = convertToString(rawContent.content, rawContent.charset);
+     const context = {
+       content: contentString,
+       source: rawContent.source,
+       metadata: {},
+       links: [],
+       errors: [],
+       options,
+       fetcher
+     };
+     let middleware = [...this.standardMiddleware];
+     if (options.scrapeMode === "playwright" || options.scrapeMode === "auto") {
+       middleware = [this.playwrightMiddleware, ...middleware];
+     }
+     await this.executeMiddlewareStack(middleware, context);
+     return {
+       textContent: typeof context.content === "string" ? context.content : "",
+       metadata: context.metadata,
+       links: context.links,
+       errors: context.errors
+     };
+   }
+   async close() {
+     await this.playwrightMiddleware.closeBrowser();
+   }
+ }
+ class MarkdownPipeline extends BasePipeline {
+   middleware;
+   constructor() {
+     super();
+     this.middleware = [
+       new MarkdownMetadataExtractorMiddleware(),
+       new MarkdownLinkExtractorMiddleware()
+     ];
+   }
+   canProcess(rawContent) {
+     if (!rawContent.mimeType) return false;
+     return MimeTypeUtils.isMarkdown(rawContent.mimeType) || MimeTypeUtils.isText(rawContent.mimeType);
+   }
+   async process(rawContent, options, fetcher) {
+     const contentString = convertToString(rawContent.content, rawContent.charset);
+     const context = {
+       content: contentString,
+       source: rawContent.source,
+       metadata: {},
+       links: [],
+       errors: [],
+       options,
+       fetcher
+     };
+     await this.executeMiddlewareStack(this.middleware, context);
+     return {
+       textContent: typeof context.content === "string" ? context.content : "",
+       metadata: context.metadata,
+       links: context.links,
+       errors: context.errors
+     };
+   }
+   async close() {
+   }
+ }
  class PipelineError extends Error {
    constructor(message, cause) {
      super(message);
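
Taken together, BasePipeline now hosts the middleware dispatcher that ContentProcessingPipeline used to provide, while HtmlPipeline and MarkdownPipeline pair a canProcess() MIME check with their own middleware stacks. A minimal consumer might look like this (hypothetical sketch; rawContent, options, and fetcher are assumed to come from one of the fetchers and the scraper options):

    const pipelines = [new HtmlPipeline(), new MarkdownPipeline()];
    const pipeline = pipelines.find((p) => p.canProcess(rawContent));
    if (pipeline) {
      // process() decodes the buffer via convertToString() and runs the middleware stack.
      const processed = await pipeline.process(rawContent, options, fetcher);
      console.log(processed.metadata.title, processed.links.length, processed.errors.length);
      await pipeline.close(); // releases the Playwright browser for HtmlPipeline
    }
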
@@ -976,12 +1086,15 @@ class BaseScraperStrategy {
  class WebScraperStrategy extends BaseScraperStrategy {
    httpFetcher = new HttpFetcher();
    shouldFollowLinkFn;
-   playwrightMiddleware;
-   // Add member
+   htmlPipeline;
+   markdownPipeline;
+   pipelines;
    constructor(options = {}) {
      super({ urlNormalizerOptions: options.urlNormalizerOptions });
      this.shouldFollowLinkFn = options.shouldFollowLink;
-     this.playwrightMiddleware = new HtmlPlaywrightMiddleware();
+     this.htmlPipeline = new HtmlPipeline();
+     this.markdownPipeline = new MarkdownPipeline();
+     this.pipelines = [this.htmlPipeline, this.markdownPipeline];
    }
    canHandle(url) {
      try {
@@ -1015,54 +1128,28 @@ class WebScraperStrategy extends BaseScraperStrategy {
        followRedirects: options.followRedirects
      };
      const rawContent = await this.httpFetcher.fetch(url, fetchOptions);
-     const initialContext = {
-       content: rawContent.content,
-       contentType: rawContent.mimeType,
-       source: rawContent.source,
-       // Use the final source URL after redirects
-       metadata: {},
-       links: [],
-       errors: [],
-       options,
-       fetcher: this.httpFetcher
-     };
-     let pipeline;
-     if (initialContext.contentType.startsWith("text/html")) {
-       const htmlPipelineSteps = [
-         this.playwrightMiddleware,
-         // Use the instance member
-         // TODO: Add HtmlJsExecutorMiddleware here if needed based on options
-         new HtmlCheerioParserMiddleware(),
-         // Always runs after content is finalized
-         new HtmlMetadataExtractorMiddleware(),
-         new HtmlLinkExtractorMiddleware(),
-         new HtmlSanitizerMiddleware(),
-         // Element remover
-         new HtmlToMarkdownMiddleware()
-       ];
-       pipeline = new ContentProcessingPipeline(htmlPipelineSteps);
-     } else if (initialContext.contentType === "text/markdown" || initialContext.contentType === "text/plain") {
-       pipeline = new ContentProcessingPipeline([
-         new MarkdownMetadataExtractorMiddleware(),
-         new MarkdownLinkExtractorMiddleware()
-         // Placeholder for now
-       ]);
-     } else {
+     let processed;
+     for (const pipeline of this.pipelines) {
+       if (pipeline.canProcess(rawContent)) {
+         processed = await pipeline.process(rawContent, options, this.httpFetcher);
+         break;
+       }
+     }
+     if (!processed) {
        logger.warn(
-         `Unsupported content type "${initialContext.contentType}" for URL ${url}. Skipping processing.`
+         `Unsupported content type "${rawContent.mimeType}" for URL ${url}. Skipping processing.`
        );
        return { document: void 0, links: [] };
      }
-     const finalContext = await pipeline.run(initialContext);
-     for (const err of finalContext.errors) {
+     for (const err of processed.errors) {
        logger.warn(`Processing error for ${url}: ${err.message}`);
      }
-     if (typeof finalContext.content !== "string" || !finalContext.content.trim()) {
+     if (!processed.textContent || !processed.textContent.trim()) {
        logger.warn(`No processable content found for ${url} after pipeline execution.`);
-       return { document: void 0, links: finalContext.links };
+       return { document: void 0, links: processed.links };
      }
      const baseUrl = new URL(options.url);
-     const filteredLinks = finalContext.links.filter((link) => {
+     const filteredLinks = processed.links.filter((link) => {
        try {
          const targetUrl = new URL(link);
          const scope = options.scope || "subpages";
@@ -1073,20 +1160,16 @@ class WebScraperStrategy extends BaseScraperStrategy {
      });
      return {
        document: {
-         content: finalContext.content,
-         // Final processed content (Markdown)
+         content: processed.textContent,
          metadata: {
-           url: finalContext.source,
-           // URL after redirects
-           // Ensure title is a string, default to "Untitled"
-           title: typeof finalContext.metadata.title === "string" ? finalContext.metadata.title : "Untitled",
+           url,
+           title: typeof processed.metadata.title === "string" ? processed.metadata.title : "Untitled",
            library: options.library,
-           version: options.version
-           // Add other metadata from context if needed
+           version: options.version,
+           ...processed.metadata
          }
        },
        links: filteredLinks
-       // Use the filtered links
      };
    } catch (error) {
      logger.error(`Failed processing page ${url}: ${error}`);
@@ -1101,7 +1184,8 @@ class WebScraperStrategy extends BaseScraperStrategy {
      try {
        await super.scrape(options, progressCallback, signal);
      } finally {
-       await this.playwrightMiddleware.closeBrowser();
+       await this.htmlPipeline.close();
+       await this.markdownPipeline.close();
      }
    }
  }
@@ -1153,6 +1237,15 @@ class GitHubScraperStrategy {
  }
  class LocalFileStrategy extends BaseScraperStrategy {
    fileFetcher = new FileFetcher();
+   htmlPipeline;
+   markdownPipeline;
+   pipelines;
+   constructor() {
+     super();
+     this.htmlPipeline = new HtmlPipeline();
+     this.markdownPipeline = new MarkdownPipeline();
+     this.pipelines = [this.htmlPipeline, this.markdownPipeline];
+   }
    canHandle(url) {
      return url.startsWith("file://");
    }
@@ -1167,62 +1260,41 @@ class LocalFileStrategy extends BaseScraperStrategy {
      }
      logger.info(`📄 Processing file ${this.pageCount}/${options.maxPages}: ${filePath}`);
      const rawContent = await this.fileFetcher.fetch(item.url);
-     const initialContext = {
-       content: rawContent.content,
-       contentType: rawContent.mimeType,
-       source: rawContent.source,
-       // file:// URL
-       metadata: {},
-       links: [],
-       // LocalFileStrategy doesn't extract links from file content itself
-       errors: [],
-       options
-       // Pass the full options object
-     };
-     let pipeline;
-     if (initialContext.contentType.startsWith("text/html")) {
-       pipeline = new ContentProcessingPipeline([
-         new HtmlCheerioParserMiddleware(),
-         new HtmlMetadataExtractorMiddleware(),
-         // No HtmlLinkExtractorMiddleware needed for local files
-         new HtmlSanitizerMiddleware(),
-         new HtmlToMarkdownMiddleware()
-       ]);
-     } else if (initialContext.contentType === "text/markdown" || initialContext.contentType === "text/plain" || // Treat plain text as markdown
-     initialContext.contentType.startsWith("text/")) {
-       pipeline = new ContentProcessingPipeline([
-         new MarkdownMetadataExtractorMiddleware()
-         // No MarkdownLinkExtractorMiddleware needed for local files
-       ]);
-     } else {
+     let processed;
+     for (const pipeline of this.pipelines) {
+       if (pipeline.canProcess(rawContent)) {
+         processed = await pipeline.process(rawContent, options, this.fileFetcher);
+         break;
+       }
+     }
+     if (!processed) {
        logger.warn(
-         `Unsupported content type "${initialContext.contentType}" for file ${filePath}. Skipping processing.`
+         `Unsupported content type "${rawContent.mimeType}" for file ${filePath}. Skipping processing.`
        );
        return { document: void 0, links: [] };
      }
-     const finalContext = await pipeline.run(initialContext);
-     for (const err of finalContext.errors) {
+     for (const err of processed.errors) {
        logger.warn(`Processing error for ${filePath}: ${err.message}`);
      }
-     const finalContentString = typeof finalContext.content === "string" ? finalContext.content : Buffer.from(finalContext.content).toString("utf-8");
      return {
        document: {
-         // Use the potentially empty string content
-         content: finalContentString,
+         content: typeof processed.textContent === "string" ? processed.textContent : "",
          metadata: {
-           url: finalContext.source,
-           // Use context source (file:// URL)
-           // Ensure title is a string, default to "Untitled"
-           title: typeof finalContext.metadata.title === "string" ? finalContext.metadata.title : "Untitled",
+           url: rawContent.source,
+           title: typeof processed.metadata.title === "string" ? processed.metadata.title : "Untitled",
            library: options.library,
            version: options.version
          }
        }
-       // No links returned from file content processing
      };
    }
    async scrape(options, progressCallback, signal) {
-     await super.scrape(options, progressCallback, signal);
+     try {
+       await super.scrape(options, progressCallback, signal);
+     } finally {
+       await this.htmlPipeline.close();
+       await this.markdownPipeline.close();
+     }
    }
  }
  class NpmScraperStrategy {
@@ -1456,7 +1528,9 @@ class PipelineManager {
      };
      this.jobMap.set(jobId, job);
      this.jobQueue.push(jobId);
-     logger.info(`📝 Job enqueued: ${jobId} for ${library}@${version}`);
+     logger.info(
+       `📝 Job enqueued: ${jobId} for ${library}${version ? `@${version}` : ""}`
+     );
      await this.callbacks.onJobStatusChange?.(job);
      if (this.isRunning) {
        this._processQueue();
@@ -2758,7 +2832,7 @@ class DocumentStore {
     */
    async initializeEmbeddings() {
      const modelSpec = process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
-     const { createEmbeddingModel } = await import("./EmbeddingFactory-DZKXkqOe.js");
+     const { createEmbeddingModel } = await import("./EmbeddingFactory-Dz1hdJJe.js");
      this.embeddings = createEmbeddingModel(modelSpec);
      const testVector = await this.embeddings.embedQuery("test");
      this.modelDimension = testVector.length;
@@ -2873,7 +2947,12 @@ class DocumentStore {
  `;
        return `${header}${doc.pageContent}`;
      });
-     const rawEmbeddings = await this.embeddings.embedDocuments(texts);
+     const rawEmbeddings = [];
+     for (let i = 0; i < texts.length; i += EMBEDDING_BATCH_SIZE) {
+       const batchTexts = texts.slice(i, i + EMBEDDING_BATCH_SIZE);
+       const batchEmbeddings = await this.embeddings.embedDocuments(batchTexts);
+       rawEmbeddings.push(...batchEmbeddings);
+     }
      const paddedEmbeddings = rawEmbeddings.map((vector) => this.padVector(vector));
      const transaction = this.db.transaction((docs) => {
        for (let i = 0; i < docs.length; i++) {
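
The embedding call is now chunked into batches of EMBEDDING_BATCH_SIZE (100) texts instead of one request for the whole document set, keeping each request to the embedding provider bounded. A roughly equivalent standalone sketch (illustrative; embedDocuments stands in for this.embeddings.embedDocuments):

    const EMBEDDING_BATCH_SIZE = 100;
    async function embedInBatches(texts, embedDocuments) {
      const vectors = [];
      for (let i = 0; i < texts.length; i += EMBEDDING_BATCH_SIZE) {
        // At most 100 texts per request; results are concatenated in input order.
        const batch = texts.slice(i, i + EMBEDDING_BATCH_SIZE);
        vectors.push(...(await embedDocuments(batch)));
      }
      return vectors;
    }
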
@@ -3372,12 +3451,11 @@ class DocumentManagementService {
    }
  }
  export {
-   ContentProcessingPipeline as C,
    DocumentManagementService as D,
    FileFetcher as F,
    HttpFetcher as H,
    LibraryNotFoundError as L,
-   MarkdownMetadataExtractorMiddleware as M,
+   MarkdownPipeline as M,
    PipelineJobStatus as P,
    SearchTool as S,
    ToolError as T,
@@ -3392,18 +3470,14 @@ export {
    DEFAULT_HTTP_PORT as h,
    DEFAULT_MAX_CONCURRENCY as i,
    ScrapeMode as j,
-   HtmlPlaywrightMiddleware as k,
+   HtmlPipeline as k,
    logger as l,
-   HtmlCheerioParserMiddleware as m,
-   HtmlMetadataExtractorMiddleware as n,
-   HtmlSanitizerMiddleware as o,
-   HtmlToMarkdownMiddleware as p,
-   ScraperError as q,
-   createJSDOM as r,
-   setLogLevel as s,
-   getProjectRoot as t,
-   DEFAULT_WEB_PORT as u,
-   DimensionError as v,
-   VECTOR_DIMENSION as w
+   ScraperError as m,
+   createJSDOM as n,
+   getProjectRoot as o,
+   DEFAULT_WEB_PORT as p,
+   DimensionError as q,
+   VECTOR_DIMENSION as r,
+   setLogLevel as s
  };
- //# sourceMappingURL=DocumentManagementService-BupnR1eC.js.map
+ //# sourceMappingURL=DocumentManagementService-BZ_ZZgPI.js.map