npm - docusaurus-plugin-llms - Versions diffs - 0.1.5 → 0.2.0 - Mend

docusaurus-plugin-llms 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/lib/utils.js CHANGED Viewed

@@ -35,17 +35,22 @@ var __importStar = (this && this.__importStar) || (function () {
         return result;
     };
 })();
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.writeFile = writeFile;
 exports.readFile = readFile;
 exports.shouldIgnoreFile = shouldIgnoreFile;
 exports.readMarkdownFiles = readMarkdownFiles;
 exports.extractTitle = extractTitle;
+exports.resolvePartialImports = resolvePartialImports;
 exports.cleanMarkdownContent = cleanMarkdownContent;
 exports.applyPathTransformations = applyPathTransformations;
 const fs = __importStar(require("fs/promises"));
 const path = __importStar(require("path"));
 const minimatch_1 = require("minimatch");
+const gray_matter_1 = __importDefault(require("gray-matter"));
 /**
  * Write content to a file
  * @param filePath - Path to write the file to
@@ -96,7 +101,10 @@ async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
             files.push(...subDirFiles);
         }
         else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
-            files.push(fullPath);
+            // Skip partial files (those starting with underscore)
+            if (!entry.name.startsWith('_')) {
+                files.push(fullPath);
+            }
         }
     }
     return files;
@@ -123,14 +131,117 @@ function extractTitle(data, content, filePath) {
         .replace(/-/g, ' ')
         .replace(/\b\w/g, c => c.toUpperCase());
 }
+/**
+ * Resolve and inline partial imports in markdown content
+ * @param content - The markdown content with import statements
+ * @param filePath - The path of the file containing the imports
+ * @returns Content with partials resolved
+ */
+async function resolvePartialImports(content, filePath) {
+    let resolved = content;
+    // Match import statements for partials and JSX usage
+    // Pattern 1: import PartialName from './_partial.mdx'
+    // Pattern 2: import { PartialName } from './_partial.mdx'
+    const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;
+    const imports = new Map();
+    // First pass: collect all imports
+    let match;
+    while ((match = importRegex.exec(content)) !== null) {
+        const componentName = match[1] || match[2];
+        const importPath = match[3];
+        // Only process imports for partial files (containing underscore)
+        if (importPath.includes('_')) {
+            imports.set(componentName, importPath);
+        }
+    }
+    // Resolve each partial import
+    for (const [componentName, importPath] of imports) {
+        try {
+            // Resolve the partial file path relative to the current file
+            const dir = path.dirname(filePath);
+            const partialPath = path.resolve(dir, importPath);
+            // Read the partial file
+            const partialContent = await readFile(partialPath);
+            const { content: partialMarkdown } = (0, gray_matter_1.default)(partialContent);
+            // Remove the import statement
+            resolved = resolved.replace(new RegExp(`^\\s*import\\s+(?:${componentName}|{\\s*${componentName}\\s*})\\s+from\\s+['"]${importPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}['"];?\\s*$`, 'gm'), '');
+            // Replace JSX usage with the partial content
+            // Handle both self-closing tags and tags with content
+            // <PartialName /> or <PartialName></PartialName> or <PartialName>...</PartialName>
+            const jsxRegex = new RegExp(`<${componentName}\\s*(?:[^>]*?)(?:/>|>[^<]*</${componentName}>)`, 'g');
+            resolved = resolved.replace(jsxRegex, partialMarkdown.trim());
+        }
+        catch (error) {
+            console.warn(`Failed to resolve partial import "${importPath}" in ${filePath}: ${error}`);
+            // Leave the import and usage as-is if we can't resolve it
+        }
+    }
+    return resolved;
+}
 /**
  * Clean markdown content for LLM consumption
  * @param content - Raw markdown content
+ * @param excludeImports - Whether to exclude import statements
+ * @param removeDuplicateHeadings - Whether to remove redundant content that duplicates heading text
  * @returns Cleaned content
  */
-function cleanMarkdownContent(content) {
-    // Remove HTML tags
-    let cleaned = content.replace(/<[^>]*>/g, '');
+function cleanMarkdownContent(content, excludeImports = false, removeDuplicateHeadings = false) {
+    let cleaned = content;
+    // Remove import statements if requested
+    if (excludeImports) {
+        // Remove ES6/React import statements
+        // This regex matches:
+        // - import ... from "...";
+        // - import ... from '...';
+        // - import { ... } from "...";
+        // - import * as ... from "...";
+        // - import "..."; (side-effect imports)
+        cleaned = cleaned.replace(/^\s*import\s+.*?;?\s*$/gm, '');
+    }
+    // Remove HTML tags, but preserve XML content in code blocks
+    // We need to be selective to avoid removing XML content from code blocks
+    // This regex targets common HTML tags while being more conservative about XML
+    cleaned = cleaned.replace(/<\/?(?:div|span|p|br|hr|img|a|strong|em|b|i|u|h[1-6]|ul|ol|li|table|tr|td|th|thead|tbody)\b[^>]*>/gi, '');
+    // Remove redundant content that just repeats the heading (if requested)
+    if (removeDuplicateHeadings) {
+        // Split content into lines and process line by line
+        const lines = cleaned.split('\n');
+        const processedLines = [];
+        let i = 0;
+        while (i < lines.length) {
+            const currentLine = lines[i];
+            // Check if current line is a heading (accounting for leading whitespace)
+            const headingMatch = currentLine.match(/^\s*(#+)\s+(.+)$/);
+            if (headingMatch) {
+                const headingLevel = headingMatch[1];
+                const headingText = headingMatch[2].trim();
+                processedLines.push(currentLine);
+                i++;
+                // Look ahead for potential redundant content
+                // Skip empty lines
+                while (i < lines.length && lines[i].trim() === '') {
+                    processedLines.push(lines[i]);
+                    i++;
+                }
+                // Check if the next non-empty line just repeats the heading text
+                // but is NOT itself a heading (to avoid removing valid headings of different levels)
+                if (i < lines.length) {
+                    const nextLine = lines[i].trim();
+                    const nextLineIsHeading = /^\s*#+\s+/.test(nextLine);
+                    // Only remove if it exactly matches the heading text AND is not a heading itself
+                    if (nextLine === headingText && !nextLineIsHeading) {
+                        // Skip this redundant line
+                        i++;
+                    }
+                }
+            }
+            else {
+                processedLines.push(currentLine);
+                i++;
+            }
+        }
+        cleaned = processedLines.join('\n');
+    }
     // Normalize whitespace
     cleaned = cleaned.replace(/\r\n/g, '\n')
         .replace(/\n{3,}/g, '\n\n')

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "docusaurus-plugin-llms",
-  "version": "0.1.5",
+  "version": "0.2.0",
   "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",
@@ -9,7 +9,7 @@
     "watch": "tsc --watch",
     "cleanup": "node cleanup.js",
     "prepublishOnly": "npm run build && npm run cleanup",
-    "test:unit": "node tests/test-path-transforms.js",
+    "test:unit": "node tests/test-path-transforms.js && node tests/test-header-deduplication.js && node tests/test-import-removal.js && node tests/test-partials.js && node tests/test-root-content.js",
     "test:integration": "node tests/test-path-transformation.js",
     "test": "npm run build && npm run test:unit && npm run test:integration"
   },

package/src/generator.ts CHANGED Viewed

@@ -36,6 +36,7 @@ function cleanDescriptionForToc(description: string): string {
  * @param fileDescription - Description for the file
  * @param includeFullContent - Whether to include full content or just links
  * @param version - Version of the file
+ * @param customRootContent - Optional custom content to include at the root level
  */
 export async function generateLLMFile(
   docs: DocInfo[],
@@ -43,24 +44,78 @@ export async function generateLLMFile(
   fileTitle: string,
   fileDescription: string,
   includeFullContent: boolean,
-  version?: string
+  version?: string,
+  customRootContent?: string
 ): Promise<void> {
   console.log(`Generating file: ${outputPath}, version: ${version || 'undefined'}`);
   const versionInfo = version ? `\n\nVersion: ${version}` : '';
   if (includeFullContent) {
-    // Generate full content file
+    // Generate full content file with header deduplication
+    const usedHeaders = new Set<string>();
     const fullContentSections = docs.map(doc => {
-      return `## ${doc.title}
+      // Check if content already starts with the same heading to avoid duplication
+      const trimmedContent = doc.content.trim();
+      const firstLine = trimmedContent.split('\n')[0];
+      // Check if the first line is a heading that matches our title
+      const headingMatch = firstLine.match(/^#+\s+(.+)$/);
+      const firstHeadingText = headingMatch ? headingMatch[1].trim() : null;
+      // Determine the header text to use (original title or make it unique)
+      let headerText = doc.title;
+      let uniqueHeader = headerText;
+      let counter = 1;
+      // If this header has been used before, make it unique by adding a suffix
+      while (usedHeaders.has(uniqueHeader.toLowerCase())) {
+        counter++;
+        // Try to make it more descriptive by adding the file path info if available
+        if (doc.path && counter === 2) {
+          const pathParts = doc.path.split('/');
+          const folderName = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
+          if (folderName) {
+            uniqueHeader = `${headerText} (${folderName.charAt(0).toUpperCase() + folderName.slice(1)})`;
+          } else {
+            uniqueHeader = `${headerText} (${counter})`;
+          }
+        } else {
+          uniqueHeader = `${headerText} (${counter})`;
+        }
+      }
+      usedHeaders.add(uniqueHeader.toLowerCase());
+      if (firstHeadingText === doc.title) {
+        // Content already has the same heading, replace it with our unique header if needed
+        if (uniqueHeader !== doc.title) {
+          const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
+          return `## ${uniqueHeader}
+${restOfContent}`;
+        } else {
+          // Replace the existing H1 with H2 to comply with llmstxt.org standard
+          const restOfContent = trimmedContent.split('\n').slice(1).join('\n');
+          return `## ${uniqueHeader}
+${restOfContent}`;
+        }
+      } else {
+        // Content doesn't have the same heading, add our unique H2 header
+        return `## ${uniqueHeader}
 ${doc.content}`;
+      }
     });
+    // Use custom root content or default message
+    const rootContent = customRootContent || 'This file contains all documentation content in a single document following the llmstxt.org standard.';
     const llmFileContent = `# ${fileTitle}
 > ${fileDescription}${versionInfo}
-This file contains all documentation content in a single document following the llmstxt.org standard.
+${rootContent}
 ${fullContentSections.join('\n\n---\n\n')}
 `;
@@ -75,11 +130,14 @@ ${fullContentSections.join('\n\n---\n\n')}
       return `- [${doc.title}](${doc.url})${cleanedDescription ? `: ${cleanedDescription}` : ''}`;
     });
+    // Use custom root content or default message
+    const rootContent = customRootContent || 'This file contains links to documentation sections following the llmstxt.org standard.';
     const llmFileContent = `# ${fileTitle}
 > ${fileDescription}${versionInfo}
-This file contains links to documentation sections following the llmstxt.org standard.
+${rootContent}
 ## Table of Contents
@@ -92,6 +150,75 @@ ${tocItems.join('\n')}
   console.log(`Generated: ${outputPath}`);
 }
+/**
+ * Generate individual markdown files for each document.
+ *
+ * Each document is written to `outputDir` under a filename slugged from its
+ * title (falling back to its path, then to "untitled"). Filenames are kept
+ * unique within one call by appending `-1`, `-2`, ... on collision.
+ *
+ * @param docs - Processed document information
+ * @param outputDir - Directory to write the markdown files
+ * @param siteUrl - Base site URL; trailing slashes are ignored
+ * @returns Updated docs with new URLs pointing to generated markdown files
+ */
+export async function generateIndividualMarkdownFiles(
+  docs: DocInfo[],
+  outputDir: string,
+  siteUrl: string
+): Promise<DocInfo[]> {
+  const updatedDocs: DocInfo[] = [];
+  // Filenames already handed out in this call, to ensure uniqueness
+  const usedFilenames = new Set<string>();
+  // Drop trailing slashes so the joined URL never contains "//" before the filename
+  const baseUrl = siteUrl.replace(/\/+$/, '');
+  for (const doc of docs) {
+    // Generate a filename from the document title: lowercase, runs of
+    // non-alphanumerics collapsed to "-", leading/trailing "-" trimmed
+    let baseFilename = doc.title
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, '-')
+      .replace(/^-+|-+$/g, '');
+    // Fallback to URL path if title generates empty filename
+    if (!baseFilename) {
+      baseFilename = doc.path
+        .replace(/^\/+|\/+$/g, '') // Remove leading/trailing slashes
+        .replace(/\//g, '-')
+        .replace(/[^a-z0-9-]/gi, '-')
+        .toLowerCase();
+    }
+    // Last resort so the file is never written as a bare ".md"
+    if (!baseFilename) {
+      baseFilename = 'untitled';
+    }
+    // Ensure filename uniqueness
+    let filename = `${baseFilename}.md`;
+    let counter = 1;
+    while (usedFilenames.has(filename)) {
+      filename = `${baseFilename}-${counter}.md`;
+      counter++;
+    }
+    usedFilenames.add(filename);
+    // Create markdown content following llmstxt.org standard
+    const markdownContent = `# ${doc.title}
+> ${doc.description}
+${doc.content}
+`;
+    // Write the markdown file
+    const markdownPath = path.join(outputDir, filename);
+    await writeFile(markdownPath, markdownContent);
+    // Create updated DocInfo with new URL pointing to the generated markdown file
+    const newUrl = `${baseUrl}/${filename}`;
+    updatedDocs.push({
+      ...doc,
+      url: newUrl,
+      path: `/${filename}` // Update path to the new markdown file
+    });
+    console.log(`Generated markdown file: ${filename}`);
+  }
+  return updatedDocs;
+}
 /**
  * Generate standard LLM files (llms.txt and llms-full.txt)
  * @param context - Plugin context
@@ -103,6 +230,7 @@ export async function generateStandardLLMFiles(
 ): Promise<void> {
   const {
     outDir,
+    siteUrl,
     docTitle,
     docDescription,
     options
@@ -115,7 +243,10 @@ export async function generateStandardLLMFiles(
     llmsFullTxtFilename = 'llms-full.txt',
     includeOrder = [],
     includeUnmatchedLast = true,
-    version
+    version,
+    generateMarkdownFiles = false,
+    rootContent,
+    fullRootContent
   } = options;
   if (!generateLLMsTxt && !generateLLMsFullTxt) {
@@ -123,7 +254,7 @@ export async function generateStandardLLMFiles(
   }
   // Process files for the standard outputs
-  const processedDocs = await processFilesWithPatterns(
+  let processedDocs = await processFilesWithPatterns(
     context,
     allDocFiles,
     [], // No specific include patterns - include all
@@ -134,6 +265,16 @@ export async function generateStandardLLMFiles(
   console.log(`Processed ${processedDocs.length} documentation files for standard LLM files`);
+  // Generate individual markdown files if requested
+  if (generateMarkdownFiles && processedDocs.length > 0) {
+    console.log('Generating individual markdown files...');
+    processedDocs = await generateIndividualMarkdownFiles(
+      processedDocs,
+      outDir,
+      siteUrl
+    );
+  }
   // Generate llms.txt
   if (generateLLMsTxt) {
     const llmsTxtPath = path.join(outDir, llmsTxtFilename);
@@ -143,7 +284,8 @@ export async function generateStandardLLMFiles(
       docTitle,
       docDescription,
       false, // links only
-      version
+      version,
+      rootContent
     );
   }
@@ -156,7 +298,8 @@ export async function generateStandardLLMFiles(
       docTitle,
       docDescription,
       true, // full content
-      version
+      version,
+      fullRootContent
     );
   }
 }
@@ -170,8 +313,8 @@ export async function generateCustomLLMFiles(
   context: PluginContext,
   allDocFiles: string[]
 ): Promise<void> {
-  const { outDir, docTitle, docDescription, options } = context;
-  const { customLLMFiles = [], ignoreFiles = [] } = options;
+  const { outDir, siteUrl, docTitle, docDescription, options } = context;
+  const { customLLMFiles = [], ignoreFiles = [], generateMarkdownFiles = false } = options;
   if (customLLMFiles.length === 0) {
     return;
@@ -189,7 +332,7 @@ export async function generateCustomLLMFiles(
     }
     // Process files according to the custom configuration
-    const customDocs = await processFilesWithPatterns(
+    let customDocs = await processFilesWithPatterns(
       context,
       allDocFiles,
       customFile.includePatterns,
@@ -199,6 +342,16 @@ export async function generateCustomLLMFiles(
     );
     if (customDocs.length > 0) {
+      // Generate individual markdown files if requested
+      if (generateMarkdownFiles) {
+        console.log(`Generating individual markdown files for custom file: ${customFile.filename}...`);
+        customDocs = await generateIndividualMarkdownFiles(
+          customDocs,
+          outDir,
+          siteUrl
+        );
+      }
       // Use custom title/description or fall back to defaults
       const customTitle = customFile.title || docTitle;
       const customDescription = customFile.description || docDescription;
@@ -211,7 +364,8 @@ export async function generateCustomLLMFiles(
         customTitle,
         customDescription,
         customFile.fullContent,
-        customFile.version
+        customFile.version,
+        customFile.rootContent
       );
       console.log(`Generated custom LLM file: ${customFile.filename} with ${customDocs.length} documents`);

package/src/index.ts CHANGED Viewed

@@ -9,7 +9,7 @@
  */
 import * as path from 'path';
-import type { LoadContext, Plugin } from '@docusaurus/types';
+import type { LoadContext, Plugin, Props, RouteConfig } from '@docusaurus/types';
 import { PluginOptions, PluginContext } from './types';
 import { collectDocFiles, generateStandardLLMFiles, generateCustomLLMFiles } from './generator';
@@ -40,6 +40,11 @@ export default function docusaurusPluginLLMs(
     includeOrder = [],
     includeUnmatchedLast = true,
     customLLMFiles = [],
+    excludeImports = false,
+    removeDuplicateHeadings = false,
+    generateMarkdownFiles = false,
+    rootContent,
+    fullRootContent,
   } = options;
   const {
@@ -77,6 +82,11 @@ export default function docusaurusPluginLLMs(
       includeOrder,
       includeUnmatchedLast,
       customLLMFiles,
+      excludeImports,
+      removeDuplicateHeadings,
+      generateMarkdownFiles,
+      rootContent,
+      fullRootContent,
     }
   };
@@ -86,12 +96,46 @@ export default function docusaurusPluginLLMs(
     /**
      * Generates LLM-friendly documentation files after the build is complete
      */
-    async postBuild(): Promise<void> {
+    async postBuild(props?: Props & { content: unknown }): Promise<void> {
       console.log('Generating LLM-friendly documentation...');
       try {
+        let enhancedContext = pluginContext;
+        // If props are provided (Docusaurus 3.x+), use the resolved routes
+        if (props?.routes) {
+          // Create a map of file paths to their resolved URLs
+          const routeMap = new Map<string, string>();
+          // Helper function to recursively process routes
+          const processRoutes = (routes: RouteConfig[]) => {
+            routes.forEach(route => {
+              if (route.path) {
+                // Store the actual resolved path
+                routeMap.set(route.path, route.path);
+              }
+              // Process nested routes recursively
+              if (route.routes) {
+                processRoutes(route.routes);
+              }
+            });
+          };
+          // Process all routes (cast to RouteConfig[] for recursive processing)
+          processRoutes(props.routes as RouteConfig[]);
+          // Pass the resolved routes to the plugin context
+          enhancedContext = {
+            ...pluginContext,
+            routesPaths: props.routesPaths,
+            routes: props.routes,
+            routeMap,
+          };
+        }
         // Collect all document files
-        const allDocFiles = await collectDocFiles(pluginContext);
+        const allDocFiles = await collectDocFiles(enhancedContext);
         // Skip further processing if no documents were found
         if (allDocFiles.length === 0) {
@@ -100,10 +144,10 @@ export default function docusaurusPluginLLMs(
         }
         // Process standard LLM files (llms.txt and llms-full.txt)
-        await generateStandardLLMFiles(pluginContext, allDocFiles);
+        await generateStandardLLMFiles(enhancedContext, allDocFiles);
         // Process custom LLM files
-        await generateCustomLLMFiles(pluginContext, allDocFiles);
+        await generateCustomLLMFiles(enhancedContext, allDocFiles);
         // Output overall statistics
         console.log(`Stats: ${allDocFiles.length} total available documents processed`);