npm - docusaurus-plugin-llms - Versions diffs - 0.1.1 → 0.1.2 - Mend

docusaurus-plugin-llms 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -39,6 +39,13 @@ module.exports = {
         title: 'My Project Documentation',
         description: 'Complete reference documentation for My Project',
         includeBlog: true,
+        // Path transformation options
+        pathTransformation: {
+          // Paths to ignore when constructing URLs (will be removed if found)
+          ignorePaths: ['docs'],
+          // Paths to add when constructing URLs (will be prepended if not already present)
+          addPaths: ['api'],
+        },
       },
     ],
     // ... your other plugins
@@ -59,6 +66,40 @@ module.exports = {
 | `llmsTxtFilename`     | string   | `'llms.txt'`      | Custom filename for the links file           |
 | `llmsFullTxtFilename` | string   | `'llms-full.txt'` | Custom filename for the full content file    |
 | `includeBlog`         | boolean  | `false`           | Whether to include blog content              |
+| `pathTransformation`  | object   | `undefined`       | Path transformation options for URL construction |
+| `pathTransformation.ignorePaths` | string[] | `[]`    | Path segments to ignore when constructing URLs |
+| `pathTransformation.addPaths`   | string[] | `[]`    | Path segments to add when constructing URLs |
+### Path Transformation Examples
+The path transformation feature allows you to manipulate how URLs are constructed from file paths:
+**Example 1**: Remove 'docs' from the URL path
+```js
+pathTransformation: {
+  ignorePaths: ['docs'],
+}
+```
+File path: `/content/docs/manual/decorators.md` → URL: `https://example.com/manual/decorators`
+**Example 2**: Add 'api' to the URL path
+```js
+pathTransformation: {
+  addPaths: ['api'],
+}
+```
+File path: `/content/manual/decorators.md` → URL: `https://example.com/api/manual/decorators`
+**Example 3**: Combine both transformations
+```js
+pathTransformation: {
+  ignorePaths: ['docs'],
+  addPaths: ['api'],
+}
+```
+File path: `/content/docs/manual/decorators.md` → URL: `https://example.com/api/manual/decorators`
+The configuration supports multiple path segments in both arrays.
 ## How It Works
@@ -79,6 +120,7 @@ These files follow the [llmstxt standard](https://llmstxt.org/), making your doc
 - 🧹 Cleans HTML and normalizes content for optimal LLM consumption
 - 📊 Provides statistics about generated documentation
 - 📚 Option to include blog posts
+- 🔄 Path transformation to customize URL construction
 ## Implementation Details
@@ -88,9 +130,32 @@ The plugin:
 2. Optionally includes blog content
 3. Extracts metadata, titles, and content from each file
 4. Creates proper URL links to each document section
-5. Generates a table of contents in `llms.txt`
-6. Combines all documentation content in `llms-full.txt`
-7. Provides statistics about the generated documentation
+5. Applies path transformations according to configuration (removing or adding path segments)
+6. Generates a table of contents in `llms.txt`
+7. Combines all documentation content in `llms-full.txt`
+8. Provides statistics about the generated documentation
+## Testing
+The plugin includes comprehensive tests in the `tests` directory:
+- **Unit tests**: Test the path transformation functionality in isolation
+- **Integration tests**: Simulate a Docusaurus build with various configurations
+To run the tests:
+```bash
+# Run all tests
+npm test
+# Run just the unit tests
+npm run test:unit
+# Run just the integration tests
+npm run test:integration
+```
+For more detailed testing instructions, see [tests/TESTING.md](tests/TESTING.md).
 ## Future Enhancements

package/lib/index.d.ts CHANGED Viewed

@@ -30,6 +30,13 @@ interface PluginOptions {
     llmsFullTxtFilename?: string;
     /** Whether to include blog content (default: false) */
     includeBlog?: boolean;
+    /** Path transformation options for URL construction */
+    pathTransformation?: {
+        /** Path segments to ignore when constructing URLs (will be removed if found) */
+        ignorePaths?: string[];
+        /** Path segments to add when constructing URLs (will be prepended if not already present) */
+        addPaths?: string[];
+    };
 }
 /**
  * A Docusaurus plugin to generate LLM-friendly documentation following

package/lib/index.js CHANGED Viewed

@@ -141,15 +141,54 @@ function cleanMarkdownContent(content) {
         .trim();
     return cleaned;
 }
/**
 * Apply path transformations according to configuration.
 *
 * Ignored entries are removed first, then `addPaths` entries are prepended.
 * Entries are matched literally (regex metacharacters are escaped) and only
 * against complete path segments, so `docs` never matches inside `mydocs`.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path
 */
function applyPathTransformations(urlPath, pathTransformation) {
    if (!pathTransformation) {
        return urlPath;
    }
    // Strip surrounding slashes from configured entries and drop empty ones,
    // so '/docs/' behaves like 'docs' and '' can never inject a bare slash
    const normalizeEntries = (entries) => (entries ?? [])
        .map((entry) => entry.replace(/^\/+|\/+$/g, ''))
        .filter((entry) => entry.length > 0);
    let transformedPath = urlPath;
    // Remove ignored path segments
    const ignorePaths = normalizeEntries(pathTransformation.ignorePaths);
    if (ignorePaths.length) {
        // A path that did not end in a slash must not gain one through removal
        const hadTrailingSlash = urlPath.endsWith('/');
        for (const ignorePath of ignorePaths) {
            // Escape regex metacharacters so the entry is matched literally
            const escaped = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Match the entry only as complete segment(s). The trailing separator is
            // checked with a lookahead instead of being consumed, so consecutive
            // occurrences such as 'docs/docs/intro' are all removed.
            const ignoreRegex = new RegExp(`(^|/)${escaped}(?=/|$)`, 'g');
            transformedPath = transformedPath.replace(ignoreRegex, '$1');
        }
        // Clean up any double slashes that might have been created
        transformedPath = transformedPath.replace(/\/+/g, '/');
        // Remove leading slash if present
        transformedPath = transformedPath.replace(/^\//, '');
        // Remove a trailing slash left behind by stripping the last segment
        if (!hadTrailingSlash) {
            transformedPath = transformedPath.replace(/\/$/, '');
        }
    }
    // Add path segments if they're not already present.
    // Process in reverse order to maintain the specified order in the final path,
    // because each entry is prepended to the front.
    const pathsToAdd = normalizeEntries(pathTransformation.addPaths).reverse();
    for (const addPath of pathsToAdd) {
        if (transformedPath === '') {
            // Nothing to join with: avoid producing a dangling 'api/'
            transformedPath = addPath;
        }
        else if (transformedPath !== addPath && !transformedPath.startsWith(`${addPath}/`)) {
            transformedPath = `${addPath}/${transformedPath}`;
        }
    }
    return transformedPath;
}
 /**
  * Process a markdown file and extract its metadata and content
  * @param filePath - Path to the markdown file
  * @param baseDir - Base directory
  * @param siteUrl - Base URL of the site
  * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
+ * @param pathTransformation - Path transformation configuration
  * @returns Processed file data
  */
-async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs') {
+async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs', pathTransformation) {
     const content = await readFile(filePath);
     const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
     const relativePath = path.relative(baseDir, filePath);
@@ -161,8 +200,16 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
     const linkPath = linkPathBase.endsWith('index')
         ? linkPathBase.replace(/\/index$/, '')
         : linkPathBase;
-    // Generate full URL
-    const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();
+    // Apply path transformations to the link path
+    const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
+    // Also apply path transformations to the pathPrefix if it's not empty
+    // This allows removing 'docs' from the path when specified in ignorePaths
+    let transformedPathPrefix = pathPrefix;
+    if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
+        transformedPathPrefix = '';
+    }
+    // Generate full URL with transformed path and path prefix
+    const fullUrl = new URL(`${transformedPathPrefix ? `${transformedPathPrefix}/` : ''}${transformedLinkPath}`, siteUrl).toString();
     // Extract title
     const title = extractTitle(data, markdownContent, filePath);
     // Get description from frontmatter or first paragraph
@@ -196,7 +243,7 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
  */
 function docusaurusPluginLLMs(context, options = {}) {
     // Set default options
-    const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, } = options;
+    const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, } = options;
     const { siteDir, siteConfig, outDir, } = context;
     return {
         name: 'docusaurus-plugin-llms',
@@ -225,7 +272,7 @@ function docusaurusPluginLLMs(context, options = {}) {
                         // Process each file
                         for (const filePath of docFiles) {
                             try {
-                                const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs');
+                                const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs', pathTransformation);
                                 allDocs.push(docInfo);
                             }
                             catch (err) {
@@ -252,7 +299,7 @@ function docusaurusPluginLLMs(context, options = {}) {
                             // Process each file
                             for (const filePath of blogFiles) {
                                 try {
-                                    const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog');
+                                    const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog', pathTransformation);
                                     allDocs.push(docInfo);
                                 }
                                 catch (err) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "docusaurus-plugin-llms",
-  "version": "0.1.1",
+  "version": "0.1.2",
   "description": "Docusaurus plugin for generating LLM-friendly documentation following the llmstxt.org standard",
   "main": "lib/index.js",
   "scripts": {
@@ -8,7 +8,9 @@
     "watch": "tsc --watch",
     "cleanup": "node cleanup.js",
     "prepublishOnly": "npm run build && npm run cleanup",
-    "test": "echo \"No tests specified\""
+    "test:unit": "node tests/test-path-transforms.js",
+    "test:integration": "node tests/test-path-transformation.js",
+    "test": "npm run build && npm run test:unit && npm run test:integration"
   },
   "files": [
     "lib",

package/src/index.ts CHANGED Viewed

@@ -55,6 +55,14 @@ interface PluginOptions {
   /** Whether to include blog content (default: false) */
   includeBlog?: boolean;
+  /** Path transformation options for URL construction */
+  pathTransformation?: {
+    /** Path segments to ignore when constructing URLs (will be removed if found) */
+    ignorePaths?: string[];
+    /** Path segments to add when constructing URLs (will be prepended if not already present) */
+    addPaths?: string[];
+  };
 }
 /**
@@ -165,19 +173,70 @@ function cleanMarkdownContent(content: string): string {
   return cleaned;
 }
/**
 * Apply path transformations according to configuration.
 *
 * Ignored entries are removed first, then `addPaths` entries are prepended.
 * Entries are matched literally (regex metacharacters are escaped) and only
 * against complete path segments, so `docs` never matches inside `mydocs`.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path
 */
function applyPathTransformations(
  urlPath: string,
  pathTransformation?: PluginOptions['pathTransformation']
): string {
  if (!pathTransformation) {
    return urlPath;
  }

  // Strip surrounding slashes from configured entries and drop empty ones,
  // so '/docs/' behaves like 'docs' and '' can never inject a bare slash
  const normalizeEntries = (entries?: string[]): string[] =>
    (entries ?? [])
      .map((entry) => entry.replace(/^\/+|\/+$/g, ''))
      .filter((entry) => entry.length > 0);

  let transformedPath = urlPath;

  // Remove ignored path segments
  const ignorePaths = normalizeEntries(pathTransformation.ignorePaths);
  if (ignorePaths.length) {
    // A path that did not end in a slash must not gain one through removal
    const hadTrailingSlash = urlPath.endsWith('/');

    for (const ignorePath of ignorePaths) {
      // Escape regex metacharacters so the entry is matched literally
      const escaped = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // Match the entry only as complete segment(s). The trailing separator is
      // checked with a lookahead instead of being consumed, so consecutive
      // occurrences such as 'docs/docs/intro' are all removed.
      const ignoreRegex = new RegExp(`(^|/)${escaped}(?=/|$)`, 'g');
      transformedPath = transformedPath.replace(ignoreRegex, '$1');
    }

    // Clean up any double slashes that might have been created
    transformedPath = transformedPath.replace(/\/+/g, '/');
    // Remove leading slash if present
    transformedPath = transformedPath.replace(/^\//, '');
    // Remove a trailing slash left behind by stripping the last segment
    if (!hadTrailingSlash) {
      transformedPath = transformedPath.replace(/\/$/, '');
    }
  }

  // Add path segments if they're not already present.
  // Process in reverse order to maintain the specified order in the final path,
  // because each entry is prepended to the front.
  const pathsToAdd = normalizeEntries(pathTransformation.addPaths).reverse();
  for (const addPath of pathsToAdd) {
    if (transformedPath === '') {
      // Nothing to join with: avoid producing a dangling 'api/'
      transformedPath = addPath;
    } else if (transformedPath !== addPath && !transformedPath.startsWith(`${addPath}/`)) {
      transformedPath = `${addPath}/${transformedPath}`;
    }
  }

  return transformedPath;
}
 /**
  * Process a markdown file and extract its metadata and content
  * @param filePath - Path to the markdown file
  * @param baseDir - Base directory
  * @param siteUrl - Base URL of the site
  * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
+ * @param pathTransformation - Path transformation configuration
  * @returns Processed file data
  */
 async function processMarkdownFile(
   filePath: string,
   baseDir: string,
   siteUrl: string,
-  pathPrefix: string = 'docs'
+  pathPrefix: string = 'docs',
+  pathTransformation?: PluginOptions['pathTransformation']
 ): Promise<DocInfo> {
   const content = await readFile(filePath);
   const { data, content: markdownContent } = matter(content);
@@ -194,8 +253,21 @@ async function processMarkdownFile(
     ? linkPathBase.replace(/\/index$/, '')
     : linkPathBase;
-  // Generate full URL
-  const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();
+  // Apply path transformations to the link path
+  const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
+  // Also apply path transformations to the pathPrefix if it's not empty
+  // This allows removing 'docs' from the path when specified in ignorePaths
+  let transformedPathPrefix = pathPrefix;
+  if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
+    transformedPathPrefix = '';
+  }
+  // Generate full URL with transformed path and path prefix
+  const fullUrl = new URL(
+    `${transformedPathPrefix ? `${transformedPathPrefix}/` : ''}${transformedLinkPath}`,
+    siteUrl
+  ).toString();
   // Extract title
   const title = extractTitle(data, markdownContent, filePath);
@@ -247,6 +319,7 @@ export default function docusaurusPluginLLMs(
     llmsTxtFilename = 'llms.txt',
     llmsFullTxtFilename = 'llms-full.txt',
     includeBlog = false,
+    pathTransformation,
   } = options;
   const {
@@ -296,7 +369,8 @@ export default function docusaurusPluginLLMs(
                   filePath,
                   fullDocsDir,
                   siteUrl,
-                  'docs'
+                  'docs',
+                  pathTransformation
                 );
                 allDocs.push(docInfo);
               } catch (err: any) {
@@ -329,7 +403,8 @@ export default function docusaurusPluginLLMs(
                     filePath,
                     blogDir,
                     siteUrl,
-                    'blog'
+                    'blog',
+                    pathTransformation
                   );
                   allDocs.push(docInfo);
                 } catch (err: any) {