docusaurus-plugin-llms 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,6 +14,8 @@ A Docusaurus plugin for generating LLM-friendly documentation following the [llm
14
14
  - 📚 Option to include blog posts
15
15
  - 🧩 Custom LLM files for specific documentation sections
16
16
  - 🧹 Cleans HTML and normalizes content for optimal LLM consumption
17
+ - 🚫 Optional import statement removal for cleaner MDX content
18
+ - 🔄 Optional duplicate heading removal for concise output
17
19
  - 📊 Provides statistics about generated documentation
18
20
 
19
21
  ## Table of Contents
@@ -24,6 +26,8 @@ A Docusaurus plugin for generating LLM-friendly documentation following the [llm
24
26
  - [Path Transformation Examples](#path-transformation-examples)
25
27
  - [Document Ordering Examples](#document-ordering-examples)
26
28
  - [Custom LLM Files](#custom-llm-files)
29
+ - [Content Cleaning Options](#content-cleaning-options)
30
+ - [Best Practices](#best-practices)
27
31
  - [How It Works](#how-it-works)
28
32
  - [Implementation Details](#implementation-details)
29
33
  - [Testing](#testing)
@@ -67,6 +71,11 @@ module.exports = {
67
71
  title: 'My Project Documentation',
68
72
  description: 'Complete reference documentation for My Project',
69
73
  includeBlog: true,
74
+ // Content cleaning options
75
+ excludeImports: true,
76
+ removeDuplicateHeadings: true,
77
+ // Generate individual markdown files following llmstxt.org specification
78
+ generateMarkdownFiles: true,
70
79
  // Control documentation order
71
80
  includeOrder: [
72
81
  'getting-started/*',
@@ -111,6 +120,7 @@ module.exports = {
111
120
  |----------------------------------|----------|-------------------|---------------------------------------------------------------|
112
121
  | `description` | string | Site tagline | Custom description to use in generated files |
113
122
  | `docsDir` | string | `'docs'` | Base directory for documentation files |
123
+ | `excludeImports` | boolean | `false` | Remove import statements from generated content |
114
124
  | `generateLLMsFullTxt` | boolean | `true` | Whether to generate the full content file |
115
125
  | `generateLLMsTxt` | boolean | `true` | Whether to generate the links file |
116
126
  | `ignoreFiles` | string[] | `[]` | Array of glob patterns for files to ignore |
@@ -122,9 +132,82 @@ module.exports = {
122
132
  | `pathTransformation.addPaths` | string[] | `[]` | Path segments to add when constructing URLs |
123
133
  | `pathTransformation.ignorePaths` | string[] | `[]` | Path segments to ignore when constructing URLs |
124
134
  | `pathTransformation` | object | `undefined` | Path transformation options for URL construction |
135
+ | `removeDuplicateHeadings` | boolean | `false` | Remove redundant content that duplicates heading text |
125
136
  | `title` | string | Site title | Custom title to use in generated files |
126
137
  | `version` | string | `undefined` | Global version to include in all generated files |
127
138
  | `customLLMFiles` | array | `[]` | Array of custom LLM file configurations |
139
+ | `generateMarkdownFiles` | boolean | `false` | Generate individual markdown files and link to them from llms.txt |
140
+ | `keepFrontMatter` | string[] | `[]` | Preserve selected front matter items when generating individual markdown files |
141
+ | `rootContent` | string | (see below) | Custom content to include at the root level of llms.txt |
142
+ | `fullRootContent` | string | (see below) | Custom content to include at the root level of llms-full.txt |
143
+
144
+ ### Custom Root Content
145
+
146
+ The `rootContent` and `fullRootContent` options allow you to customize the introductory content that appears in your generated files, following the llmstxt.org standard which allows "zero or more markdown sections (e.g. paragraphs, lists, etc) of any type except headings" after the title and description.
147
+
148
+ #### Default Content
149
+
150
+ If not specified, the plugin uses these defaults:
151
+ - **llms.txt**: "This file contains links to documentation sections following the llmstxt.org standard."
152
+ - **llms-full.txt**: "This file contains all documentation content in a single document following the llmstxt.org standard."
153
+
154
+ #### Custom Content Examples
155
+
156
+ **Example 1**: Add project-specific context
157
+ ```js
158
+ rootContent: `Welcome to the MyProject documentation.
159
+
160
+ This documentation covers:
161
+ - Installation and setup
162
+ - API reference
163
+ - Advanced usage guides
164
+ - Troubleshooting
165
+
166
+ For the latest updates, visit https://myproject.dev/changelog`
167
+ ```
168
+
169
+ **Example 2**: Add technical specifications
170
+ ```js
171
+ fullRootContent: `Complete offline documentation bundle for MyProject v2.0.
172
+
173
+ **Format**: Markdown with code examples
174
+ **Languages**: JavaScript, TypeScript, Python
175
+ **Last Generated**: ${new Date().toISOString()}
176
+
177
+ > Note: Some features require authentication tokens.
178
+ > See the Authentication section for details.`
179
+ ```
180
+
181
+ **Example 3**: Add navigation hints for AI assistants
182
+ ```js
183
+ rootContent: `This documentation is optimized for AI assistants and LLMs.
184
+
185
+ Quick navigation:
186
+ - For API endpoints, search for "API:"
187
+ - For code examples, search for "Example:"
188
+ - For configuration, search for "Config:"
189
+
190
+ All code examples are MIT licensed unless otherwise noted.`
191
+ ```
192
+
193
+ #### Custom Root Content for Custom LLM Files
194
+
195
+ You can also specify root content for each custom LLM file:
196
+
197
+ ```js
198
+ customLLMFiles: [
199
+ {
200
+ filename: 'llms-api.txt',
201
+ includePatterns: ['api/**/*.md'],
202
+ fullContent: true,
203
+ title: 'API Documentation',
204
+ rootContent: `Complete API reference for all REST endpoints.
205
+
206
+ Authentication required for all endpoints except /health.
207
+ Base URL: https://api.example.com/v2`
208
+ }
209
+ ]
210
+ ```
128
211
 
129
212
  ### Path Transformation Examples
130
213
 
@@ -194,6 +277,46 @@ includeOrder: [
194
277
  ```
195
278
  Result: Installation and quick-start guides appear first, followed by other getting-started files, then API documentation in a specific order.
196
279
 
280
+ ### Docusaurus Partials Support
281
+
282
+ The plugin fully supports [Docusaurus partials](https://docusaurus.io/docs/markdown-features/react#importing-markdown) - reusable MDX content files that can be imported into other documents.
283
+
284
+ #### How It Works
285
+
286
+ 1. **Partial files** (MDX files starting with underscore, e.g., `_shared-config.mdx`) are automatically excluded from the generated `llms*.txt` files
287
+ 2. **Import statements** for partials are resolved and the content is inlined when processing documents
288
+
289
+ #### Example
290
+
291
+ Given a partial file `_api-config.mdx`:
292
+ ````mdx
293
+ ## API Configuration
294
+
295
+ Set your API endpoint:
296
+ ```javascript
297
+ const API_URL = 'https://api.example.com';
298
+ ```
299
+ ````
300
+
301
+ And a document that imports it:
302
+ ```mdx
303
+ ---
304
+ title: Getting Started
305
+ ---
306
+
307
+ # Getting Started Guide
308
+
309
+ import ApiConfig from './_api-config.mdx';
310
+
311
+ <ApiConfig />
312
+
313
+ Now you can make API calls...
314
+ ```
315
+
316
+ The plugin will:
317
+ - Exclude `_api-config.mdx` from `llms.txt`
318
+ - Replace the import and `<ApiConfig />` with the actual content in the processed document
319
+
197
320
  ### Custom LLM Files
198
321
 
199
322
  In addition to the standard `llms.txt` and `llms-full.txt` files, you can generate custom LLM-friendly files for different sections of your documentation with the `customLLMFiles` option:
@@ -348,6 +471,363 @@ Version: 1.0.0
348
471
  This file contains all documentation content in a single document following the llmstxt.org standard.
349
472
  ```
350
473
 
474
+ ## Content Cleaning Options
475
+
476
+ The plugin provides advanced content cleaning options to optimize your documentation for LLM consumption by removing unnecessary elements that can clutter the output.
477
+
478
+ ### Import Statement Removal (`excludeImports`)
479
+
480
+ The `excludeImports` option removes JavaScript/TypeScript import statements from your MDX files, which are typically not useful for LLMs and can create noise in the generated documentation.
481
+
482
+ #### When to Use
483
+ - Your documentation uses MDX files with React components
484
+ - You have many import statements for UI components
485
+ - You want cleaner, more readable output for LLMs
486
+
487
+ #### Example
488
+
489
+ **Before** (with `excludeImports: false`):
490
+ ```markdown
491
+ import ApiTabs from "@theme/ApiTabs";
492
+ import DiscriminatorTabs from "@theme/DiscriminatorTabs";
493
+ import MethodEndpoint from "@theme/ApiExplorer/MethodEndpoint";
494
+ import SecuritySchemes from "@theme/ApiExplorer/SecuritySchemes";
495
+ import MimeTabs from "@theme/MimeTabs";
496
+ import ParamsItem from "@theme/ParamsItem";
497
+
498
+ # Create User Account
499
+
500
+ This endpoint creates a new user account...
501
+ ```
502
+
503
+ **After** (with `excludeImports: true`):
504
+ ```markdown
505
+ # Create User Account
506
+
507
+ This endpoint creates a new user account...
508
+ ```
509
+
510
+ #### Configuration
511
+ ```js
512
+ {
513
+ excludeImports: true, // Remove all import statements
514
+ }
515
+ ```
516
+
517
+ ### Duplicate Heading Removal (`removeDuplicateHeadings`)
518
+
519
+ The `removeDuplicateHeadings` option removes redundant content that simply repeats the heading text immediately after the heading, which is common in auto-generated API documentation.
520
+
521
+ #### When to Use
522
+ - Your documentation has redundant content that repeats heading text
523
+ - You have auto-generated API docs with minimal content
524
+ - You want to eliminate repetitive patterns for cleaner LLM consumption
525
+
526
+ #### Example
527
+
528
+ **Before** (with `removeDuplicateHeadings: false`):
529
+ ```markdown
530
+ # Create Deliverable
531
+
532
+ Create Deliverable
533
+
534
+ ---
535
+
536
+ # Update User Profile
537
+
538
+ Update User Profile
539
+
540
+ ---
541
+ ```
542
+
543
+ **After** (with `removeDuplicateHeadings: true`):
544
+ ```markdown
545
+ # Create Deliverable
546
+
547
+ ---
548
+
549
+ # Update User Profile
550
+
551
+ ---
552
+ ```
553
+
554
+ #### Configuration
555
+ ```js
556
+ {
557
+ removeDuplicateHeadings: true, // Remove redundant heading text
558
+ }
559
+ ```
560
+
561
+ ### Combined Content Cleaning
562
+
563
+ For optimal LLM-friendly output, you can combine both options:
564
+
565
+ ```js
566
+ module.exports = {
567
+ plugins: [
568
+ [
569
+ 'docusaurus-plugin-llms',
570
+ {
571
+ // Enable both content cleaning options for optimal LLM output
572
+ excludeImports: true,
573
+ removeDuplicateHeadings: true,
574
+
575
+ // Other configuration options...
576
+ generateLLMsTxt: true,
577
+ generateLLMsFullTxt: true,
578
+ docsDir: 'docs',
579
+ },
580
+ ],
581
+ ],
582
+ };
583
+ ```
584
+
585
+ ### Content Cleaning by Use Case
586
+
587
+ #### Minimal Cleanup (Default Behavior)
588
+ ```js
589
+ {
590
+ excludeImports: false,
591
+ removeDuplicateHeadings: false
592
+ }
593
+ ```
594
+ - Preserves all original content
595
+ - Suitable when you want to keep import statements for reference
596
+ - Good for documentation that doesn't have redundant patterns
597
+
598
+ #### Import Cleanup Only
599
+ ```js
600
+ {
601
+ excludeImports: true,
602
+ removeDuplicateHeadings: false
603
+ }
604
+ ```
605
+ - Removes import statements but keeps all content
606
+ - Good for MDX-heavy documentation sites
607
+ - Maintains content structure while removing technical imports
608
+
609
+ #### Full Cleanup (Recommended for LLMs)
610
+ ```js
611
+ {
612
+ excludeImports: true,
613
+ removeDuplicateHeadings: true
614
+ }
615
+ ```
616
+ - Maximum cleanup for LLM consumption
617
+ - Removes both imports and redundant content
618
+ - Recommended for API documentation and auto-generated content
619
+ - Produces the cleanest, most concise output
620
+
621
+ ## Best Practices
622
+
623
+ ### For API Documentation
624
+ If you have auto-generated API documentation (like OpenAPI docs), enable both cleaning options:
625
+
626
+ ```js
627
+ {
628
+ excludeImports: true, // Remove React component imports
629
+ removeDuplicateHeadings: true, // Remove redundant API endpoint descriptions
630
+ generateLLMsFullTxt: true, // Create comprehensive single file
631
+ }
632
+ ```
633
+
634
+ ### For Tutorial Content
635
+ For hand-written tutorials and guides, you might want selective cleaning:
636
+
637
+ ```js
638
+ {
639
+ excludeImports: true, // Remove any MDX imports
640
+ removeDuplicateHeadings: false, // Keep all content as written
641
+ includeOrder: [ // Organize content logically
642
+ 'getting-started/*',
643
+ 'tutorials/*',
644
+ 'advanced/*'
645
+ ]
646
+ }
647
+ ```
648
+
649
+ ### For Multi-Language Documentation
650
+ Create separate clean files for different programming languages:
651
+
652
+ ```js
653
+ {
654
+ excludeImports: true,
655
+ removeDuplicateHeadings: true,
656
+ customLLMFiles: [
657
+ {
658
+ filename: 'llms-python.txt',
659
+ includePatterns: ['**/python/**/*.md'],
660
+ fullContent: true,
661
+ title: 'Python Documentation'
662
+ },
663
+ {
664
+ filename: 'llms-javascript.txt',
665
+ includePatterns: ['**/javascript/**/*.md'],
666
+ fullContent: true,
667
+ title: 'JavaScript Documentation'
668
+ }
669
+ ]
670
+ }
671
+ ```
672
+
673
+ ### Performance Considerations
674
+ - Content cleaning adds minimal processing overhead
675
+ - Both options work on the content after HTML tag removal
676
+ - No impact on your site's build performance
677
+ - Cleaning happens only during the LLM file generation phase
678
+
679
+ ### Backward Compatibility
680
+ Both options default to `false`, ensuring existing configurations continue to work without changes. Only users who explicitly enable these features will see the cleaned output.
681
+
682
+ ## Markdown File Generation (`generateMarkdownFiles`)
683
+
684
+ The `generateMarkdownFiles` option enables the plugin to generate individual markdown files for each documentation page, following the [llmstxt.org specification](https://llmstxt.org/) more closely. When enabled, this creates separate `.md` files for LLM consumption instead of linking to your original documentation pages.
685
+
686
+ ### How It Works
687
+
688
+ **Default Behavior (generateMarkdownFiles: false)**:
689
+ - Generates `llms.txt` with links to your original documentation pages
690
+ - Example: `[Getting Started](https://yoursite.com/docs/getting-started)`
691
+
692
+ **With generateMarkdownFiles: true**:
693
+ - Generates individual markdown files (e.g., `getting-started.md`, `api-reference.md`)
694
+ - Generates `llms.txt` with links to these generated markdown files
695
+ - Example: `[Getting Started](https://yoursite.com/getting-started.md)`
696
+
697
+ ### Key Benefits
698
+
699
+ 1. **Standards Compliance**: Follows the llmstxt.org specification by providing individual markdown files rather than linking to HTML pages
700
+ 2. **LLM Optimization**: Generated files contain clean, processed markdown optimized for LLM consumption
701
+ 3. **Self-Contained**: All necessary content is available in markdown format without requiring HTML parsing
702
+ 4. **Flexible Naming**: Automatically generates readable filenames based on document titles
703
+
704
+ ### Configuration Example
705
+
706
+ ```js
707
+ module.exports = {
708
+ plugins: [
709
+ [
710
+ 'docusaurus-plugin-llms',
711
+ {
712
+ generateMarkdownFiles: true, // Enable individual markdown file generation
713
+ generateLLMsTxt: true, // Generate index file linking to markdown files
714
+ excludeImports: true, // Clean up import statements
715
+ removeDuplicateHeadings: true, // Remove redundant content
716
+
717
+ // Other options work normally
718
+ includeOrder: ['getting-started/*', 'guides/*', 'api/*'],
719
+ pathTransformation: {
720
+ ignorePaths: ['docs']
721
+ }
722
+ }
723
+ ]
724
+ ]
725
+ }
726
+ ```
727
+
728
+ ### Generated File Structure
729
+
730
+ With `generateMarkdownFiles: true`, your output directory will contain:
731
+
732
+ ```
733
+ build/
734
+ ├── llms.txt # Index file with links to generated markdown files
735
+ ├── llms-full.txt # Full content file (if enabled)
736
+ ├── getting-started.md # Generated from your getting started docs
737
+ ├── api-reference.md # Generated from your API documentation
738
+ ├── user-guide.md # Generated from your user guides
739
+ └── ... # Other generated markdown files
740
+ ```
741
+
742
+ ### Filename Generation
743
+
744
+ The plugin generates readable filenames using this priority:
745
+
746
+ 1. **Document Title**: Converted to kebab-case (e.g., "Getting Started" → `getting-started.md`)
747
+ 2. **URL Path**: If title is unavailable, uses the document's URL path
748
+ 3. **Uniqueness**: Automatically appends numbers for duplicate names (e.g., `getting-started-1.md`)
749
+
750
+ ### Content Processing
751
+
752
+ Generated markdown files include:
753
+
754
+ - **Document title** as H1 heading
755
+ - **Document description** as blockquote (following llmstxt.org format)
756
+ - **Processed content** with optional cleaning (import removal, duplicate heading removal)
757
+ - **Proper markdown formatting** optimized for LLM consumption
758
+
759
+ ### Example Generated File
760
+
761
+ Input documentation about "API Authentication" would generate `api-authentication.md`:
762
+
763
+ ````markdown
764
+ # API Authentication
765
+
766
+ > Learn how to authenticate with our API using various methods
767
+
768
+ ## Overview
769
+
770
+ This guide covers all authentication methods supported by our API...
771
+
772
+ ## API Key Authentication
773
+
774
+ Use your API key to authenticate requests:
775
+
776
+ ```javascript
777
+ const client = new Client({ apiKey: 'your-key' });
778
+ ```
779
+ ````
780
+
781
+ ### Use Cases
782
+
783
+ #### Standards-Compliant Documentation
784
+ Perfect for projects that want to follow the llmstxt.org specification exactly:
785
+
786
+ ```js
787
+ {
788
+ generateMarkdownFiles: true,
789
+ generateLLMsTxt: true,
790
+ generateLLMsFullTxt: false // Optional: disable if only individual files are needed
791
+ }
792
+ ```
793
+
794
+ #### LLM Training Data
795
+ Generate clean markdown files for LLM training or fine-tuning:
796
+
797
+ ```js
798
+ {
799
+ generateMarkdownFiles: true,
800
+ excludeImports: true,
801
+ removeDuplicateHeadings: true,
802
+ customLLMFiles: [
803
+ {
804
+ filename: 'training-data.txt',
805
+ includePatterns: ['**/*.md'],
806
+ fullContent: true
807
+ }
808
+ ]
809
+ }
810
+ ```
811
+
812
+ #### Multi-Format Output
813
+ Generate both original links and markdown files for different use cases:
814
+
815
+ ```js
816
+ {
817
+ generateLLMsTxt: true, // Links to original pages
818
+ generateMarkdownFiles: true, // Also generate individual markdown files
819
+ llmsTxtFilename: 'llms-original.txt', // Original links file
820
+ // The markdown-linked version will be in llms.txt
821
+ }
822
+ ```
823
+
824
+ ### Compatibility
825
+
826
+ - **Fully backward compatible**: Defaults to `false`, existing configurations unchanged
827
+ - **Works with all existing options**: Path transformations, custom LLM files, content cleaning
828
+ - **Respects ordering**: Generated files maintain the same order as configured with `includeOrder`
829
+ - **Custom LLM files**: Also support markdown file generation when the global option is enabled
830
+
351
831
  ## How It Works
352
832
 
353
833
  This plugin automatically generates the following files during the build process:
@@ -10,8 +10,19 @@ import { DocInfo, PluginContext } from './types';
10
10
  * @param fileDescription - Description for the file
11
11
  * @param includeFullContent - Whether to include full content or just links
12
12
  * @param version - Version of the file
13
+ * @param customRootContent - Optional custom content to include at the root level
13
14
  */
14
- export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string): Promise<void>;
15
+ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string, customRootContent?: string): Promise<void>;
16
+ /**
17
+ * Generate individual markdown files for each document
18
+ * @param docs - Processed document information
19
+ * @param outputDir - Directory to write the markdown files
20
+ * @param siteUrl - Base site URL
21
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
22
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
23
+ * @returns Updated docs with new URLs pointing to generated markdown files
24
+ */
25
+ export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string, docsDir?: string, keepFrontMatter?: string[]): Promise<DocInfo[]>;
15
26
  /**
16
27
  * Generate standard LLM files (llms.txt and llms-full.txt)
17
28
  * @param context - Plugin context