docusaurus-plugin-llms 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,6 +14,8 @@ A Docusaurus plugin for generating LLM-friendly documentation following the [llm
14
14
  - 📚 Option to include blog posts
15
15
  - 🧩 Custom LLM files for specific documentation sections
16
16
  - 🧹 Cleans HTML and normalizes content for optimal LLM consumption
17
+ - 🚫 Optional import statement removal for cleaner MDX content
18
+ - 🔄 Optional duplicate heading removal for concise output
17
19
  - 📊 Provides statistics about generated documentation
18
20
 
19
21
  ## Table of Contents
@@ -24,6 +26,8 @@ A Docusaurus plugin for generating LLM-friendly documentation following the [llm
24
26
  - [Path Transformation Examples](#path-transformation-examples)
25
27
  - [Document Ordering Examples](#document-ordering-examples)
26
28
  - [Custom LLM Files](#custom-llm-files)
29
+ - [Content Cleaning Options](#content-cleaning-options)
30
+ - [Best Practices](#best-practices)
27
31
  - [How It Works](#how-it-works)
28
32
  - [Implementation Details](#implementation-details)
29
33
  - [Testing](#testing)
@@ -67,6 +71,11 @@ module.exports = {
67
71
  title: 'My Project Documentation',
68
72
  description: 'Complete reference documentation for My Project',
69
73
  includeBlog: true,
74
+ // Content cleaning options
75
+ excludeImports: true,
76
+ removeDuplicateHeadings: true,
77
+ // Generate individual markdown files following llmstxt.org specification
78
+ generateMarkdownFiles: true,
70
79
  // Control documentation order
71
80
  includeOrder: [
72
81
  'getting-started/*',
@@ -111,6 +120,7 @@ module.exports = {
111
120
  |----------------------------------|----------|-------------------|---------------------------------------------------------------|
112
121
  | `description` | string | Site tagline | Custom description to use in generated files |
113
122
  | `docsDir` | string | `'docs'` | Base directory for documentation files |
123
+ | `excludeImports` | boolean | `false` | Remove import statements from generated content |
114
124
  | `generateLLMsFullTxt` | boolean | `true` | Whether to generate the full content file |
115
125
  | `generateLLMsTxt` | boolean | `true` | Whether to generate the links file |
116
126
  | `ignoreFiles` | string[] | `[]` | Array of glob patterns for files to ignore |
@@ -122,9 +132,81 @@ module.exports = {
122
132
  | `pathTransformation.addPaths` | string[] | `[]` | Path segments to add when constructing URLs |
123
133
  | `pathTransformation.ignorePaths` | string[] | `[]` | Path segments to ignore when constructing URLs |
124
134
  | `pathTransformation` | object | `undefined` | Path transformation options for URL construction |
135
+ | `removeDuplicateHeadings` | boolean | `false` | Remove redundant content that duplicates heading text |
125
136
  | `title` | string | Site title | Custom title to use in generated files |
126
137
  | `version` | string | `undefined` | Global version to include in all generated files |
127
138
  | `customLLMFiles` | array | `[]` | Array of custom LLM file configurations |
139
+ | `generateMarkdownFiles` | boolean | `false` | Generate individual markdown files and link to them from llms.txt |
140
+ | `rootContent` | string | (see below) | Custom content to include at the root level of llms.txt |
141
+ | `fullRootContent` | string | (see below) | Custom content to include at the root level of llms-full.txt |
142
+
143
+ ### Custom Root Content
144
+
145
+ The `rootContent` and `fullRootContent` options allow you to customize the introductory content that appears in your generated files, following the llmstxt.org standard which allows "zero or more markdown sections (e.g. paragraphs, lists, etc) of any type except headings" after the title and description.
146
+
147
+ #### Default Content
148
+
149
+ If not specified, the plugin uses these defaults:
150
+ - **llms.txt**: "This file contains links to documentation sections following the llmstxt.org standard."
151
+ - **llms-full.txt**: "This file contains all documentation content in a single document following the llmstxt.org standard."
152
+
153
+ #### Custom Content Examples
154
+
155
+ **Example 1**: Add project-specific context
156
+ ```js
157
+ rootContent: `Welcome to the MyProject documentation.
158
+
159
+ This documentation covers:
160
+ - Installation and setup
161
+ - API reference
162
+ - Advanced usage guides
163
+ - Troubleshooting
164
+
165
+ For the latest updates, visit https://myproject.dev/changelog`
166
+ ```
167
+
168
+ **Example 2**: Add technical specifications
169
+ ```js
170
+ fullRootContent: `Complete offline documentation bundle for MyProject v2.0.
171
+
172
+ **Format**: Markdown with code examples
173
+ **Languages**: JavaScript, TypeScript, Python
174
+ **Last Generated**: ${new Date().toISOString()}
175
+
176
+ > Note: Some features require authentication tokens.
177
+ > See the Authentication section for details.`
178
+ ```
179
+
180
+ **Example 3**: Add navigation hints for AI assistants
181
+ ```js
182
+ rootContent: `This documentation is optimized for AI assistants and LLMs.
183
+
184
+ Quick navigation:
185
+ - For API endpoints, search for "API:"
186
+ - For code examples, search for "Example:"
187
+ - For configuration, search for "Config:"
188
+
189
+ All code examples are MIT licensed unless otherwise noted.`
190
+ ```
191
+
192
+ #### Custom Root Content for Custom LLM Files
193
+
194
+ You can also specify root content for each custom LLM file:
195
+
196
+ ```js
197
+ customLLMFiles: [
198
+ {
199
+ filename: 'llms-api.txt',
200
+ includePatterns: ['api/**/*.md'],
201
+ fullContent: true,
202
+ title: 'API Documentation',
203
+ rootContent: `Complete API reference for all REST endpoints.
204
+
205
+ Authentication required for all endpoints except /health.
206
+ Base URL: https://api.example.com/v2`
207
+ }
208
+ ]
209
+ ```
128
210
 
129
211
  ### Path Transformation Examples
130
212
 
@@ -194,6 +276,46 @@ includeOrder: [
194
276
  ```
195
277
  Result: Installation and quick-start guides appear first, followed by other getting-started files, then API documentation in a specific order.
196
278
 
279
+ ### Docusaurus Partials Support
280
+
281
+ The plugin fully supports [Docusaurus partials](https://docusaurus.io/docs/markdown-features/react#importing-markdown) - reusable MDX content files that can be imported into other documents.
282
+
283
+ #### How It Works
284
+
285
+ 1. **Partial files** (MDX files starting with underscore, e.g., `_shared-config.mdx`) are automatically excluded from the generated `llms*.txt` files
286
+ 2. **Import statements** for partials are resolved and the content is inlined when processing documents
287
+
288
+ #### Example
289
+
290
+ Given a partial file `_api-config.mdx`:
291
+ ```mdx
292
+ ## API Configuration
293
+
294
+ Set your API endpoint:
295
+ ```javascript
296
+ const API_URL = 'https://api.example.com';
297
+ ```
298
+ ```
299
+
300
+ And a document that imports it:
301
+ ```mdx
302
+ ---
303
+ title: Getting Started
304
+ ---
305
+
306
+ # Getting Started Guide
307
+
308
+ import ApiConfig from './_api-config.mdx';
309
+
310
+ <ApiConfig />
311
+
312
+ Now you can make API calls...
313
+ ```
314
+
315
+ The plugin will:
316
+ - Exclude `_api-config.mdx` from `llms.txt`
317
+ - Replace the import and `<ApiConfig />` with the actual content in the processed document
318
+
197
319
  ### Custom LLM Files
198
320
 
199
321
  In addition to the standard `llms.txt` and `llms-full.txt` files, you can generate custom LLM-friendly files for different sections of your documentation with the `customLLMFiles` option:
@@ -345,9 +467,366 @@ The generated files will include the version information under the description:
345
467
 
346
468
  Version: 1.0.0
347
469
 
348
- This file contains all documentation content in a single document following the llmtxt.org standard.
470
+ This file contains all documentation content in a single document following the llmstxt.org standard.
471
+ ```
472
+
473
+ ## Content Cleaning Options
474
+
475
+ The plugin provides advanced content cleaning options to optimize your documentation for LLM consumption by removing unnecessary elements that can clutter the output.
476
+
477
+ ### Import Statement Removal (`excludeImports`)
478
+
479
+ The `excludeImports` option removes JavaScript/TypeScript import statements from your MDX files, which are typically not useful for LLMs and can create noise in the generated documentation.
480
+
481
+ #### When to Use
482
+ - Your documentation uses MDX files with React components
483
+ - You have many import statements for UI components
484
+ - You want cleaner, more readable output for LLMs
485
+
486
+ #### Example
487
+
488
+ **Before** (with `excludeImports: false`):
489
+ ```markdown
490
+ import ApiTabs from "@theme/ApiTabs";
491
+ import DiscriminatorTabs from "@theme/DiscriminatorTabs";
492
+ import MethodEndpoint from "@theme/ApiExplorer/MethodEndpoint";
493
+ import SecuritySchemes from "@theme/ApiExplorer/SecuritySchemes";
494
+ import MimeTabs from "@theme/MimeTabs";
495
+ import ParamsItem from "@theme/ParamsItem";
496
+
497
+ # Create User Account
498
+
499
+ This endpoint creates a new user account...
500
+ ```
501
+
502
+ **After** (with `excludeImports: true`):
503
+ ```markdown
504
+ # Create User Account
505
+
506
+ This endpoint creates a new user account...
507
+ ```
508
+
509
+ #### Configuration
510
+ ```js
511
+ {
512
+ excludeImports: true, // Remove all import statements
513
+ }
514
+ ```
515
+
516
+ ### Duplicate Heading Removal (`removeDuplicateHeadings`)
517
+
518
+ The `removeDuplicateHeadings` option removes redundant content that simply repeats the heading text immediately after the heading, which is common in auto-generated API documentation.
519
+
520
+ #### When to Use
521
+ - Your documentation has redundant content that repeats heading text
522
+ - You have auto-generated API docs with minimal content
523
+ - You want to eliminate repetitive patterns for cleaner LLM consumption
524
+
525
+ #### Example
526
+
527
+ **Before** (with `removeDuplicateHeadings: false`):
528
+ ```markdown
529
+ # Create Deliverable
530
+
531
+ Create Deliverable
532
+
533
+ ---
534
+
535
+ # Update User Profile
536
+
537
+ Update User Profile
538
+
539
+ ---
540
+ ```
541
+
542
+ **After** (with `removeDuplicateHeadings: true`):
543
+ ```markdown
544
+ # Create Deliverable
545
+
546
+ ---
547
+
548
+ # Update User Profile
549
+
550
+ ---
551
+ ```
552
+
553
+ #### Configuration
554
+ ```js
555
+ {
556
+ removeDuplicateHeadings: true, // Remove redundant heading text
557
+ }
558
+ ```
559
+
560
+ ### Combined Content Cleaning
561
+
562
+ For optimal LLM-friendly output, you can combine both options:
563
+
564
+ ```js
565
+ module.exports = {
566
+ plugins: [
567
+ [
568
+ 'docusaurus-plugin-llms',
569
+ {
570
+ // Enable both content cleaning options for optimal LLM output
571
+ excludeImports: true,
572
+ removeDuplicateHeadings: true,
573
+
574
+ // Other configuration options...
575
+ generateLLMsTxt: true,
576
+ generateLLMsFullTxt: true,
577
+ docsDir: 'docs',
578
+ },
579
+ ],
580
+ ],
581
+ };
582
+ ```
583
+
584
+ ### Content Cleaning by Use Case
585
+
586
+ #### Minimal Cleanup (Default Behavior)
587
+ ```js
588
+ {
589
+ excludeImports: false,
590
+ removeDuplicateHeadings: false
591
+ }
592
+ ```
593
+ - Preserves all original content
594
+ - Suitable when you want to keep import statements for reference
595
+ - Good for documentation that doesn't have redundant patterns
596
+
597
+ #### Import Cleanup Only
598
+ ```js
599
+ {
600
+ excludeImports: true,
601
+ removeDuplicateHeadings: false
602
+ }
603
+ ```
604
+ - Removes import statements but keeps all content
605
+ - Good for MDX-heavy documentation sites
606
+ - Maintains content structure while removing technical imports
607
+
608
+ #### Full Cleanup (Recommended for LLMs)
609
+ ```js
610
+ {
611
+ excludeImports: true,
612
+ removeDuplicateHeadings: true
613
+ }
614
+ ```
615
+ - Maximum cleanup for LLM consumption
616
+ - Removes both imports and redundant content
617
+ - Recommended for API documentation and auto-generated content
618
+ - Produces the cleanest, most concise output
619
+
620
+ ## Best Practices
621
+
622
+ ### For API Documentation
623
+ If you have auto-generated API documentation (like OpenAPI docs), enable both cleaning options:
624
+
625
+ ```js
626
+ {
627
+ excludeImports: true, // Remove React component imports
628
+ removeDuplicateHeadings: true, // Remove redundant API endpoint descriptions
629
+ generateLLMsFullTxt: true, // Create comprehensive single file
630
+ }
631
+ ```
632
+
633
+ ### For Tutorial Content
634
+ For hand-written tutorials and guides, you might want selective cleaning:
635
+
636
+ ```js
637
+ {
638
+ excludeImports: true, // Remove any MDX imports
639
+ removeDuplicateHeadings: false, // Keep all content as written
640
+ includeOrder: [ // Organize content logically
641
+ 'getting-started/*',
642
+ 'tutorials/*',
643
+ 'advanced/*'
644
+ ]
645
+ }
646
+ ```
647
+
648
+ ### For Multi-Language Documentation
649
+ Create separate clean files for different programming languages:
650
+
651
+ ```js
652
+ {
653
+ excludeImports: true,
654
+ removeDuplicateHeadings: true,
655
+ customLLMFiles: [
656
+ {
657
+ filename: 'llms-python.txt',
658
+ includePatterns: ['**/python/**/*.md'],
659
+ fullContent: true,
660
+ title: 'Python Documentation'
661
+ },
662
+ {
663
+ filename: 'llms-javascript.txt',
664
+ includePatterns: ['**/javascript/**/*.md'],
665
+ fullContent: true,
666
+ title: 'JavaScript Documentation'
667
+ }
668
+ ]
669
+ }
670
+ ```
671
+
672
+ ### Performance Considerations
673
+ - Content cleaning adds minimal processing overhead
674
+ - Both options work on the content after HTML tag removal
675
+ - No impact on your site's build performance
676
+ - Cleaning happens only during the LLM file generation phase
677
+
678
+ ### Backward Compatibility
679
+ Both options default to `false`, ensuring existing configurations continue to work without changes. Only users who explicitly enable these features will see the cleaned output.
680
+
681
+ ## Markdown File Generation (`generateMarkdownFiles`)
682
+
683
+ The `generateMarkdownFiles` option enables the plugin to generate individual markdown files for each documentation page, following the [llmstxt.org specification](https://llmstxt.org/) more closely. When enabled, this creates separate `.md` files for LLM consumption instead of linking to your original documentation pages.
684
+
685
+ ### How It Works
686
+
687
+ **Default Behavior (generateMarkdownFiles: false)**:
688
+ - Generates `llms.txt` with links to your original documentation pages
689
+ - Example: `[Getting Started](https://yoursite.com/docs/getting-started)`
690
+
691
+ **With generateMarkdownFiles: true**:
692
+ - Generates individual markdown files (e.g., `getting-started.md`, `api-reference.md`)
693
+ - Generates `llms.txt` with links to these generated markdown files
694
+ - Example: `[Getting Started](https://yoursite.com/getting-started.md)`
695
+
696
+ ### Key Benefits
697
+
698
+ 1. **Standards Compliance**: Follows the llmstxt.org specification by providing individual markdown files rather than linking to HTML pages
699
+ 2. **LLM Optimization**: Generated files contain clean, processed markdown optimized for LLM consumption
700
+ 3. **Self-Contained**: All necessary content is available in markdown format without requiring HTML parsing
701
+ 4. **Flexible Naming**: Automatically generates readable filenames based on document titles
702
+
703
+ ### Configuration Example
704
+
705
+ ```js
706
+ module.exports = {
707
+ plugins: [
708
+ [
709
+ 'docusaurus-plugin-llms',
710
+ {
711
+ generateMarkdownFiles: true, // Enable individual markdown file generation
712
+ generateLLMsTxt: true, // Generate index file linking to markdown files
713
+ excludeImports: true, // Clean up import statements
714
+ removeDuplicateHeadings: true, // Remove redundant content
715
+
716
+ // Other options work normally
717
+ includeOrder: ['getting-started/*', 'guides/*', 'api/*'],
718
+ pathTransformation: {
719
+ ignorePaths: ['docs']
720
+ }
721
+ }
722
+ ]
723
+ ]
724
+ }
725
+ ```
726
+
727
+ ### Generated File Structure
728
+
729
+ With `generateMarkdownFiles: true`, your output directory will contain:
730
+
731
+ ```
732
+ build/
733
+ ├── llms.txt # Index file with links to generated markdown files
734
+ ├── llms-full.txt # Full content file (if enabled)
735
+ ├── getting-started.md # Generated from your getting started docs
736
+ ├── api-reference.md # Generated from your API documentation
737
+ ├── user-guide.md # Generated from your user guides
738
+ └── ... # Other generated markdown files
349
739
  ```
350
740
 
741
+ ### Filename Generation
742
+
743
+ The plugin generates readable filenames using this priority:
744
+
745
+ 1. **Document Title**: Converted to kebab-case (e.g., "Getting Started" → `getting-started.md`)
746
+ 2. **URL Path**: If title is unavailable, uses the document's URL path
747
+ 3. **Uniqueness**: Automatically appends numbers for duplicate names (e.g., `getting-started-1.md`)
748
+
749
+ ### Content Processing
750
+
751
+ Generated markdown files include:
752
+
753
+ - **Document title** as H1 heading
754
+ - **Document description** as blockquote (following llmstxt.org format)
755
+ - **Processed content** with optional cleaning (import removal, duplicate heading removal)
756
+ - **Proper markdown formatting** optimized for LLM consumption
757
+
758
+ ### Example Generated File
759
+
760
+ Input documentation about "API Authentication" would generate `api-authentication.md`:
761
+
762
+ ```markdown
763
+ # API Authentication
764
+
765
+ > Learn how to authenticate with our API using various methods
766
+
767
+ ## Overview
768
+
769
+ This guide covers all authentication methods supported by our API...
770
+
771
+ ## API Key Authentication
772
+
773
+ Use your API key to authenticate requests:
774
+
775
+ ```javascript
776
+ const client = new Client({ apiKey: 'your-key' });
777
+ ```
778
+ ```
779
+
780
+ ### Use Cases
781
+
782
+ #### Standards-Compliant Documentation
783
+ Perfect for projects that want to follow the llmstxt.org specification exactly:
784
+
785
+ ```js
786
+ {
787
+ generateMarkdownFiles: true,
788
+ generateLLMsTxt: true,
789
+ generateLLMsFullTxt: false // Optional: disable if only individual files are needed
790
+ }
791
+ ```
792
+
793
+ #### LLM Training Data
794
+ Generate clean markdown files for LLM training or fine-tuning:
795
+
796
+ ```js
797
+ {
798
+ generateMarkdownFiles: true,
799
+ excludeImports: true,
800
+ removeDuplicateHeadings: true,
801
+ customLLMFiles: [
802
+ {
803
+ filename: 'training-data.txt',
804
+ includePatterns: ['**/*.md'],
805
+ fullContent: true
806
+ }
807
+ ]
808
+ }
809
+ ```
810
+
811
+ #### Multi-Format Output
812
+ Generate both original links and markdown files for different use cases:
813
+
814
+ ```js
815
+ {
816
+ generateLLMsTxt: true, // Links to original pages
817
+ generateMarkdownFiles: true, // Also generate individual markdown files
818
+ llmsTxtFilename: 'llms-original.txt', // Original links file
819
+ // The markdown-linked version will be in llms.txt
820
+ }
821
+ ```
822
+
823
+ ### Compatibility
824
+
825
+ - **Fully backward compatible**: Defaults to `false`, existing configurations unchanged
826
+ - **Works with all existing options**: Path transformations, custom LLM files, content cleaning
827
+ - **Respects ordering**: Generated files maintain the same order as configured with `includeOrder`
828
+ - **Custom LLM files**: Also support markdown file generation when the global option is enabled
829
+
351
830
  ## How It Works
352
831
 
353
832
  This plugin automatically generates the following files during the build process:
@@ -10,8 +10,17 @@ import { DocInfo, PluginContext } from './types';
10
10
  * @param fileDescription - Description for the file
11
11
  * @param includeFullContent - Whether to include full content or just links
12
12
  * @param version - Version of the file
13
+ * @param customRootContent - Optional custom content to include at the root level
13
14
  */
14
- export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string): Promise<void>;
15
+ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string, customRootContent?: string): Promise<void>;
16
+ /**
17
+ * Generate individual markdown files for each document
18
+ * @param docs - Processed document information
19
+ * @param outputDir - Directory to write the markdown files
20
+ * @param siteUrl - Base site URL
21
+ * @returns Updated docs with new URLs pointing to generated markdown files
22
+ */
23
+ export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string): Promise<DocInfo[]>;
15
24
  /**
16
25
  * Generate standard LLM files (llms.txt and llms-full.txt)
17
26
  * @param context - Plugin context