docusaurus-plugin-llms 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -137,9 +137,12 @@ module.exports = {
137
137
  | `version` | string | `undefined` | Global version to include in all generated files |
138
138
  | `customLLMFiles` | array | `[]` | Array of custom LLM file configurations |
139
139
  | `generateMarkdownFiles` | boolean | `false` | Generate individual markdown files and link to them from llms.txt |
140
- | `keepFrontMatter` | string[] | [] | Preserve selected front matter items when generating individual markdown files
140
+ | `keepFrontMatter` | string[] | `[]` | Preserve selected front matter items when generating individual markdown files |
141
+ | `preserveDirectoryStructure` | boolean | `true` | Preserve full directory structure in generated markdown files (e.g., `docs/server/config.md` instead of `server/config.md`) |
142
+ | `processingBatchSize` | number | `100` | Batch size for processing documents to prevent out-of-memory errors on large sites |
141
143
  | `rootContent` | string | (see below) | Custom content to include at the root level of llms.txt |
142
144
  | `fullRootContent` | string | (see below) | Custom content to include at the root level of llms-full.txt |
145
+ | `logLevel` | string | `'normal'` | Logging level for plugin output: `'quiet'`, `'normal'`, or `'verbose'` |
143
146
 
144
147
  ### Custom Root Content
145
148
 
@@ -209,6 +212,37 @@ Base URL: https://api.example.com/v2`
209
212
  ]
210
213
  ```
211
214
 
215
+ ### Batch Processing for Large Sites
216
+
217
+ The plugin includes batch processing to prevent out-of-memory errors when processing very large documentation sites. By default, documents are processed in batches of 100, but you can configure this using the `processingBatchSize` option.
218
+
219
+ **When to adjust batch size:**
220
+ - **Large sites (1000+ documents)**: Reduce batch size (e.g., `50`) to lower memory usage
221
+ - **Small sites (< 100 documents)**: Default value is fine
222
+ - **Memory-constrained environments**: Reduce batch size to prevent OOM errors
223
+ - **High-memory systems**: Increase batch size (e.g., `200`) for faster processing
224
+
225
+ **Example configuration:**
226
+ ```js
227
+ module.exports = {
228
+ plugins: [
229
+ [
230
+ 'docusaurus-plugin-llms',
231
+ {
232
+ processingBatchSize: 50, // Process 50 documents at a time
233
+ // ... other options
234
+ },
235
+ ],
236
+ ],
237
+ };
238
+ ```
239
+
240
+ **How it works:**
241
+ - Documents are processed in chunks of the specified batch size
242
+ - Each batch is processed sequentially to control memory usage
243
+ - Document order is preserved across batches
244
+ - Progress is logged when processing multiple batches (in verbose mode)
245
+
212
246
  ### Path Transformation Examples
213
247
 
214
248
  The path transformation feature allows you to manipulate how URLs are constructed from file paths:
@@ -242,15 +276,32 @@ The configuration supports multiple path segments in both arrays.
242
276
 
243
277
  ### Document Ordering Examples
244
278
 
245
- The document ordering feature allows you to control the sequence in which files appear in the generated output:
279
+ The document ordering feature allows you to control the sequence in which files appear in the generated output.
280
+
281
+ #### Pattern Matching Behavior
282
+
283
+ Patterns in `includeOrder`, `ignoreFiles`, and `customLLMFiles.includePatterns` are matched against **both** site-relative and docs-relative paths for maximum flexibility:
284
+
285
+ - **Site-relative path**: The path relative to your site root (e.g., `docs/quickstart/file.md`)
286
+ - **Docs-relative path**: The path relative to your docs directory (e.g., `quickstart/file.md`)
287
+
288
+ This means both of these patterns will match the same file:
289
+ ```js
290
+ includeOrder: [
291
+ 'docs/quickstart/*', // Matches site-relative path
292
+ 'quickstart/*' // Matches docs-relative path (more intuitive!)
293
+ ]
294
+ ```
295
+
296
+ **Recommended approach**: Use docs-relative paths (without the `docs/` prefix) as they are more intuitive and portable across different configurations.
246
297
 
247
298
  **Example 1**: Basic Section Ordering
248
299
  ```js
249
300
  includeOrder: [
250
- 'getting-started/*',
251
- 'guides/*',
252
- 'api/*',
253
- 'advanced/*'
301
+ 'getting-started/*', // Matches docs/getting-started/*.md
302
+ 'guides/*', // Matches docs/guides/*.md
303
+ 'api/*', // Matches docs/api/*.md
304
+ 'advanced/*' // Matches docs/advanced/*.md
254
305
  ]
255
306
  ```
256
307
  Result: Files will appear in the generated output following this section order.
@@ -258,7 +309,7 @@ Result: Files will appear in the generated output following this section order.
258
309
  **Example 2**: Strict Inclusion List
259
310
  ```js
260
311
  includeOrder: [
261
- 'public-docs/**/*.md'
312
+ 'public-docs/**/*.md' // Matches docs/public-docs/**/*.md
262
313
  ],
263
314
  includeUnmatchedLast: false
264
315
  ```
@@ -267,16 +318,26 @@ Result: Only files matching 'public-docs/**/*.md' are included, all others are e
267
318
  **Example 3**: Detailed Ordering with Specific Files First
268
319
  ```js
269
320
  includeOrder: [
270
- 'getting-started/installation.md',
271
- 'getting-started/quick-start.md',
272
- 'getting-started/*.md',
273
- 'api/core/*.md',
274
- 'api/plugins/*.md',
275
- 'api/**/*.md'
321
+ 'getting-started/installation.md', // Specific file first
322
+ 'getting-started/quick-start.md', // Another specific file
323
+ 'getting-started/*.md', // Rest of getting-started
324
+ 'api/core/*.md', // Core API docs
325
+ 'api/plugins/*.md', // Plugin API docs
326
+ 'api/**/*.md' // All other API docs
276
327
  ]
277
328
  ```
278
329
  Result: Installation and quick-start guides appear first, followed by other getting-started files, then API documentation in a specific order.
279
330
 
331
+ **Example 4**: Nested Directory Patterns
332
+ ```js
333
+ includeOrder: [
334
+ 'tutorials/beginner/**/*', // All beginner tutorials (deeply nested)
335
+ 'tutorials/intermediate/*', // Intermediate tutorials (one level)
336
+ 'tutorials/**/*' // All other tutorials
337
+ ]
338
+ ```
339
+ Result: Beginner tutorials appear first (regardless of nesting depth), then intermediate, then everything else.
340
+
280
341
  ### Docusaurus Partials Support
281
342
 
282
343
  The plugin fully supports [Docusaurus partials](https://docusaurus.io/docs/markdown-features/react#importing-markdown) - reusable MDX content files that can be imported into other documents.
@@ -471,6 +532,130 @@ Version: 1.0.0
471
532
  This file contains all documentation content in a single document following the llmstxt.org standard.
472
533
  ```
473
534
 
535
+ ## Logging Configuration
536
+
537
+ The plugin includes a configurable logging system that allows you to control the amount of output during the build process.
538
+
539
+ ### Log Levels
540
+
541
+ The plugin supports three logging levels:
542
+
543
+ - **quiet**: Suppresses all output except errors
544
+ - **normal** (default): Shows standard informational messages and warnings
545
+ - **verbose**: Shows detailed progress information including file-by-file processing
546
+
547
+ ### Configuration
548
+
549
+ ```js
550
+ module.exports = {
551
+ plugins: [
552
+ [
553
+ 'docusaurus-plugin-llms',
554
+ {
555
+ logLevel: 'verbose', // Options: 'quiet', 'normal', 'verbose'
556
+ // Other configuration options...
557
+ },
558
+ ],
559
+ ],
560
+ };
561
+ ```
562
+
563
+ ### Log Level Details
564
+
565
+ #### Quiet Mode (`logLevel: 'quiet'`)
566
+
567
+ Only errors are displayed. Use this for clean builds in CI/CD environments or when you don't need build feedback.
568
+
569
+ ```js
570
+ {
571
+ logLevel: 'quiet'
572
+ }
573
+ ```
574
+
575
+ Output:
576
+ ```
577
+ [docusaurus-plugin-llms] ERROR: Error generating LLM documentation: ...
578
+ ```
579
+
580
+ #### Normal Mode (`logLevel: 'normal'`) - Default
581
+
582
+ Shows standard progress messages, warnings, and errors. This is the recommended setting for most users.
583
+
584
+ ```js
585
+ {
586
+ logLevel: 'normal' // or omit - this is the default
587
+ }
588
+ ```
589
+
590
+ Output:
591
+ ```
592
+ [docusaurus-plugin-llms] Generating LLM-friendly documentation...
593
+ [docusaurus-plugin-llms] Generating individual markdown files...
594
+ [docusaurus-plugin-llms] Generated: /path/to/llms.txt
595
+ [docusaurus-plugin-llms] Generated: /path/to/llms-full.txt
596
+ [docusaurus-plugin-llms] Stats: 42 total available documents processed
597
+ ```
598
+
599
+ #### Verbose Mode (`logLevel: 'verbose'`)
600
+
601
+ Shows detailed information about every file being processed. Use this for debugging or when you need detailed feedback.
602
+
603
+ ```js
604
+ {
605
+ logLevel: 'verbose'
606
+ }
607
+ ```
608
+
609
+ Output:
610
+ ```
611
+ [docusaurus-plugin-llms] Generating LLM-friendly documentation...
612
+ [docusaurus-plugin-llms] Generating file: /path/to/llms.txt, version: undefined
613
+ [docusaurus-plugin-llms] Processed 42 documentation files for standard LLM files
614
+ [docusaurus-plugin-llms] Generating individual markdown files...
615
+ [docusaurus-plugin-llms] Generated markdown file: getting-started.md
616
+ [docusaurus-plugin-llms] Generated markdown file: api/reference.md
617
+ [docusaurus-plugin-llms] Generated: /path/to/llms.txt
618
+ [docusaurus-plugin-llms] Generated: /path/to/llms-full.txt
619
+ [docusaurus-plugin-llms] Stats: 42 total available documents processed
620
+ ```
621
+
622
+ ### Use Cases
623
+
624
+ #### Development
625
+
626
+ Use normal or verbose mode during development to see what's being generated:
627
+
628
+ ```js
629
+ {
630
+ logLevel: 'verbose',
631
+ generateMarkdownFiles: true
632
+ }
633
+ ```
634
+
635
+ #### Production/CI
636
+
637
+ Use quiet mode in production builds or CI/CD to reduce log noise:
638
+
639
+ ```js
640
+ {
641
+ logLevel: 'quiet',
642
+ generateLLMsTxt: true,
643
+ generateLLMsFullTxt: true
644
+ }
645
+ ```
646
+
647
+ #### Debugging
648
+
649
+ Use verbose mode when troubleshooting issues:
650
+
651
+ ```js
652
+ {
653
+ logLevel: 'verbose',
654
+ excludeImports: true,
655
+ removeDuplicateHeadings: true
656
+ }
657
+ ```
658
+
474
659
  ## Content Cleaning Options
475
660
 
476
661
  The plugin provides advanced content cleaning options to optimize your documentation for LLM consumption by removing unnecessary elements that can clutter the output.
@@ -725,6 +910,35 @@ module.exports = {
725
910
  }
726
911
  ```
727
912
 
913
+ ### Directory Structure Options
914
+
915
+ #### Preserving Directory Structure (`preserveDirectoryStructure`)
916
+
917
+ By default (`preserveDirectoryStructure: true`), generated markdown files maintain the same directory structure as your HTML output, making them accessible at matching URL paths:
918
+
919
+ **With `preserveDirectoryStructure: true` (default)**:
920
+ ```
921
+ docs/server/config.md → build/docs/server/config.md
922
+ ```
923
+
924
+ **With `preserveDirectoryStructure: false`**:
925
+ ```
926
+ docs/server/config.md → build/server/config.md
927
+ ```
928
+
929
+ Preserving the directory structure is particularly useful when you want markdown files to sit alongside HTML files in the build output, allowing them to be served from the same URL path with a `.md` extension.
930
+
931
+ **Example configuration**:
932
+ ```js
933
+ {
934
+ generateMarkdownFiles: true,
935
+ preserveDirectoryStructure: true, // Default: matches HTML output structure
936
+ docsDir: 'docs'
937
+ }
938
+ ```
939
+
940
+ With this configuration, if your HTML is at `https://yoursite.com/docs/server/config.html`, the markdown will be at `https://yoursite.com/docs/server/config.md`.
941
+
728
942
  ### Generated File Structure
729
943
 
730
944
  With `generateMarkdownFiles: true`, your output directory will contain:
@@ -733,12 +947,28 @@ With `generateMarkdownFiles: true`, your output directory will contain:
733
947
  build/
734
948
  ├── llms.txt # Index file with links to generated markdown files
735
949
  ├── llms-full.txt # Full content file (if enabled)
736
- ├── getting-started.md # Generated from your getting started docs
737
- ├── api-reference.md # Generated from your API documentation
738
- ├── user-guide.md # Generated from your user guides
950
+ ├── docs/ # Preserves directory structure (default)
951
+ │   ├── getting-started.md
952
+ │   ├── api/
953
+ │   │   └── reference.md
954
+ │   └── server/
955
+ │       └── config.md
739
956
  └── ... # Other generated markdown files
740
957
  ```
741
958
 
959
+ Or with `preserveDirectoryStructure: false`:
960
+
961
+ ```
962
+ build/
963
+ ├── llms.txt # Index file with links to generated markdown files
964
+ ├── llms-full.txt # Full content file (if enabled)
965
+ ├── getting-started.md # No `docs/` prefix (old behavior)
966
+ ├── api/
967
+ │   └── reference.md
967
+ └── server/
968
+     └── config.md
970
+ ```
971
+
742
972
  ### Filename Generation
743
973
 
744
974
  The plugin generates readable filenames using this priority:
@@ -0,0 +1,44 @@
1
+ /**
2
+ * LLM file generation functions for the docusaurus-plugin-llms plugin
3
+ */
4
+ import { DocInfo, PluginContext } from './types';
5
+ /**
6
+ * Generate an LLM-friendly file
7
+ * @param docs - Processed document information
8
+ * @param outputPath - Path to write the output file
9
+ * @param fileTitle - Title for the file
10
+ * @param fileDescription - Description for the file
11
+ * @param includeFullContent - Whether to include full content or just links
12
+ * @param version - Version of the file
13
+ * @param customRootContent - Optional custom content to include at the root level
14
+ */
15
+ export declare function generateLLMFile(docs: DocInfo[], outputPath: string, fileTitle: string, fileDescription: string, includeFullContent: boolean, version?: string, customRootContent?: string): Promise<void>;
16
+ /**
17
+ * Generate individual markdown files for each document
18
+ * @param docs - Processed document information
19
+ * @param outputDir - Directory to write the markdown files
20
+ * @param siteUrl - Base site URL
21
+ * @param docsDir - The configured docs directory name (e.g., 'docs', 'documentation', etc.)
22
+ * @param keepFrontMatter - Array of frontmatter keys to preserve in generated files
23
+ * @param preserveDirectoryStructure - Whether to preserve the full directory structure (default: true)
24
+ * @returns Updated docs with new URLs pointing to generated markdown files
25
+ */
26
+ export declare function generateIndividualMarkdownFiles(docs: DocInfo[], outputDir: string, siteUrl: string, docsDir?: string, keepFrontMatter?: string[], preserveDirectoryStructure?: boolean): Promise<DocInfo[]>;
27
+ /**
28
+ * Generate standard LLM files (llms.txt and llms-full.txt)
29
+ * @param context - Plugin context
30
+ * @param allDocFiles - Array of all document files
31
+ */
32
+ export declare function generateStandardLLMFiles(context: PluginContext, allDocFiles: string[]): Promise<void>;
33
+ /**
34
+ * Generate custom LLM files based on configuration
35
+ * @param context - Plugin context
36
+ * @param allDocFiles - Array of all document files
37
+ */
38
+ export declare function generateCustomLLMFiles(context: PluginContext, allDocFiles: string[]): Promise<void>;
39
+ /**
40
+ * Collect all document files from docs directory and optionally blog
41
+ * @param context - Plugin context
42
+ * @returns Array of file paths
43
+ */
44
+ export declare function collectDocFiles(context: PluginContext): Promise<string[]>;