docusaurus-plugin-llms 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -8,184 +8,9 @@
8
8
  *
9
9
  * The plugin runs during the Docusaurus build process and scans all Markdown files in the docs directory.
10
10
  */
11
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
12
- if (k2 === undefined) k2 = k;
13
- var desc = Object.getOwnPropertyDescriptor(m, k);
14
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
15
- desc = { enumerable: true, get: function() { return m[k]; } };
16
- }
17
- Object.defineProperty(o, k2, desc);
18
- }) : (function(o, m, k, k2) {
19
- if (k2 === undefined) k2 = k;
20
- o[k2] = m[k];
21
- }));
22
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
23
- Object.defineProperty(o, "default", { enumerable: true, value: v });
24
- }) : function(o, v) {
25
- o["default"] = v;
26
- });
27
- var __importStar = (this && this.__importStar) || (function () {
28
- var ownKeys = function(o) {
29
- ownKeys = Object.getOwnPropertyNames || function (o) {
30
- var ar = [];
31
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
32
- return ar;
33
- };
34
- return ownKeys(o);
35
- };
36
- return function (mod) {
37
- if (mod && mod.__esModule) return mod;
38
- var result = {};
39
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
40
- __setModuleDefault(result, mod);
41
- return result;
42
- };
43
- })();
44
- var __importDefault = (this && this.__importDefault) || function (mod) {
45
- return (mod && mod.__esModule) ? mod : { "default": mod };
46
- };
47
11
  Object.defineProperty(exports, "__esModule", { value: true });
48
12
  exports.default = docusaurusPluginLLMs;
49
- const fs = __importStar(require("fs/promises"));
50
- const path = __importStar(require("path"));
51
- const gray_matter_1 = __importDefault(require("gray-matter"));
52
- const minimatch_1 = require("minimatch");
53
- /**
54
- * Write content to a file
55
- * @param filePath - Path to write the file to
56
- * @param data - Content to write
57
- */
58
- async function writeFile(filePath, data) {
59
- return fs.writeFile(filePath, data, 'utf8');
60
- }
61
- /**
62
- * Read content from a file
63
- * @param filePath - Path of the file to read
64
- * @returns Content of the file
65
- */
66
- async function readFile(filePath) {
67
- return fs.readFile(filePath, 'utf8');
68
- }
69
- /**
70
- * Check if a file should be ignored based on glob patterns
71
- * @param filePath - Path to the file
72
- * @param baseDir - Base directory for relative paths
73
- * @param ignorePatterns - Glob patterns for files to ignore
74
- * @returns Whether the file should be ignored
75
- */
76
- function shouldIgnoreFile(filePath, baseDir, ignorePatterns) {
77
- if (ignorePatterns.length === 0) {
78
- return false;
79
- }
80
- const relativePath = path.relative(baseDir, filePath);
81
- return ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
82
- }
83
- /**
84
- * Recursively reads all Markdown files in a directory
85
- * @param dir - Directory to scan
86
- * @param baseDir - Base directory for relative paths
87
- * @param ignorePatterns - Glob patterns for files to ignore
88
- * @returns Array of file paths
89
- */
90
- async function readMarkdownFiles(dir, baseDir, ignorePatterns = []) {
91
- const files = [];
92
- const entries = await fs.readdir(dir, { withFileTypes: true });
93
- for (const entry of entries) {
94
- const fullPath = path.join(dir, entry.name);
95
- if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns)) {
96
- continue;
97
- }
98
- if (entry.isDirectory()) {
99
- const subDirFiles = await readMarkdownFiles(fullPath, baseDir, ignorePatterns);
100
- files.push(...subDirFiles);
101
- }
102
- else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
103
- files.push(fullPath);
104
- }
105
- }
106
- return files;
107
- }
108
- /**
109
- * Extract title from content or use the filename
110
- * @param data - Frontmatter data
111
- * @param content - Markdown content
112
- * @param filePath - Path to the file
113
- * @returns Extracted title
114
- */
115
- function extractTitle(data, content, filePath) {
116
- // First try frontmatter
117
- if (data.title) {
118
- return data.title;
119
- }
120
- // Then try first heading
121
- const headingMatch = content.match(/^#\s+(.*)/m);
122
- if (headingMatch) {
123
- return headingMatch[1].trim();
124
- }
125
- // Finally use filename
126
- return path.basename(filePath, path.extname(filePath))
127
- .replace(/-/g, ' ')
128
- .replace(/\b\w/g, c => c.toUpperCase());
129
- }
130
- /**
131
- * Clean markdown content for LLM consumption
132
- * @param content - Raw markdown content
133
- * @returns Cleaned content
134
- */
135
- function cleanMarkdownContent(content) {
136
- // Remove HTML tags
137
- let cleaned = content.replace(/<[^>]*>/g, '');
138
- // Normalize whitespace
139
- cleaned = cleaned.replace(/\r\n/g, '\n')
140
- .replace(/\n{3,}/g, '\n\n')
141
- .trim();
142
- return cleaned;
143
- }
144
- /**
145
- * Process a markdown file and extract its metadata and content
146
- * @param filePath - Path to the markdown file
147
- * @param baseDir - Base directory
148
- * @param siteUrl - Base URL of the site
149
- * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
150
- * @returns Processed file data
151
- */
152
- async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs') {
153
- const content = await readFile(filePath);
154
- const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
155
- const relativePath = path.relative(baseDir, filePath);
156
- // Convert to URL path format (replace backslashes with forward slashes on Windows)
157
- const normalizedPath = relativePath.replace(/\\/g, '/');
158
- // Convert .md extension to appropriate path
159
- const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
160
- // Handle index files specially
161
- const linkPath = linkPathBase.endsWith('index')
162
- ? linkPathBase.replace(/\/index$/, '')
163
- : linkPathBase;
164
- // Generate full URL
165
- const fullUrl = new URL(`${pathPrefix}/${linkPath}`, siteUrl).toString();
166
- // Extract title
167
- const title = extractTitle(data, markdownContent, filePath);
168
- // Get description from frontmatter or first paragraph
169
- let description = data.description || '';
170
- if (!description) {
171
- const paragraphs = markdownContent.split('\n\n');
172
- for (const para of paragraphs) {
173
- if (para.trim() && !para.startsWith('#')) {
174
- description = para.trim();
175
- break;
176
- }
177
- }
178
- }
179
- // Clean and process content
180
- const cleanedContent = cleanMarkdownContent(markdownContent);
181
- return {
182
- title,
183
- path: normalizedPath,
184
- url: fullUrl,
185
- content: cleanedContent,
186
- description: description || '',
187
- };
188
- }
13
+ const generator_1 = require("./generator");
189
14
  /**
190
15
  * A Docusaurus plugin to generate LLM-friendly documentation following
191
16
  * the llmtxt.org standard
@@ -196,8 +21,36 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
196
21
  */
197
22
  function docusaurusPluginLLMs(context, options = {}) {
198
23
  // Set default options
199
- const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, } = options;
24
+ const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, includeOrder = [], includeUnmatchedLast = true, customLLMFiles = [], } = options;
200
25
  const { siteDir, siteConfig, outDir, } = context;
26
+ // Build the site URL: siteConfig.url followed by baseUrl with any trailing slash removed
27
+ const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
28
+ ? siteConfig.baseUrl.slice(0, -1)
29
+ : siteConfig.baseUrl || '');
30
+ // Create a plugin context object with processed options
31
+ const pluginContext = {
32
+ siteDir,
33
+ outDir,
34
+ siteUrl,
35
+ docsDir,
36
+ docTitle: title || siteConfig.title,
37
+ docDescription: description || siteConfig.tagline || '',
38
+ options: {
39
+ generateLLMsTxt,
40
+ generateLLMsFullTxt,
41
+ docsDir,
42
+ ignoreFiles,
43
+ title,
44
+ description,
45
+ llmsTxtFilename,
46
+ llmsFullTxtFilename,
47
+ includeBlog,
48
+ pathTransformation,
49
+ includeOrder,
50
+ includeUnmatchedLast,
51
+ customLLMFiles,
52
+ }
53
+ };
201
54
  return {
202
55
  name: 'docusaurus-plugin-llms',
203
56
  /**
@@ -205,122 +58,20 @@ function docusaurusPluginLLMs(context, options = {}) {
205
58
  */
206
59
  async postBuild() {
207
60
  console.log('Generating LLM-friendly documentation...');
208
- // Custom title and description or fallback to site values
209
- const docTitle = title || siteConfig.title;
210
- const docDescription = description || siteConfig.tagline || '';
211
- // Build the site URL with proper trailing slash
212
- const siteUrl = siteConfig.url + (siteConfig.baseUrl.endsWith('/')
213
- ? siteConfig.baseUrl.slice(0, -1)
214
- : siteConfig.baseUrl || '');
215
- // Initialize docs collection
216
- const allDocs = [];
217
61
  try {
218
- // Process docs directory
219
- const fullDocsDir = path.join(siteDir, docsDir);
220
- try {
221
- await fs.access(fullDocsDir);
222
- // Collect all markdown files from docs directory
223
- const docFiles = await readMarkdownFiles(fullDocsDir, siteDir, ignoreFiles);
224
- if (docFiles.length > 0) {
225
- // Process each file
226
- for (const filePath of docFiles) {
227
- try {
228
- const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs');
229
- allDocs.push(docInfo);
230
- }
231
- catch (err) {
232
- console.warn(`Error processing ${filePath}: ${err.message}`);
233
- }
234
- }
235
- console.log(`Processed ${docFiles.length} documentation files`);
236
- }
237
- else {
238
- console.warn('No markdown files found in docs directory.');
239
- }
240
- }
241
- catch (err) {
242
- console.warn(`Docs directory not found: ${fullDocsDir}`);
243
- }
244
- // Process blog if enabled
245
- if (includeBlog) {
246
- const blogDir = path.join(siteDir, 'blog');
247
- try {
248
- await fs.access(blogDir);
249
- // Collect all markdown files from blog directory
250
- const blogFiles = await readMarkdownFiles(blogDir, siteDir, ignoreFiles);
251
- if (blogFiles.length > 0) {
252
- // Process each file
253
- for (const filePath of blogFiles) {
254
- try {
255
- const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog');
256
- allDocs.push(docInfo);
257
- }
258
- catch (err) {
259
- console.warn(`Error processing ${filePath}: ${err.message}`);
260
- }
261
- }
262
- console.log(`Processed ${blogFiles.length} blog files`);
263
- }
264
- else {
265
- console.warn('No markdown files found in blog directory.');
266
- }
267
- }
268
- catch (err) {
269
- console.warn(`Blog directory not found: ${blogDir}`);
270
- }
271
- }
62
+ // Collect all document files
63
+ const allDocFiles = await (0, generator_1.collectDocFiles)(pluginContext);
272
64
  // Skip further processing if no documents were found
273
- if (allDocs.length === 0) {
65
+ if (allDocFiles.length === 0) {
274
66
  console.warn('No documents found to process.');
275
67
  return;
276
68
  }
277
- // Sort files to ensure consistent ordering
278
- allDocs.sort((a, b) => a.title.localeCompare(b.title));
279
- // Generate llms.txt
280
- if (generateLLMsTxt) {
281
- const llmsTxtPath = path.join(outDir, llmsTxtFilename);
282
- const tocItems = allDocs.map(doc => {
283
- return `- [${doc.title}](${doc.url})${doc.description ? `: ${doc.description.split('\n')[0]}` : ''}`;
284
- });
285
- const llmsTxtContent = `# ${docTitle}
286
-
287
- > ${docDescription}
288
-
289
- This file contains links to all documentation sections following the llmtxt.org standard.
290
-
291
- ## Table of Contents
292
-
293
- ${tocItems.join('\n')}
294
- `;
295
- await writeFile(llmsTxtPath, llmsTxtContent);
296
- console.log(`Generated ${llmsTxtFilename}: ${llmsTxtPath}`);
297
- }
298
- // Generate llms-full.txt with all content
299
- if (generateLLMsFullTxt) {
300
- const llmsFullTxtPath = path.join(outDir, llmsFullTxtFilename);
301
- const fullContentSections = allDocs.map(doc => {
302
- return `## ${doc.title}
303
-
304
- ${doc.content}`;
305
- });
306
- const llmsFullTxtContent = `# ${docTitle}
307
-
308
- > ${docDescription}
309
-
310
- This file contains all documentation content in a single document following the llmtxt.org standard.
311
-
312
- ${fullContentSections.join('\n\n---\n\n')}
313
- `;
314
- await writeFile(llmsFullTxtPath, llmsFullTxtContent);
315
- console.log(`Generated ${llmsFullTxtFilename}: ${llmsFullTxtPath}`);
316
- }
317
- // Output statistics
318
- const stats = {
319
- totalDocuments: allDocs.length,
320
- totalBytes: allDocs.reduce((sum, doc) => sum + doc.content.length, 0),
321
- approxTokens: Math.round(allDocs.reduce((sum, doc) => sum + doc.content.length, 0) / 4), // Rough token estimate
322
- };
323
- console.log(`Stats: ${stats.totalDocuments} documents, ${Math.round(stats.totalBytes / 1024)}KB, ~${stats.approxTokens} tokens`);
69
+ // Process standard LLM files (llms.txt and llms-full.txt)
70
+ await (0, generator_1.generateStandardLLMFiles)(pluginContext, allDocFiles);
71
+ // Process custom LLM files
72
+ await (0, generator_1.generateCustomLLMFiles)(pluginContext, allDocFiles);
73
+ // Output overall statistics
74
+ console.log(`Stats: ${allDocFiles.length} total available documents processed`);
324
75
  }
325
76
  catch (err) {
326
77
  console.error('Error generating LLM documentation:', err);
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Document processing functions for the docusaurus-plugin-llms plugin
3
+ */
4
+ import { DocInfo, PluginContext } from './types';
5
+ /**
6
+ * Process a markdown file and extract its metadata and content
7
+ * @param filePath - Path to the markdown file
8
+ * @param baseDir - Base directory
9
+ * @param siteUrl - Base URL of the site
10
+ * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
11
+ * @param pathTransformation - Path transformation configuration
12
+ * @returns Processed file data
13
+ */
14
+ export declare function processMarkdownFile(filePath: string, baseDir: string, siteUrl: string, pathPrefix?: string, pathTransformation?: {
15
+ ignorePaths?: string[];
16
+ addPaths?: string[];
17
+ }): Promise<DocInfo>;
18
+ /**
19
+ * Process files based on include patterns, ignore patterns, and ordering
20
+ * @param context - Plugin context
21
+ * @param allFiles - All available files
22
+ * @param includePatterns - Patterns for files to include
23
+ * @param ignorePatterns - Patterns for files to ignore
24
+ * @param orderPatterns - Patterns for ordering files
25
+ * @param includeUnmatched - Whether to include unmatched files
26
+ * @returns Processed files
27
+ */
28
+ export declare function processFilesWithPatterns(context: PluginContext, allFiles: string[], includePatterns?: string[], ignorePatterns?: string[], orderPatterns?: string[], includeUnmatched?: boolean): Promise<DocInfo[]>;
@@ -0,0 +1,211 @@
1
+ "use strict";
2
+ /**
3
+ * Document processing functions for the docusaurus-plugin-llms plugin
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
38
+ var __importDefault = (this && this.__importDefault) || function (mod) {
39
+ return (mod && mod.__esModule) ? mod : { "default": mod };
40
+ };
41
+ Object.defineProperty(exports, "__esModule", { value: true });
42
+ exports.processMarkdownFile = processMarkdownFile;
43
+ exports.processFilesWithPatterns = processFilesWithPatterns;
44
+ const path = __importStar(require("path"));
45
+ const gray_matter_1 = __importDefault(require("gray-matter"));
46
+ const minimatch_1 = require("minimatch");
47
+ const utils_1 = require("./utils");
48
/**
 * Turn a '/'-separated markdown path (relative to its content root) into a link path.
 *
 * Drops the `.md` / `.mdx` extension and collapses index documents onto their
 * directory: `guide/index.md` -> `guide`, and a root-level `index.md` -> ''.
 *
 * @param normalizedPath - Relative file path using forward slashes
 * @returns Link path without extension or trailing `index` segment
 */
function markdownPathToLinkPath(normalizedPath) {
    const withoutExtension = normalizedPath.replace(/\.mdx?$/, '');
    // A root-level index has no leading '/', so it needs its own check;
    // names that merely end in "index" (e.g. "reindex") are left untouched.
    if (withoutExtension === 'index') {
        return '';
    }
    return withoutExtension.replace(/\/index$/, '');
}
/**
 * Resolve a prefix and link path against the site URL without losing the
 * site's base path.
 *
 * URL resolution replaces the last path segment of a base that does not end
 * in '/', so `new URL('docs/a', 'https://x.dev/base')` yields
 * `https://x.dev/docs/a`. Forcing a trailing slash on the base and keeping the
 * relative part free of a leading slash keeps the result under the base path.
 *
 * @param siteUrl - Base URL of the site, with or without a trailing slash
 * @param pathPrefix - Optional first segment (e.g. 'docs'); may be empty
 * @param linkPath - Optional remaining path; may be empty
 * @returns Absolute URL string
 */
function joinSiteUrl(siteUrl, pathPrefix, linkPath) {
    const base = siteUrl.endsWith('/') ? siteUrl : `${siteUrl}/`;
    const relative = [pathPrefix, linkPath]
        .filter(Boolean)
        .join('/')
        .replace(/^\/+/, '');
    return new URL(relative, base).toString();
}
/**
 * Choose a description for a document and strip markdown heading markers from it.
 *
 * Priority: frontmatter `description`, then the first non-heading paragraph
 * of the body, then the text of the first level-1 heading. Emits warnings
 * (does not throw) when the result contains HTML tags or exceeds 500 characters.
 *
 * @param data - Parsed frontmatter
 * @param markdownContent - Markdown body without frontmatter
 * @param title - Document title, used only in warning messages
 * @returns Description text, or '' when none could be found
 */
function resolveDescription(data, markdownContent, title) {
    let description = '';
    if (data.description) {
        // Frontmatter values are not guaranteed to be strings (e.g. `description: 2024`).
        description = String(data.description);
    }
    else {
        const firstParagraph = markdownContent
            .split('\n\n')
            .map(para => para.trim())
            .find(para => para && !para.startsWith('#'));
        if (firstParagraph) {
            description = firstParagraph;
        }
        else {
            const firstHeadingMatch = markdownContent.match(/^#\s+(.*?)$/m);
            if (firstHeadingMatch && firstHeadingMatch[1]) {
                description = firstHeadingMatch[1].trim();
            }
        }
    }
    if (!description) {
        return '';
    }
    // Remove heading markers ('#' runs followed by whitespace) at the start of
    // the description or of any line, including stacked ones such as "# # x".
    // A '#' not followed by whitespace (an inline hashtag) is preserved.
    description = description.replace(/^(?:#+\s+)+/gm, '');
    if (/<[^>]+>/.test(description)) {
        console.warn(`Warning: Description for "${title}" contains HTML tags`);
    }
    if (description.length > 500) {
        console.warn(`Warning: Description for "${title}" is very long (${description.length} characters)`);
    }
    return description;
}
/**
 * Process a markdown file and extract its metadata and content.
 *
 * Reads the file, separates frontmatter from the body, derives the document
 * URL from the file's location relative to `baseDir`, and determines a title
 * and description.
 *
 * @param filePath - Path to the markdown file
 * @param baseDir - Base directory the link path is computed relative to
 * @param siteUrl - Base URL of the site (with or without a trailing slash)
 * @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
 * @param pathTransformation - Path transformation configuration; when its
 *   `ignorePaths` contains `pathPrefix`, the prefix is omitted from the URL
 * @returns Processed file data: `{ title, path, url, content, description }`
 */
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs', pathTransformation) {
    const content = await (0, utils_1.readFile)(filePath);
    const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
    // Convert to URL path format (replace backslashes with forward slashes on Windows)
    const normalizedPath = path.relative(baseDir, filePath).replace(/\\/g, '/');
    const linkPath = markdownPathToLinkPath(normalizedPath);
    // NOTE(review): for a root-level index the helper now receives '' instead of
    // 'index' - confirm applyPathTransformations handles an empty path.
    const transformedLinkPath = (0, utils_1.applyPathTransformations)(linkPath, pathTransformation);
    // Listing the prefix itself in ignorePaths removes it from generated URLs.
    const transformedPathPrefix = pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)
        ? ''
        : pathPrefix;
    const fullUrl = joinSiteUrl(siteUrl, transformedPathPrefix, transformedLinkPath);
    const title = (0, utils_1.extractTitle)(data, markdownContent, filePath);
    const description = resolveDescription(data, markdownContent, title);
    const cleanedContent = (0, utils_1.cleanMarkdownContent)(markdownContent);
    return {
        title,
        path: normalizedPath,
        url: fullUrl,
        content: cleanedContent,
        description,
    };
}
144
/**
 * Process files based on include patterns, ignore patterns, and ordering.
 *
 * All patterns are matched with minimatch (`matchBase: true`) against each
 * file's path relative to the site directory. Files are then processed one
 * at a time, in order; a file that fails to process is reported with a
 * warning and left out of the result rather than aborting the run.
 *
 * @param context - Plugin context (`siteDir`, `siteUrl`, `docsDir`, `options`)
 * @param allFiles - All available files
 * @param includePatterns - Patterns for files to include; empty means include all
 * @param ignorePatterns - Patterns for files to ignore
 * @param orderPatterns - Patterns for ordering files; files matching earlier
 *   patterns come first. When empty, the filtered order is kept as-is
 * @param includeUnmatched - When ordering, whether files matching no order
 *   pattern are appended after the ordered ones (otherwise they are dropped)
 * @returns Processed files
 */
async function processFilesWithPatterns(context, allFiles, includePatterns = [], ignorePatterns = [], orderPatterns = [], includeUnmatched = false) {
    const { siteDir, siteUrl, docsDir } = context;
    const blogDir = path.join(siteDir, 'blog');
    const fullDocsDir = path.join(siteDir, docsDir);
    const matchesPattern = (file, pattern) => (0, minimatch_1.minimatch)(path.relative(siteDir, file), pattern, { matchBase: true });
    const matchesAny = (file, patterns) => patterns.some(pattern => matchesPattern(file, pattern));
    // True only when `file` lies strictly below `dir`. A plain substring test
    // would also accept siblings such as "<siteDir>/blog-archive/post.md".
    const isInside = (dir, file) => {
        const rel = path.relative(dir, file);
        return rel !== ''
            && rel !== '..'
            && !rel.startsWith(`..${path.sep}`)
            && !path.isAbsolute(rel);
    };
    // Filter files based on include patterns, then apply ignore patterns
    let filteredFiles = allFiles;
    if (includePatterns.length > 0) {
        filteredFiles = filteredFiles.filter(file => matchesAny(file, includePatterns));
    }
    if (ignorePatterns.length > 0) {
        filteredFiles = filteredFiles.filter(file => !matchesAny(file, ignorePatterns));
    }
    // Order files according to orderPatterns
    let filesToProcess = filteredFiles;
    if (orderPatterns.length > 0) {
        // A Set iterates in insertion order, so it records both which files were
        // claimed and the order in which the patterns claimed them.
        const matchedFiles = new Set();
        for (const pattern of orderPatterns) {
            for (const file of filteredFiles) {
                if (!matchedFiles.has(file) && matchesPattern(file, pattern)) {
                    matchedFiles.add(file);
                }
            }
        }
        filesToProcess = [...matchedFiles];
        if (includeUnmatched) {
            filesToProcess = filesToProcess.concat(filteredFiles.filter(file => !matchedFiles.has(file)));
        }
    }
    // Process each file to generate DocInfo
    const processedDocs = [];
    for (const filePath of filesToProcess) {
        try {
            const isBlogFile = isInside(blogDir, filePath);
            const baseDir = isBlogFile ? blogDir : fullDocsDir;
            // NOTE(review): the URL prefix is fixed to 'docs' even when docsDir is
            // configured to another folder - confirm this matches the site's routes.
            const pathPrefix = isBlogFile ? 'blog' : 'docs';
            const docInfo = await processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix, context.options.pathTransformation);
            processedDocs.push(docInfo);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`Error processing ${filePath}: ${reason}`);
        }
    }
    return processedDocs;
}
package/lib/utils.d.ts ADDED
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Utility functions for the docusaurus-plugin-llms plugin
3
+ */
4
+ import { PluginOptions } from './types';
5
+ /**
6
+ * Write content to a file
7
+ * @param filePath - Path to write the file to
8
+ * @param data - Content to write
9
+ */
10
+ export declare function writeFile(filePath: string, data: string): Promise<void>;
11
+ /**
12
+ * Read content from a file
13
+ * @param filePath - Path of the file to read
14
+ * @returns Content of the file
15
+ */
16
+ export declare function readFile(filePath: string): Promise<string>;
17
+ /**
18
+ * Check if a file should be ignored based on glob patterns
19
+ * @param filePath - Path to the file
20
+ * @param baseDir - Base directory for relative paths
21
+ * @param ignorePatterns - Glob patterns for files to ignore
22
+ * @returns Whether the file should be ignored
23
+ */
24
+ export declare function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[]): boolean;
25
+ /**
26
+ * Recursively reads all Markdown files in a directory
27
+ * @param dir - Directory to scan
28
+ * @param baseDir - Base directory for relative paths
29
+ * @param ignorePatterns - Glob patterns for files to ignore
30
+ * @returns Array of file paths
31
+ */
32
+ export declare function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns?: string[]): Promise<string[]>;
33
+ /**
34
+ * Extract title from content or use the filename
35
+ * @param data - Frontmatter data
36
+ * @param content - Markdown content
37
+ * @param filePath - Path to the file
38
+ * @returns Extracted title
39
+ */
40
+ export declare function extractTitle(data: any, content: string, filePath: string): string;
41
+ /**
42
+ * Clean markdown content for LLM consumption
43
+ * @param content - Raw markdown content
44
+ * @returns Cleaned content
45
+ */
46
+ export declare function cleanMarkdownContent(content: string): string;
47
+ /**
48
+ * Apply path transformations according to configuration
49
+ * @param urlPath - Original URL path
50
+ * @param pathTransformation - Path transformation configuration
51
+ * @returns Transformed URL path
52
+ */
53
+ export declare function applyPathTransformations(urlPath: string, pathTransformation?: PluginOptions['pathTransformation']): string;