docusaurus-plugin-llms 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -15
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +6 -2
- package/lib/generator.js +200 -120
- package/lib/index.js +175 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +230 -83
- package/lib/types.d.ts +13 -0
- package/lib/utils.d.ts +165 -6
- package/lib/utils.js +481 -28
- package/package.json +5 -3
- package/src/generator.ts +270 -128
- package/src/index.ts +204 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +314 -127
- package/src/types.ts +20 -1
- package/src/utils.ts +594 -48
package/lib/processor.js
CHANGED
|
@@ -61,25 +61,57 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
61
61
|
if (data.draft === true) {
|
|
62
62
|
return null;
|
|
63
63
|
}
|
|
64
|
+
// Validate and clean empty frontmatter fields
|
|
65
|
+
// Empty strings should be treated as undefined to allow fallback logic
|
|
66
|
+
if (data.title !== undefined && !(0, utils_1.isNonEmptyString)(data.title)) {
|
|
67
|
+
utils_1.logger.warn(`Empty title in frontmatter for ${filePath}. Using fallback.`);
|
|
68
|
+
data.title = undefined;
|
|
69
|
+
}
|
|
70
|
+
if (data.description !== undefined && !(0, utils_1.isNonEmptyString)(data.description)) {
|
|
71
|
+
data.description = undefined;
|
|
72
|
+
}
|
|
73
|
+
if (data.slug !== undefined && !(0, utils_1.isNonEmptyString)(data.slug)) {
|
|
74
|
+
data.slug = undefined;
|
|
75
|
+
}
|
|
76
|
+
if (data.id !== undefined && !(0, utils_1.isNonEmptyString)(data.id)) {
|
|
77
|
+
data.id = undefined;
|
|
78
|
+
}
|
|
64
79
|
// Resolve partial imports before processing
|
|
65
80
|
const resolvedContent = await (0, utils_1.resolvePartialImports)(markdownContent, filePath);
|
|
66
81
|
const relativePath = path.relative(baseDir, filePath);
|
|
67
82
|
// Convert to URL path format (replace backslashes with forward slashes on Windows)
|
|
68
|
-
const normalizedPath =
|
|
83
|
+
const normalizedPath = (0, utils_1.normalizePath)(relativePath);
|
|
69
84
|
let fullUrl;
|
|
70
|
-
if (resolvedUrl) {
|
|
85
|
+
if ((0, utils_1.isNonEmptyString)(resolvedUrl)) {
|
|
71
86
|
// Use the actual resolved URL from Docusaurus if provided
|
|
72
|
-
|
|
87
|
+
try {
|
|
88
|
+
fullUrl = new URL(resolvedUrl, siteUrl).toString();
|
|
89
|
+
}
|
|
90
|
+
catch (error) {
|
|
91
|
+
utils_1.logger.warn(`Invalid URL construction: ${resolvedUrl} with base ${siteUrl}. Using fallback.`);
|
|
92
|
+
// Fallback to string concatenation with proper path joining
|
|
93
|
+
const baseUrl = siteUrl.endsWith('/') ? siteUrl.slice(0, -1) : siteUrl;
|
|
94
|
+
const urlPath = resolvedUrl.startsWith('/') ? resolvedUrl : `/${resolvedUrl}`;
|
|
95
|
+
fullUrl = baseUrl + urlPath;
|
|
96
|
+
}
|
|
73
97
|
}
|
|
74
98
|
else {
|
|
75
99
|
// Fallback to the old path construction method
|
|
76
100
|
// Convert .md extension to appropriate path
|
|
77
101
|
const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
|
|
78
102
|
// Handle index files specially
|
|
79
|
-
|
|
103
|
+
let linkPath = linkPathBase.endsWith('index')
|
|
80
104
|
? linkPathBase.replace(/\/index$/, '')
|
|
81
105
|
: linkPathBase;
|
|
82
|
-
//
|
|
106
|
+
// linkPath might include the pathPrefix (e.g., "docs/api/core")
|
|
107
|
+
// We need to remove the pathPrefix before applying transformations, then add it back later
|
|
108
|
+
if (pathPrefix && linkPath.startsWith(`${pathPrefix}/`)) {
|
|
109
|
+
linkPath = linkPath.substring(`${pathPrefix}/`.length);
|
|
110
|
+
}
|
|
111
|
+
else if (pathPrefix && linkPath === pathPrefix) {
|
|
112
|
+
linkPath = '';
|
|
113
|
+
}
|
|
114
|
+
// Apply path transformations to the clean link path (without pathPrefix)
|
|
83
115
|
const transformedLinkPath = (0, utils_1.applyPathTransformations)(linkPath, pathTransformation);
|
|
84
116
|
// Also apply path transformations to the pathPrefix if it's not empty
|
|
85
117
|
// This allows removing 'docs' from the path when specified in ignorePaths
|
|
@@ -87,15 +119,49 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
87
119
|
if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
|
|
88
120
|
transformedPathPrefix = '';
|
|
89
121
|
}
|
|
90
|
-
//
|
|
91
|
-
|
|
122
|
+
// Ensure path segments are URL-safe with sophisticated encoding detection
|
|
123
|
+
const encodedLinkPath = transformedLinkPath.split('/').map(segment => {
|
|
124
|
+
// Check if segment contains characters that need encoding
|
|
125
|
+
// Unreserved characters (per RFC 3986): A-Z a-z 0-9 - . _ ~
|
|
126
|
+
if (!/[^A-Za-z0-9\-._~]/.test(segment)) {
|
|
127
|
+
// Segment only contains unreserved characters, no encoding needed
|
|
128
|
+
return segment;
|
|
129
|
+
}
|
|
130
|
+
try {
|
|
131
|
+
// Try to decode - if it changes, it was already encoded
|
|
132
|
+
const decoded = decodeURIComponent(segment);
|
|
133
|
+
if (decoded !== segment) {
|
|
134
|
+
// Was already encoded, return as-is
|
|
135
|
+
return segment;
|
|
136
|
+
}
|
|
137
|
+
// Not encoded, encode it
|
|
138
|
+
return encodeURIComponent(segment);
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
// Malformed encoding, re-encode
|
|
142
|
+
return encodeURIComponent(segment);
|
|
143
|
+
}
|
|
144
|
+
}).join('/');
|
|
145
|
+
// Construct URL by encoding path components, then combine with site URL
|
|
146
|
+
// We don't use URL constructor for the full path because it decodes some characters
|
|
147
|
+
const pathPart = transformedPathPrefix ? `${transformedPathPrefix}/${encodedLinkPath}` : encodedLinkPath;
|
|
148
|
+
try {
|
|
149
|
+
const baseUrl = new URL(siteUrl);
|
|
150
|
+
fullUrl = `${baseUrl.origin}/${pathPart}`;
|
|
151
|
+
}
|
|
152
|
+
catch (error) {
|
|
153
|
+
utils_1.logger.warn(`Invalid siteUrl: ${siteUrl}. Using fallback.`);
|
|
154
|
+
// Fallback to string concatenation with proper path joining
|
|
155
|
+
const baseUrl = siteUrl.endsWith('/') ? siteUrl.slice(0, -1) : siteUrl;
|
|
156
|
+
fullUrl = `${baseUrl}/${pathPart}`;
|
|
157
|
+
}
|
|
92
158
|
}
|
|
93
159
|
// Extract title
|
|
94
160
|
const title = (0, utils_1.extractTitle)(data, resolvedContent, filePath);
|
|
95
161
|
// Get description from frontmatter or first paragraph
|
|
96
162
|
let description = '';
|
|
97
163
|
// First priority: Use frontmatter description if available
|
|
98
|
-
if (data.description) {
|
|
164
|
+
if ((0, utils_1.isNonEmptyString)(data.description)) {
|
|
99
165
|
description = data.description;
|
|
100
166
|
}
|
|
101
167
|
else {
|
|
@@ -119,13 +185,13 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
119
185
|
}
|
|
120
186
|
// Only remove heading markers at the beginning of descriptions or lines
|
|
121
187
|
// This preserves # characters that are part of the content
|
|
122
|
-
if (description) {
|
|
188
|
+
if ((0, utils_1.isNonEmptyString)(description)) {
|
|
123
189
|
// Original approach had issues with hashtags inside content
|
|
124
190
|
// Fix: Only remove # symbols at the beginning of lines or description
|
|
125
191
|
// that are followed by a space (actual heading markers)
|
|
126
192
|
description = description.replace(/^(#+)\s+/gm, '');
|
|
127
193
|
// Special handling for description frontmatter with heading markers
|
|
128
|
-
if (data.description && data.description.startsWith('#')) {
|
|
194
|
+
if ((0, utils_1.isNonEmptyString)(data.description) && data.description.startsWith('#')) {
|
|
129
195
|
// If the description in frontmatter starts with a heading marker,
|
|
130
196
|
// we should preserve it in the extracted description
|
|
131
197
|
description = description.replace(/^#+\s+/, '');
|
|
@@ -134,15 +200,15 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
134
200
|
// We don't want to treat hashtags in the middle of content as headings
|
|
135
201
|
// Validate that the description doesn't contain markdown headings
|
|
136
202
|
if (description.match(/^#+\s+/m)) {
|
|
137
|
-
|
|
203
|
+
utils_1.logger.warn(`Warning: Description for "${title}" may still contain heading markers`);
|
|
138
204
|
}
|
|
139
205
|
// Warn if the description contains HTML tags
|
|
140
206
|
if (/<[^>]+>/g.test(description)) {
|
|
141
|
-
|
|
207
|
+
utils_1.logger.warn(`Warning: Description for "${title}" contains HTML tags`);
|
|
142
208
|
}
|
|
143
209
|
// Warn if the description is very long
|
|
144
210
|
if (description.length > 500) {
|
|
145
|
-
|
|
211
|
+
utils_1.logger.warn(`Warning: Description for "${title}" is very long (${description.length} characters)`);
|
|
146
212
|
}
|
|
147
213
|
}
|
|
148
214
|
// Clean and process content (now with partials already resolved)
|
|
@@ -153,8 +219,113 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
153
219
|
url: fullUrl,
|
|
154
220
|
content: cleanedContent,
|
|
155
221
|
description: description || '',
|
|
222
|
+
frontMatter: data,
|
|
156
223
|
};
|
|
157
224
|
}
|
|
225
|
+
/**
 * Strip leading numeric ordering prefixes from every segment of a
 * slash-separated path (e.g. "01-intro/002-setup" -> "intro/setup").
 *
 * A prefix is one or more digits followed by a hyphen at the very start of a
 * segment. It is only removed when at least one character follows it, so a
 * segment that consists of nothing but a prefix (e.g. "01-") is left as-is
 * instead of collapsing to an empty segment and yielding "//" in the result.
 *
 * @param routePath - Slash-separated path to clean
 * @returns The path with the numbered prefix removed from each segment
 */
function removeNumberedPrefixes(routePath) {
    // The parameter is intentionally not named `path`, so it cannot shadow the
    // module-level `path` object this file uses (path.relative, path.join, ...).
    return routePath
        .split('/')
        // The lookahead demands a character after the hyphen before stripping.
        .map(segment => segment.replace(/^\d+-(?=.)/, ''))
        .join('/');
}
|
|
234
|
+
/**
 * Look up the first candidate path that has an entry in the route map.
 *
 * Each candidate is tried verbatim first and, if that yields nothing truthy,
 * once more with a trailing slash appended.
 *
 * @param routeMap - Map-like object queried through `get(key)`
 * @param possiblePaths - Candidate keys, checked in iteration order
 * @returns The first truthy value found, or undefined when no candidate hits
 */
function findRouteInMap(routeMap, possiblePaths) {
    for (const candidate of possiblePaths) {
        let hit = routeMap.get(candidate);
        if (!hit) {
            // Second chance: the same key with a trailing slash
            hit = routeMap.get(candidate + '/');
        }
        if (hit) {
            return hit;
        }
    }
    return undefined;
}
|
|
246
|
+
/**
 * Attempt a route lookup using the relative path exactly as given.
 *
 * Two candidates are checked, in this order: the path nested under the
 * prefix ("/<pathPrefix>/<relativePath>") and the bare path
 * ("/<relativePath>").
 *
 * @param routeMap - Route map handed on to findRouteInMap
 * @param relativePath - Path to look up
 * @param pathPrefix - Prefix used to build the first candidate
 * @returns Whatever findRouteInMap returns for the two candidates
 */
function tryExactRouteMatch(routeMap, relativePath, pathPrefix) {
    const prefixedCandidate = `/${pathPrefix}/${relativePath}`;
    const bareCandidate = `/${relativePath}`;
    return findRouteInMap(routeMap, [prefixedCandidate, bareCandidate]);
}
|
|
256
|
+
/**
 * Attempt a route lookup after removing numbered prefixes ("01-") from the path.
 *
 * First the path with prefixes stripped from every segment is tried. If that
 * fails and the path has more than one segment, each segment is stripped on
 * its own (leaving the others untouched) and the resulting path is tried.
 * Every variant is looked up both under "/<pathPrefix>/" and at the root.
 *
 * @param routeMap - Route map handed on to findRouteInMap
 * @param relativePath - Slash-separated path to resolve
 * @param pathPrefix - Prefix used to build the prefixed candidate
 * @returns The first match found, or undefined when no variant matches
 */
function tryNumberedPrefixResolution(routeMap, relativePath, pathPrefix) {
    // Looks a variant up with the prefix first, then without it
    const lookup = (variant) => findRouteInMap(routeMap, [`/${pathPrefix}/${variant}`, `/${variant}`]);
    const allStripped = lookup(removeNumberedPrefixes(relativePath));
    if (allStripped) {
        return allStripped;
    }
    const parts = relativePath.split('/');
    if (parts.length <= 1) {
        return undefined;
    }
    // Strip the prefix from exactly one segment per attempt
    for (const [position, part] of parts.entries()) {
        const variantParts = parts.slice();
        variantParts[position] = part.replace(/^\d+-/, '');
        const found = lookup(variantParts.join('/'));
        if (found) {
            return found;
        }
    }
    return undefined;
}
|
|
283
|
+
/**
 * Find the first entry of the routes array whose path ends with the cleaned
 * (numbered prefixes removed) relative path, compared case-insensitively.
 *
 * A single trailing slash on a route is ignored for the comparison, so
 * "/docs/intro/" matches the path "docs/intro". When the cleaned path is
 * empty nothing is matched; without that guard the suffix would be just "/"
 * and any route ending in a slash would be returned.
 *
 * @param routesPaths - Array of route path strings to search
 * @param relativePath - Slash-separated path to resolve
 * @param pathPrefix - Kept for signature compatibility; a route equal to
 *   "/<pathPrefix>/<path>" or "/<path>" already ends with "/<path>", so the
 *   suffix test alone covers both cases
 * @returns The matching route exactly as stored in routesPaths, or undefined
 */
function tryRoutesPathsMatch(routesPaths, relativePath, pathPrefix) {
    const normalizedCleanPath = removeNumberedPrefixes(relativePath).toLowerCase();
    if (normalizedCleanPath === '') {
        return undefined;
    }
    const suffix = `/${normalizedCleanPath}`;
    return routesPaths.find(routePath => {
        const lowered = routePath.toLowerCase();
        // Drop one trailing slash (but never reduce "/" to an empty string)
        const normalizedRoute = lowered.length > 1 && lowered.endsWith('/')
            ? lowered.slice(0, -1)
            : lowered;
        return normalizedRoute.endsWith(suffix);
    });
}
|
|
296
|
+
/**
 * Resolve the URL for a document from the routes held in the plugin context.
 *
 * The file path is made relative to baseDir, normalized, and stripped of its
 * ".md"/".mdx" extension and a trailing "/index". Lookups then run in order:
 * exact match in the route map, match after numbered-prefix removal, and
 * finally a search of context.routesPaths when that array is present.
 *
 * @param filePath - Full path to the file
 * @param baseDir - Base directory the file path is made relative to
 * @param pathPrefix - Path prefix ('docs' or 'blog')
 * @param context - Plugin context providing routeMap and optionally routesPaths
 * @returns Resolved URL, or undefined when there is no route map or no match
 */
function resolveDocumentUrl(filePath, baseDir, pathPrefix, context) {
    const routeMap = context.routeMap;
    // Nothing to resolve against without a route map
    if (!routeMap) {
        return undefined;
    }
    const fromBase = (0, utils_1.normalizePath)(path.relative(baseDir, filePath));
    const relativePath = fromBase.replace(/\.mdx?$/, '').replace(/\/index$/, '');
    // Route-map strategies: exact first, numbered-prefix removal second
    const viaRouteMap = tryExactRouteMatch(routeMap, relativePath, pathPrefix)
        || tryNumberedPrefixResolution(routeMap, relativePath, pathPrefix);
    if (viaRouteMap) {
        return viaRouteMap;
    }
    // Last resort: scan the routesPaths array if the context has one
    return context.routesPaths
        ? tryRoutesPathsMatch(context.routesPaths, relativePath, pathPrefix)
        : undefined;
}
|
|
158
329
|
/**
|
|
159
330
|
* Process files based on include patterns, ignore patterns, and ordering
|
|
160
331
|
* @param context - Plugin context
|
|
@@ -165,21 +336,44 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
|
165
336
|
* @param includeUnmatched - Whether to include unmatched files
|
|
166
337
|
* @returns Processed files
|
|
167
338
|
*/
|
|
339
|
+
/**
 * Check whether a file matches a glob pattern.
 *
 * The pattern is tested (with minimatch's `matchBase` option) against the
 * site-relative path (e.g. "docs/quickstart/file.md") and, when the file
 * lies inside the docs directory, against the docs-relative path
 * (e.g. "quickstart/file.md").
 *
 * @param file - Path of the file to test
 * @param pattern - Glob pattern
 * @param siteDir - Site root directory
 * @param docsDir - Docs directory, joined onto siteDir
 * @returns True when either path variant matches the pattern
 */
function matchesPattern(file, pattern, siteDir, docsDir) {
    const minimatchOptions = { matchBase: true };
    const siteRelativePath = (0, utils_1.normalizePath)(path.relative(siteDir, file));
    const docsBaseDir = path.resolve(path.join(siteDir, docsDir));
    const relativeToDocs = path.relative(docsBaseDir, path.resolve(file));
    // Decide containment from the relative path rather than a raw string
    // prefix: a sibling such as "<site>/docs-old" starts with "<site>/docs"
    // but is not inside it, and its relative path begins with "..".
    // An absolute result means the two paths share no common root.
    const isInsideDocs = relativeToDocs !== '' &&
        relativeToDocs !== '..' &&
        !relativeToDocs.startsWith(`..${path.sep}`) &&
        !path.isAbsolute(relativeToDocs);
    if ((0, minimatch_1.minimatch)(siteRelativePath, pattern, minimatchOptions)) {
        return true;
    }
    if (!isInsideDocs) {
        return false;
    }
    const docsRelativePath = (0, utils_1.normalizePath)(relativeToDocs);
    return (0, minimatch_1.minimatch)(docsRelativePath, pattern, minimatchOptions);
}
|
|
168
364
|
async function processFilesWithPatterns(context, allFiles, includePatterns = [], ignorePatterns = [], orderPatterns = [], includeUnmatched = false) {
|
|
169
365
|
const { siteDir, siteUrl, docsDir } = context;
|
|
170
366
|
// Filter files based on include patterns
|
|
171
367
|
let filteredFiles = allFiles;
|
|
172
368
|
if (includePatterns.length > 0) {
|
|
173
369
|
filteredFiles = allFiles.filter(file => {
|
|
174
|
-
|
|
175
|
-
return includePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
|
|
370
|
+
return includePatterns.some(pattern => matchesPattern(file, pattern, siteDir, docsDir));
|
|
176
371
|
});
|
|
177
372
|
}
|
|
178
373
|
// Apply ignore patterns
|
|
179
374
|
if (ignorePatterns.length > 0) {
|
|
180
375
|
filteredFiles = filteredFiles.filter(file => {
|
|
181
|
-
|
|
182
|
-
return !ignorePatterns.some(pattern => (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }));
|
|
376
|
+
return !ignorePatterns.some(pattern => matchesPattern(file, pattern, siteDir, docsDir));
|
|
183
377
|
});
|
|
184
378
|
}
|
|
185
379
|
// Order files according to orderPatterns
|
|
@@ -189,8 +383,7 @@ async function processFilesWithPatterns(context, allFiles, includePatterns = [],
|
|
|
189
383
|
// Process files according to orderPatterns
|
|
190
384
|
for (const pattern of orderPatterns) {
|
|
191
385
|
const matchingFiles = filteredFiles.filter(file => {
|
|
192
|
-
|
|
193
|
-
return (0, minimatch_1.minimatch)(relativePath, pattern, { matchBase: true }) && !matchedFiles.has(file);
|
|
386
|
+
return matchesPattern(file, pattern, siteDir, docsDir) && !matchedFiles.has(file);
|
|
194
387
|
});
|
|
195
388
|
for (const file of matchingFiles) {
|
|
196
389
|
filesToProcess.push(file);
|
|
@@ -206,82 +399,36 @@ async function processFilesWithPatterns(context, allFiles, includePatterns = [],
|
|
|
206
399
|
else {
|
|
207
400
|
filesToProcess = filteredFiles;
|
|
208
401
|
}
|
|
209
|
-
// Process
|
|
210
|
-
const
|
|
211
|
-
for (const filePath of filesToProcess) {
|
|
402
|
+
// Process files in parallel using Promise.allSettled
|
|
403
|
+
const results = await Promise.allSettled(filesToProcess.map(async (filePath) => {
|
|
212
404
|
try {
|
|
213
405
|
// Determine if this is a blog or docs file
|
|
214
406
|
const isBlogFile = filePath.includes(path.join(siteDir, 'blog'));
|
|
215
|
-
|
|
407
|
+
// Use siteDir as baseDir to preserve full directory structure (docs/path/file.md instead of just path/file.md)
|
|
408
|
+
const baseDir = siteDir;
|
|
216
409
|
const pathPrefix = isBlogFile ? 'blog' : 'docs';
|
|
217
410
|
// Try to find the resolved URL for this file from the route map
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
const relativePath = path.relative(baseDir, filePath)
|
|
222
|
-
.replace(/\\/g, '/')
|
|
411
|
+
const resolvedUrl = resolveDocumentUrl(filePath, baseDir, pathPrefix, context);
|
|
412
|
+
// Log when we successfully resolve a URL using Docusaurus routes
|
|
413
|
+
if (resolvedUrl && context.routeMap) {
|
|
414
|
+
const relativePath = (0, utils_1.normalizePath)(path.relative(baseDir, filePath))
|
|
223
415
|
.replace(/\.mdx?$/, '')
|
|
224
416
|
.replace(/\/index$/, '');
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return path.split('/').map(segment => {
|
|
228
|
-
// Remove numbered prefixes like "01-", "1-", "001-" from each segment
|
|
229
|
-
return segment.replace(/^\d+-/, '');
|
|
230
|
-
}).join('/');
|
|
231
|
-
};
|
|
232
|
-
// Check various possible route patterns
|
|
233
|
-
const cleanPath = removeNumberedPrefixes(relativePath);
|
|
234
|
-
const possiblePaths = [
|
|
235
|
-
`/${pathPrefix}/${cleanPath}`,
|
|
236
|
-
`/${cleanPath}`,
|
|
237
|
-
`/${pathPrefix}/${relativePath}`, // Try with original path
|
|
238
|
-
`/${relativePath}`, // Try without prefix
|
|
239
|
-
];
|
|
240
|
-
// Also handle nested folder structures with numbered prefixes
|
|
241
|
-
const segments = relativePath.split('/');
|
|
242
|
-
if (segments.length > 1) {
|
|
243
|
-
// Try removing numbered prefixes from different levels
|
|
244
|
-
for (let i = 0; i < segments.length; i++) {
|
|
245
|
-
const modifiedSegments = [...segments];
|
|
246
|
-
modifiedSegments[i] = modifiedSegments[i].replace(/^\d+-/, '');
|
|
247
|
-
const modifiedPath = modifiedSegments.join('/');
|
|
248
|
-
possiblePaths.push(`/${pathPrefix}/${modifiedPath}`);
|
|
249
|
-
possiblePaths.push(`/${modifiedPath}`);
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
// Try to find a match in the route map
|
|
253
|
-
for (const possiblePath of possiblePaths) {
|
|
254
|
-
if (context.routeMap.has(possiblePath)) {
|
|
255
|
-
resolvedUrl = context.routeMap.get(possiblePath);
|
|
256
|
-
break;
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
// If still not found, try to find the best match using the routesPaths array
|
|
260
|
-
if (!resolvedUrl && context.routesPaths) {
|
|
261
|
-
const normalizedCleanPath = cleanPath.toLowerCase();
|
|
262
|
-
const matchingRoute = context.routesPaths.find(routePath => {
|
|
263
|
-
const normalizedRoute = routePath.toLowerCase();
|
|
264
|
-
return normalizedRoute.endsWith(`/${normalizedCleanPath}`) ||
|
|
265
|
-
normalizedRoute === `/${pathPrefix}/${normalizedCleanPath}` ||
|
|
266
|
-
normalizedRoute === `/${normalizedCleanPath}`;
|
|
267
|
-
});
|
|
268
|
-
if (matchingRoute) {
|
|
269
|
-
resolvedUrl = matchingRoute;
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
// Log when we successfully resolve a URL using Docusaurus routes
|
|
273
|
-
if (resolvedUrl && resolvedUrl !== `/${pathPrefix}/${relativePath}`) {
|
|
274
|
-
console.log(`Resolved URL for ${path.basename(filePath)}: ${resolvedUrl} (was: /${pathPrefix}/${relativePath})`);
|
|
417
|
+
if (resolvedUrl !== `/${pathPrefix}/${relativePath}`) {
|
|
418
|
+
utils_1.logger.verbose(`Resolved URL for ${path.basename(filePath)}: ${resolvedUrl} (was: /${pathPrefix}/${relativePath})`);
|
|
275
419
|
}
|
|
276
420
|
}
|
|
277
421
|
const docInfo = await processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix, context.options.pathTransformation, context.options.excludeImports || false, context.options.removeDuplicateHeadings || false, resolvedUrl);
|
|
278
|
-
|
|
279
|
-
processedDocs.push(docInfo);
|
|
280
|
-
}
|
|
422
|
+
return docInfo;
|
|
281
423
|
}
|
|
282
424
|
catch (err) {
|
|
283
|
-
|
|
425
|
+
utils_1.logger.warn(`Error processing ${filePath}: ${(0, utils_1.getErrorMessage)(err)}`);
|
|
426
|
+
return null;
|
|
284
427
|
}
|
|
285
|
-
}
|
|
428
|
+
}));
|
|
429
|
+
// Filter successful results and non-null DocInfo objects
|
|
430
|
+
const processedDocs = results
|
|
431
|
+
.filter((r) => r.status === 'fulfilled' && r.value !== null)
|
|
432
|
+
.map(r => r.value);
|
|
286
433
|
return processedDocs;
|
|
287
434
|
}
|
package/lib/types.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export interface DocInfo {
|
|
|
11
11
|
url: string;
|
|
12
12
|
content: string;
|
|
13
13
|
description: string;
|
|
14
|
+
frontMatter?: Record<string, any>;
|
|
14
15
|
}
|
|
15
16
|
/**
|
|
16
17
|
* Interface for custom LLM file configuration
|
|
@@ -80,10 +81,22 @@ export interface PluginOptions {
|
|
|
80
81
|
removeDuplicateHeadings?: boolean;
|
|
81
82
|
/** Whether to generate individual markdown files and link to them from llms.txt instead of original docs (default: false) */
|
|
82
83
|
generateMarkdownFiles?: boolean;
|
|
84
|
+
/** Array of frontmatter keys to preserve in generated individual markdown files (only used when generateMarkdownFiles is true) */
|
|
85
|
+
keepFrontMatter?: string[];
|
|
83
86
|
/** Custom content to include at the root level of llms.txt (after title/description, before TOC) */
|
|
84
87
|
rootContent?: string;
|
|
85
88
|
/** Custom content to include at the root level of llms-full.txt (after title/description, before content sections) */
|
|
86
89
|
fullRootContent?: string;
|
|
90
|
+
/** Whether to preserve directory structure in generated markdown files (default: true) */
|
|
91
|
+
preserveDirectoryStructure?: boolean;
|
|
92
|
+
/** Batch size for processing large document sets to prevent memory issues (default: 100) */
|
|
93
|
+
processingBatchSize?: number;
|
|
94
|
+
/** Logging level for plugin output (default: 'normal'). Options: 'quiet', 'normal', 'verbose' */
|
|
95
|
+
logLevel?: 'quiet' | 'normal' | 'verbose';
|
|
96
|
+
/** Whether to warn about files that are ignored (no extension or unsupported extension) (default: false) */
|
|
97
|
+
warnOnIgnoredFiles?: boolean;
|
|
98
|
+
/** Index signature for Docusaurus plugin compatibility */
|
|
99
|
+
[key: string]: unknown;
|
|
87
100
|
}
|
|
88
101
|
/**
|
|
89
102
|
* Plugin context with processed options
|
package/lib/utils.d.ts
CHANGED
|
@@ -2,6 +2,129 @@
|
|
|
2
2
|
* Utility functions for the docusaurus-plugin-llms plugin
|
|
3
3
|
*/
|
|
4
4
|
import { PluginOptions } from './types';
|
|
5
|
+
/**
|
|
6
|
+
* Null/Undefined Handling Guidelines:
|
|
7
|
+
*
|
|
8
|
+
* 1. For required parameters: Throw early if null/undefined
|
|
9
|
+
* 2. For optional parameters: Use optional chaining `value?.property`
|
|
10
|
+
* 3. For explicit null checks: Use `!== null` and `!== undefined` or the isDefined type guard
|
|
11
|
+
* 4. For string validation: Use isNonEmptyString() type guard
|
|
12
|
+
* 5. For truthy checks on booleans: Use explicit comparison or Boolean(value)
|
|
13
|
+
*
|
|
14
|
+
* Avoid: `if (value)` when value could be 0, '', or false legitimately
|
|
15
|
+
* Use: Type guards for consistent, type-safe checks
|
|
16
|
+
*/
|
|
17
|
+
/**
|
|
18
|
+
* Type guard to check if a value is defined (not null or undefined)
|
|
19
|
+
* @param value - Value to check
|
|
20
|
+
* @returns True if value is not null or undefined
|
|
21
|
+
*/
|
|
22
|
+
export declare function isDefined<T>(value: T | null | undefined): value is T;
|
|
23
|
+
/**
|
|
24
|
+
* Type guard to check if a value is a non-empty string
|
|
25
|
+
* @param value - Value to check
|
|
26
|
+
* @returns True if value is a string with at least one non-whitespace character
|
|
27
|
+
*/
|
|
28
|
+
export declare function isNonEmptyString(value: unknown): value is string;
|
|
29
|
+
/**
|
|
30
|
+
* Type guard to check if a value is a non-empty array
|
|
31
|
+
* @param value - Value to check
|
|
32
|
+
* @returns True if value is an array with at least one element
|
|
33
|
+
*/
|
|
34
|
+
export declare function isNonEmptyArray<T>(value: unknown): value is T[];
|
|
35
|
+
/**
|
|
36
|
+
* Safely extract an error message from an unknown error value
|
|
37
|
+
* @param error - The error value (can be Error, string, or any other type)
|
|
38
|
+
* @returns A string representation of the error
|
|
39
|
+
*/
|
|
40
|
+
export declare function getErrorMessage(error: unknown): string;
|
|
41
|
+
/**
|
|
42
|
+
* Extract stack trace from unknown error types
|
|
43
|
+
* @param error - The error value (can be Error or any other type)
|
|
44
|
+
* @returns Stack trace if available, undefined otherwise
|
|
45
|
+
*/
|
|
46
|
+
export declare function getErrorStack(error: unknown): string | undefined;
|
|
47
|
+
/**
|
|
48
|
+
* Custom error class for validation errors
|
|
49
|
+
*/
|
|
50
|
+
export declare class ValidationError extends Error {
|
|
51
|
+
constructor(message: string);
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Validates that a value is not null or undefined
|
|
55
|
+
* @param value - The value to validate
|
|
56
|
+
* @param paramName - The parameter name for error messages
|
|
57
|
+
* @returns The validated value
|
|
58
|
+
* @throws ValidationError if the value is null or undefined
|
|
59
|
+
*/
|
|
60
|
+
export declare function validateRequired<T>(value: T | null | undefined, paramName: string): T;
|
|
61
|
+
/**
|
|
62
|
+
* Validates that a value is a string and optionally checks its properties
|
|
63
|
+
* @param value - The value to validate
|
|
64
|
+
* @param paramName - The parameter name for error messages
|
|
65
|
+
* @param options - Validation options for min/max length and pattern
|
|
66
|
+
* @returns The validated string
|
|
67
|
+
* @throws ValidationError if validation fails
|
|
68
|
+
*/
|
|
69
|
+
export declare function validateString(value: unknown, paramName: string, options?: {
|
|
70
|
+
minLength?: number;
|
|
71
|
+
maxLength?: number;
|
|
72
|
+
pattern?: RegExp;
|
|
73
|
+
}): string;
|
|
74
|
+
/**
|
|
75
|
+
* Validates that a value is an array and optionally validates elements
|
|
76
|
+
* @param value - The value to validate
|
|
77
|
+
* @param paramName - The parameter name for error messages
|
|
78
|
+
* @param elementValidator - Optional function to validate each element
|
|
79
|
+
* @returns The validated array
|
|
80
|
+
* @throws ValidationError if validation fails
|
|
81
|
+
*/
|
|
82
|
+
export declare function validateArray<T>(value: unknown, paramName: string, elementValidator?: (item: unknown) => boolean): T[];
|
|
83
|
+
/**
|
|
84
|
+
* Logging level enumeration
|
|
85
|
+
*/
|
|
86
|
+
export declare enum LogLevel {
|
|
87
|
+
QUIET = 0,
|
|
88
|
+
NORMAL = 1,
|
|
89
|
+
VERBOSE = 2
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Set the logging level for the plugin
|
|
93
|
+
* @param level - The logging level to use
|
|
94
|
+
*/
|
|
95
|
+
export declare function setLogLevel(level: LogLevel): void;
|
|
96
|
+
/**
|
|
97
|
+
* Logger utility for consistent logging across the plugin
|
|
98
|
+
*/
|
|
99
|
+
export declare const logger: {
|
|
100
|
+
error: (message: string) => void;
|
|
101
|
+
warn: (message: string) => void;
|
|
102
|
+
info: (message: string) => void;
|
|
103
|
+
verbose: (message: string) => void;
|
|
104
|
+
};
|
|
105
|
+
/**
|
|
106
|
+
* Normalizes a file path by converting all backslashes to forward slashes.
|
|
107
|
+
* This ensures consistent path handling across Windows and Unix systems.
|
|
108
|
+
*
|
|
109
|
+
* @param filePath - The file path to normalize
|
|
110
|
+
* @returns The normalized path with forward slashes
|
|
111
|
+
* @throws ValidationError if filePath is not a string
|
|
112
|
+
*/
|
|
113
|
+
export declare function normalizePath(filePath: string): string;
|
|
114
|
+
/**
|
|
115
|
+
* Validates that a file path does not exceed the platform-specific maximum length
|
|
116
|
+
* @param filePath - The file path to validate
|
|
117
|
+
* @returns True if the path is within limits, false otherwise
|
|
118
|
+
*/
|
|
119
|
+
export declare function validatePathLength(filePath: string): boolean;
|
|
120
|
+
/**
|
|
121
|
+
* Shortens a file path by creating a hash-based filename if the path is too long
|
|
122
|
+
* @param fullPath - The full file path that may be too long
|
|
123
|
+
* @param outputDir - The output directory base path
|
|
124
|
+
* @param relativePath - The relative path from the output directory
|
|
125
|
+
* @returns A shortened path if necessary, or the original path if it's within limits
|
|
126
|
+
*/
|
|
127
|
+
export declare function shortenPathIfNeeded(fullPath: string, outputDir: string, relativePath: string): string;
|
|
5
128
|
/**
|
|
6
129
|
* Write content to a file
|
|
7
130
|
* @param filePath - Path to write the file to
|
|
@@ -11,25 +134,30 @@ export declare function writeFile(filePath: string, data: string): Promise<void>
|
|
|
11
134
|
/**
|
|
12
135
|
* Read content from a file
|
|
13
136
|
* @param filePath - Path of the file to read
|
|
14
|
-
* @returns Content of the file
|
|
137
|
+
* @returns Content of the file with BOM removed if present
|
|
15
138
|
*/
|
|
16
139
|
export declare function readFile(filePath: string): Promise<string>;
|
|
17
140
|
/**
|
|
18
141
|
* Check if a file should be ignored based on glob patterns
|
|
142
|
+
* Matches against both site-relative and docs-relative paths
|
|
19
143
|
* @param filePath - Path to the file
|
|
20
|
-
* @param baseDir - Base directory for relative paths
|
|
144
|
+
* @param baseDir - Base directory (site root) for relative paths
|
|
21
145
|
* @param ignorePatterns - Glob patterns for files to ignore
|
|
146
|
+
* @param docsDir - Docs directory name (e.g., 'docs')
|
|
22
147
|
* @returns Whether the file should be ignored
|
|
23
148
|
*/
|
|
24
|
-
export declare function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[]): boolean;
|
|
149
|
+
export declare function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[], docsDir?: string): boolean;
|
|
25
150
|
/**
|
|
26
151
|
* Recursively reads all Markdown files in a directory
|
|
27
152
|
* @param dir - Directory to scan
|
|
28
|
-
* @param baseDir - Base directory for relative paths
|
|
153
|
+
* @param baseDir - Base directory (site root) for relative paths
|
|
29
154
|
* @param ignorePatterns - Glob patterns for files to ignore
|
|
155
|
+
* @param docsDir - Docs directory name (e.g., 'docs')
|
|
156
|
+
* @param warnOnIgnoredFiles - Whether to warn about ignored files
|
|
157
|
+
* @param visitedPaths - Set of already visited real paths to detect symlink loops (internal use)
|
|
30
158
|
* @returns Array of file paths
|
|
31
159
|
*/
|
|
32
|
-
export declare function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns?: string[]): Promise<string[]>;
|
|
160
|
+
export declare function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns?: string[], docsDir?: string, warnOnIgnoredFiles?: boolean, visitedPaths?: Set<string>): Promise<string[]>;
|
|
33
161
|
/**
|
|
34
162
|
* Extract title from content or use the filename
|
|
35
163
|
* @param data - Frontmatter data
|
|
@@ -42,9 +170,10 @@ export declare function extractTitle(data: any, content: string, filePath: strin
|
|
|
42
170
|
* Resolve and inline partial imports in markdown content
|
|
43
171
|
* @param content - The markdown content with import statements
|
|
44
172
|
* @param filePath - The path of the file containing the imports
|
|
173
|
+
* @param importChain - Set of file paths in the current import chain (for circular dependency detection)
|
|
45
174
|
* @returns Content with partials resolved
|
|
46
175
|
*/
|
|
47
|
-
export declare function resolvePartialImports(content: string, filePath: string): Promise<string>;
|
|
176
|
+
export declare function resolvePartialImports(content: string, filePath: string, importChain?: Set<string>): Promise<string>;
|
|
48
177
|
/**
|
|
49
178
|
* Clean markdown content for LLM consumption
|
|
50
179
|
* @param content - Raw markdown content
|
|
@@ -60,3 +189,33 @@ export declare function cleanMarkdownContent(content: string, excludeImports?: b
|
|
|
60
189
|
* @returns Transformed URL path
|
|
61
190
|
*/
|
|
62
191
|
export declare function applyPathTransformations(urlPath: string, pathTransformation?: PluginOptions['pathTransformation']): string;
|
|
192
|
+
/**
|
|
193
|
+
* Sanitize a string to create a safe filename
|
|
194
|
+
* @param input - Input string (typically a title)
|
|
195
|
+
* @param fallback - Fallback string if input becomes empty after sanitization
|
|
196
|
+
* @returns Sanitized filename (without extension)
|
|
197
|
+
* @throws ValidationError if input or fallback is not a string
|
|
198
|
+
*/
|
|
199
|
+
export declare function sanitizeForFilename(input: string, fallback?: string, options?: {
|
|
200
|
+
preserveUnicode?: boolean;
|
|
201
|
+
preserveCase?: boolean;
|
|
202
|
+
}): string;
|
|
203
|
+
/**
|
|
204
|
+
* Ensure a unique identifier from a set of used identifiers
|
|
205
|
+
* @param baseIdentifier - Base identifier to make unique
|
|
206
|
+
* @param usedIdentifiers - Set of already used identifiers
|
|
207
|
+
* @param suffix - Optional callback `(counter, base) => string` that generates the suffix (default: number in parentheses)
|
|
208
|
+
* @returns Unique identifier
|
|
209
|
+
* @throws ValidationError if baseIdentifier is not a string or usedIdentifiers is not a Set
|
|
210
|
+
*/
|
|
211
|
+
export declare function ensureUniqueIdentifier(baseIdentifier: string, usedIdentifiers: Set<string>, suffix?: (counter: number, base: string) => string): string;
|
|
212
|
+
/**
|
|
213
|
+
* Create standardized markdown content template
|
|
214
|
+
* @param title - Document title
|
|
215
|
+
* @param description - Document description
|
|
216
|
+
* @param content - Document content
|
|
217
|
+
* @param includeMetadata - Whether to include description metadata
|
|
218
|
+
* @param frontMatter - Optional frontmatter to include at the top
|
|
219
|
+
* @returns Formatted markdown content
|
|
220
|
+
*/
|
|
221
|
+
export declare function createMarkdownContent(title: string, description?: string, content?: string, includeMetadata?: boolean, frontMatter?: Record<string, any>): string;
|