docusaurus-plugin-llms 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -15
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +6 -2
- package/lib/generator.js +200 -120
- package/lib/index.js +175 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +230 -83
- package/lib/types.d.ts +13 -0
- package/lib/utils.d.ts +165 -6
- package/lib/utils.js +481 -28
- package/package.json +5 -3
- package/src/generator.ts +270 -128
- package/src/index.ts +204 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +314 -127
- package/src/types.ts +20 -1
- package/src/utils.ts +594 -48
package/src/processor.ts
CHANGED
|
@@ -6,12 +6,16 @@ import * as path from 'path';
|
|
|
6
6
|
import matter from 'gray-matter';
|
|
7
7
|
import { minimatch } from 'minimatch';
|
|
8
8
|
import { DocInfo, PluginContext } from './types';
|
|
9
|
-
import {
|
|
10
|
-
readFile,
|
|
11
|
-
extractTitle,
|
|
12
|
-
cleanMarkdownContent,
|
|
9
|
+
import {
|
|
10
|
+
readFile,
|
|
11
|
+
extractTitle,
|
|
12
|
+
cleanMarkdownContent,
|
|
13
13
|
applyPathTransformations,
|
|
14
|
-
resolvePartialImports
|
|
14
|
+
resolvePartialImports,
|
|
15
|
+
normalizePath,
|
|
16
|
+
logger,
|
|
17
|
+
getErrorMessage,
|
|
18
|
+
isNonEmptyString
|
|
15
19
|
} from './utils';
|
|
16
20
|
|
|
17
21
|
/**
|
|
@@ -24,8 +28,8 @@ import {
|
|
|
24
28
|
* @returns Processed file data
|
|
25
29
|
*/
|
|
26
30
|
export async function processMarkdownFile(
|
|
27
|
-
filePath: string,
|
|
28
|
-
baseDir: string,
|
|
31
|
+
filePath: string,
|
|
32
|
+
baseDir: string,
|
|
29
33
|
siteUrl: string,
|
|
30
34
|
pathPrefix: string = 'docs',
|
|
31
35
|
pathTransformation?: {
|
|
@@ -38,35 +42,70 @@ export async function processMarkdownFile(
|
|
|
38
42
|
): Promise<DocInfo | null> {
|
|
39
43
|
const content = await readFile(filePath);
|
|
40
44
|
const { data, content: markdownContent } = matter(content);
|
|
41
|
-
|
|
45
|
+
|
|
42
46
|
// Skip draft files
|
|
43
47
|
if (data.draft === true) {
|
|
44
48
|
return null;
|
|
45
49
|
}
|
|
50
|
+
|
|
51
|
+
// Validate and clean empty frontmatter fields
|
|
52
|
+
// Empty strings should be treated as undefined to allow fallback logic
|
|
53
|
+
if (data.title !== undefined && !isNonEmptyString(data.title)) {
|
|
54
|
+
logger.warn(`Empty title in frontmatter for ${filePath}. Using fallback.`);
|
|
55
|
+
data.title = undefined;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (data.description !== undefined && !isNonEmptyString(data.description)) {
|
|
59
|
+
data.description = undefined;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
if (data.slug !== undefined && !isNonEmptyString(data.slug)) {
|
|
63
|
+
data.slug = undefined;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
if (data.id !== undefined && !isNonEmptyString(data.id)) {
|
|
67
|
+
data.id = undefined;
|
|
68
|
+
}
|
|
46
69
|
|
|
47
70
|
// Resolve partial imports before processing
|
|
48
71
|
const resolvedContent = await resolvePartialImports(markdownContent, filePath);
|
|
49
72
|
|
|
50
73
|
const relativePath = path.relative(baseDir, filePath);
|
|
51
74
|
// Convert to URL path format (replace backslashes with forward slashes on Windows)
|
|
52
|
-
const normalizedPath = relativePath
|
|
75
|
+
const normalizedPath = normalizePath(relativePath);
|
|
53
76
|
|
|
54
77
|
let fullUrl: string;
|
|
55
|
-
|
|
56
|
-
if (resolvedUrl) {
|
|
78
|
+
|
|
79
|
+
if (isNonEmptyString(resolvedUrl)) {
|
|
57
80
|
// Use the actual resolved URL from Docusaurus if provided
|
|
58
|
-
|
|
81
|
+
try {
|
|
82
|
+
fullUrl = new URL(resolvedUrl, siteUrl).toString();
|
|
83
|
+
} catch (error: unknown) {
|
|
84
|
+
logger.warn(`Invalid URL construction: ${resolvedUrl} with base ${siteUrl}. Using fallback.`);
|
|
85
|
+
// Fallback to string concatenation with proper path joining
|
|
86
|
+
const baseUrl = siteUrl.endsWith('/') ? siteUrl.slice(0, -1) : siteUrl;
|
|
87
|
+
const urlPath = resolvedUrl.startsWith('/') ? resolvedUrl : `/${resolvedUrl}`;
|
|
88
|
+
fullUrl = baseUrl + urlPath;
|
|
89
|
+
}
|
|
59
90
|
} else {
|
|
60
91
|
// Fallback to the old path construction method
|
|
61
92
|
// Convert .md extension to appropriate path
|
|
62
93
|
const linkPathBase = normalizedPath.replace(/\.mdx?$/, '');
|
|
63
94
|
|
|
64
95
|
// Handle index files specially
|
|
65
|
-
|
|
96
|
+
let linkPath = linkPathBase.endsWith('index')
|
|
66
97
|
? linkPathBase.replace(/\/index$/, '')
|
|
67
98
|
: linkPathBase;
|
|
68
99
|
|
|
69
|
-
//
|
|
100
|
+
// linkPath might include the pathPrefix (e.g., "docs/api/core")
|
|
101
|
+
// We need to remove the pathPrefix before applying transformations, then add it back later
|
|
102
|
+
if (pathPrefix && linkPath.startsWith(`${pathPrefix}/`)) {
|
|
103
|
+
linkPath = linkPath.substring(`${pathPrefix}/`.length);
|
|
104
|
+
} else if (pathPrefix && linkPath === pathPrefix) {
|
|
105
|
+
linkPath = '';
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Apply path transformations to the clean link path (without pathPrefix)
|
|
70
109
|
const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
|
|
71
110
|
|
|
72
111
|
// Also apply path transformations to the pathPrefix if it's not empty
|
|
@@ -76,13 +115,44 @@ export async function processMarkdownFile(
|
|
|
76
115
|
transformedPathPrefix = '';
|
|
77
116
|
}
|
|
78
117
|
|
|
79
|
-
//
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
118
|
+
// Ensure path segments are URL-safe with sophisticated encoding detection
|
|
119
|
+
const encodedLinkPath = transformedLinkPath.split('/').map(segment => {
|
|
120
|
+
// Check if segment contains characters that need encoding
|
|
121
|
+
// Unreserved characters (per RFC 3986): A-Z a-z 0-9 - . _ ~
|
|
122
|
+
if (!/[^A-Za-z0-9\-._~]/.test(segment)) {
|
|
123
|
+
// Segment only contains unreserved characters, no encoding needed
|
|
124
|
+
return segment;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
try {
|
|
128
|
+
// Try to decode - if it changes, it was already encoded
|
|
129
|
+
const decoded = decodeURIComponent(segment);
|
|
130
|
+
if (decoded !== segment) {
|
|
131
|
+
// Was already encoded, return as-is
|
|
132
|
+
return segment;
|
|
133
|
+
}
|
|
134
|
+
// Not encoded, encode it
|
|
135
|
+
return encodeURIComponent(segment);
|
|
136
|
+
} catch {
|
|
137
|
+
// Malformed encoding, re-encode
|
|
138
|
+
return encodeURIComponent(segment);
|
|
139
|
+
}
|
|
140
|
+
}).join('/');
|
|
141
|
+
|
|
142
|
+
// Construct URL by encoding path components, then combine with site URL
|
|
143
|
+
// We don't use URL constructor for the full path because it decodes some characters
|
|
144
|
+
const pathPart = transformedPathPrefix ? `${transformedPathPrefix}/${encodedLinkPath}` : encodedLinkPath;
|
|
145
|
+
try {
|
|
146
|
+
const baseUrl = new URL(siteUrl);
|
|
147
|
+
fullUrl = `${baseUrl.origin}/${pathPart}`;
|
|
148
|
+
} catch (error: unknown) {
|
|
149
|
+
logger.warn(`Invalid siteUrl: ${siteUrl}. Using fallback.`);
|
|
150
|
+
// Fallback to string concatenation with proper path joining
|
|
151
|
+
const baseUrl = siteUrl.endsWith('/') ? siteUrl.slice(0, -1) : siteUrl;
|
|
152
|
+
fullUrl = `${baseUrl}/${pathPart}`;
|
|
153
|
+
}
|
|
84
154
|
}
|
|
85
|
-
|
|
155
|
+
|
|
86
156
|
// Extract title
|
|
87
157
|
const title = extractTitle(data, resolvedContent, filePath);
|
|
88
158
|
|
|
@@ -90,7 +160,7 @@ export async function processMarkdownFile(
|
|
|
90
160
|
let description = '';
|
|
91
161
|
|
|
92
162
|
// First priority: Use frontmatter description if available
|
|
93
|
-
if (data.description) {
|
|
163
|
+
if (isNonEmptyString(data.description)) {
|
|
94
164
|
description = data.description;
|
|
95
165
|
} else {
|
|
96
166
|
// Second priority: Find the first non-heading paragraph
|
|
@@ -115,14 +185,14 @@ export async function processMarkdownFile(
|
|
|
115
185
|
|
|
116
186
|
// Only remove heading markers at the beginning of descriptions or lines
|
|
117
187
|
// This preserves # characters that are part of the content
|
|
118
|
-
if (description) {
|
|
188
|
+
if (isNonEmptyString(description)) {
|
|
119
189
|
// Original approach had issues with hashtags inside content
|
|
120
190
|
// Fix: Only remove # symbols at the beginning of lines or description
|
|
121
191
|
// that are followed by a space (actual heading markers)
|
|
122
192
|
description = description.replace(/^(#+)\s+/gm, '');
|
|
123
193
|
|
|
124
194
|
// Special handling for description frontmatter with heading markers
|
|
125
|
-
if (data.description && data.description.startsWith('#')) {
|
|
195
|
+
if (isNonEmptyString(data.description) && data.description.startsWith('#')) {
|
|
126
196
|
// If the description in frontmatter starts with a heading marker,
|
|
127
197
|
// we should preserve it in the extracted description
|
|
128
198
|
description = description.replace(/^#+\s+/, '');
|
|
@@ -133,17 +203,17 @@ export async function processMarkdownFile(
|
|
|
133
203
|
|
|
134
204
|
// Validate that the description doesn't contain markdown headings
|
|
135
205
|
if (description.match(/^#+\s+/m)) {
|
|
136
|
-
|
|
206
|
+
logger.warn(`Warning: Description for "${title}" may still contain heading markers`);
|
|
137
207
|
}
|
|
138
208
|
|
|
139
209
|
// Warn if the description contains HTML tags
|
|
140
210
|
if (/<[^>]+>/g.test(description)) {
|
|
141
|
-
|
|
211
|
+
logger.warn(`Warning: Description for "${title}" contains HTML tags`);
|
|
142
212
|
}
|
|
143
213
|
|
|
144
214
|
// Warn if the description is very long
|
|
145
215
|
if (description.length > 500) {
|
|
146
|
-
|
|
216
|
+
logger.warn(`Warning: Description for "${title}" is very long (${description.length} characters)`);
|
|
147
217
|
}
|
|
148
218
|
}
|
|
149
219
|
|
|
@@ -156,9 +226,147 @@ export async function processMarkdownFile(
|
|
|
156
226
|
url: fullUrl,
|
|
157
227
|
content: cleanedContent,
|
|
158
228
|
description: description || '',
|
|
229
|
+
frontMatter: data,
|
|
159
230
|
};
|
|
160
231
|
}
|
|
161
232
|
|
|
233
|
+
/**
 * Strip a leading "<digits>-" ordering prefix from every "/"-separated
 * segment of a path (e.g. "01-guide/002-intro" -> "guide/intro").
 * Segments without such a prefix are left untouched.
 */
function removeNumberedPrefixes(path: string): string {
  const cleanedSegments: string[] = [];
  for (const part of path.split('/')) {
    // Matches prefixes such as "1-", "01-" or "001-" at the start of the segment only
    cleanedSegments.push(part.replace(/^\d+-/, ''));
  }
  return cleanedSegments.join('/');
}
|
|
242
|
+
|
|
243
|
+
/**
 * Look up candidate paths in the route map, in order, and return the first
 * non-empty route found. Each candidate is tried as given and then with a
 * trailing "/" appended.
 *
 * @param routeMap - Map from route path to resolved route
 * @param possiblePaths - Candidate route paths, highest priority first
 * @returns The first truthy route found, or undefined when none matches
 */
function findRouteInMap(routeMap: Map<string, string>, possiblePaths: string[]): string | undefined {
  for (let i = 0; i < possiblePaths.length; i++) {
    const candidate = possiblePaths[i];

    const direct = routeMap.get(candidate);
    if (direct) {
      return direct;
    }

    // Fall back to the trailing-slash form of the same candidate
    const withSlash = routeMap.get(`${candidate}/`);
    if (withSlash) {
      return withSlash;
    }
  }
  return undefined;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Look the path up in the route map without altering it: first under the
 * path prefix ("/<pathPrefix>/<relativePath>"), then at the root
 * ("/<relativePath>").
 *
 * @returns The matching route, or undefined when neither form is present
 */
function tryExactRouteMatch(
  routeMap: Map<string, string>,
  relativePath: string,
  pathPrefix: string
): string | undefined {
  const prefixedCandidate = `/${pathPrefix}/${relativePath}`;
  const rootCandidate = `/${relativePath}`;
  return findRouteInMap(routeMap, [prefixedCandidate, rootCandidate]);
}
|
|
270
|
+
|
|
271
|
+
/**
 * Resolve a route by dropping numbered ordering prefixes ("01-") from the path.
 *
 * Strategy, in order:
 *  1. Strip the prefix from every segment and look that path up.
 *  2. For multi-segment paths, strip the prefix from one segment at a time
 *     (left to right) and look each variant up.
 * Every lookup tries "/<pathPrefix>/<path>" before "/<path>".
 *
 * @returns The first matching route, or undefined when nothing matches
 */
function tryNumberedPrefixResolution(
  routeMap: Map<string, string>,
  relativePath: string,
  pathPrefix: string
): string | undefined {
  // Both lookup forms for a given candidate path, prefixed form first
  const candidatesFor = (candidate: string): string[] => [
    `/${pathPrefix}/${candidate}`,
    `/${candidate}`,
  ];

  const fullyCleaned = findRouteInMap(routeMap, candidatesFor(removeNumberedPrefixes(relativePath)));
  if (fullyCleaned) {
    return fullyCleaned;
  }

  const parts = relativePath.split('/');
  if (parts.length <= 1) {
    return undefined;
  }

  // Strip the numbered prefix at exactly one level per attempt
  for (let level = 0; level < parts.length; level++) {
    const variant = parts
      .map((part, index) => (index === level ? part.replace(/^\d+-/, '') : part))
      .join('/');

    const hit = findRouteInMap(routeMap, candidatesFor(variant));
    if (hit) {
      return hit;
    }
  }

  return undefined;
}
|
|
306
|
+
|
|
307
|
+
/**
 * Last-resort lookup: scan the flat list of route paths for one that
 * corresponds to the file path once numbered prefixes are stripped.
 *
 * The comparison is case-insensitive on both the route and the path prefix,
 * and ignores a single trailing slash on the route, so "/docs/intro/" is
 * found for "docs/01-intro". The route is returned exactly as it appears in
 * `routesPaths`.
 *
 * @param routesPaths - Known route paths
 * @param relativePath - File path without extension, "/"-separated
 * @param pathPrefix - Path prefix ('docs' or 'blog')
 * @returns The first matching route path, or undefined when none matches
 */
function tryRoutesPathsMatch(
  routesPaths: string[],
  relativePath: string,
  pathPrefix: string
): string | undefined {
  const normalizedCleanPath = removeNumberedPrefixes(relativePath).toLowerCase();
  // Routes are lower-cased before comparison, so the prefix must be as well
  const normalizedPrefix = pathPrefix.toLowerCase();

  return routesPaths.find(routePath => {
    const loweredRoute = routePath.toLowerCase();
    // Drop one trailing slash (but keep the root route "/") so that
    // trailing-slash routes are matched too
    const normalizedRoute =
      loweredRoute.length > 1 && loweredRoute.endsWith('/')
        ? loweredRoute.slice(0, -1)
        : loweredRoute;

    return normalizedRoute.endsWith(`/${normalizedCleanPath}`) ||
      normalizedRoute === `/${normalizedPrefix}/${normalizedCleanPath}` ||
      normalizedRoute === `/${normalizedCleanPath}`;
  });
}
|
|
325
|
+
|
|
326
|
+
/**
 * Resolve the URL for a document using Docusaurus routes.
 *
 * Lookup order: exact route-map match, then route-map match with numbered
 * prefixes removed, then a scan of `context.routesPaths` when it is present.
 *
 * @param filePath - Full path to the file
 * @param baseDir - Base directory (typically siteDir)
 * @param pathPrefix - Path prefix ('docs' or 'blog')
 * @param context - Plugin context with route map
 * @returns Resolved URL, or undefined if there is no route map or no match
 */
function resolveDocumentUrl(
  filePath: string,
  baseDir: string,
  pathPrefix: string,
  context: PluginContext
): string | undefined {
  const { routeMap, routesPaths } = context;

  // Nothing to resolve against without a route map
  if (!routeMap) {
    return undefined;
  }

  // Turn the file path into a route-like path: "/"-separated, no extension,
  // no trailing "/index"
  const relativePath = normalizePath(path.relative(baseDir, filePath))
    .replace(/\.mdx?$/, '')
    .replace(/\/index$/, '');

  // Prefer an exact match; only then retry with numbered prefixes removed
  const routeMapMatch =
    tryExactRouteMatch(routeMap, relativePath, pathPrefix) ||
    tryNumberedPrefixResolution(routeMap, relativePath, pathPrefix);
  if (routeMapMatch) {
    return routeMapMatch;
  }

  // Final fallback: scan the routesPaths array when available
  return routesPaths
    ? tryRoutesPathsMatch(routesPaths, relativePath, pathPrefix)
    : undefined;
}
|
|
369
|
+
|
|
162
370
|
/**
|
|
163
371
|
* Process files based on include patterns, ignore patterns, and ordering
|
|
164
372
|
* @param context - Plugin context
|
|
@@ -169,6 +377,37 @@ export async function processMarkdownFile(
|
|
|
169
377
|
* @param includeUnmatched - Whether to include unmatched files
|
|
170
378
|
* @returns Processed files
|
|
171
379
|
*/
|
|
380
|
+
/**
 * Helper function to check if a file matches a glob pattern.
 *
 * The pattern is tried against two forms of the file path so that users can
 * write patterns either way:
 *  - site-relative (e.g. "docs/quickstart/file.md")
 *  - docs-relative (e.g. "quickstart/file.md"), only when the file really
 *    lies inside the docs directory
 *
 * @param file - Path of the file to test
 * @param pattern - Glob pattern (matched with `matchBase: true`)
 * @param siteDir - Site root directory
 * @param docsDir - Docs directory, relative to siteDir
 * @returns true when the pattern matches either path form
 */
function matchesPattern(file: string, pattern: string, siteDir: string, docsDir: string): boolean {
  const minimatchOptions = { matchBase: true };

  // Site-relative path (e.g., "docs/quickstart/file.md")
  const siteRelativePath = normalizePath(path.relative(siteDir, file));
  if (minimatch(siteRelativePath, pattern, minimatchOptions)) {
    return true;
  }

  // Docs-relative path (e.g., "quickstart/file.md"). Containment is decided
  // from the relative path rather than a raw string-prefix test, so a sibling
  // directory such as "docs-old" is not mistaken for being inside "docs".
  const docsBaseDir = path.resolve(path.join(siteDir, docsDir));
  const relativeToDocs = path.relative(docsBaseDir, path.resolve(file));
  const isInsideDocs =
    relativeToDocs !== '' &&
    relativeToDocs !== '..' &&
    !relativeToDocs.startsWith(`..${path.sep}`) &&
    // path.relative can return an absolute path (e.g. different drive on Windows)
    !path.isAbsolute(relativeToDocs);

  if (!isInsideDocs) {
    return false;
  }

  return minimatch(normalizePath(relativeToDocs), pattern, minimatchOptions);
}
|
|
410
|
+
|
|
172
411
|
export async function processFilesWithPatterns(
|
|
173
412
|
context: PluginContext,
|
|
174
413
|
allFiles: string[],
|
|
@@ -184,9 +423,8 @@ export async function processFilesWithPatterns(
|
|
|
184
423
|
|
|
185
424
|
if (includePatterns.length > 0) {
|
|
186
425
|
filteredFiles = allFiles.filter(file => {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
minimatch(relativePath, pattern, { matchBase: true })
|
|
426
|
+
return includePatterns.some(pattern =>
|
|
427
|
+
matchesPattern(file, pattern, siteDir, docsDir)
|
|
190
428
|
);
|
|
191
429
|
});
|
|
192
430
|
}
|
|
@@ -194,9 +432,8 @@ export async function processFilesWithPatterns(
|
|
|
194
432
|
// Apply ignore patterns
|
|
195
433
|
if (ignorePatterns.length > 0) {
|
|
196
434
|
filteredFiles = filteredFiles.filter(file => {
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
minimatch(relativePath, pattern, { matchBase: true })
|
|
435
|
+
return !ignorePatterns.some(pattern =>
|
|
436
|
+
matchesPattern(file, pattern, siteDir, docsDir)
|
|
200
437
|
);
|
|
201
438
|
});
|
|
202
439
|
}
|
|
@@ -210,8 +447,7 @@ export async function processFilesWithPatterns(
|
|
|
210
447
|
// Process files according to orderPatterns
|
|
211
448
|
for (const pattern of orderPatterns) {
|
|
212
449
|
const matchingFiles = filteredFiles.filter(file => {
|
|
213
|
-
|
|
214
|
-
return minimatch(relativePath, pattern, { matchBase: true }) && !matchedFiles.has(file);
|
|
450
|
+
return matchesPattern(file, pattern, siteDir, docsDir) && !matchedFiles.has(file);
|
|
215
451
|
});
|
|
216
452
|
|
|
217
453
|
for (const file of matchingFiles) {
|
|
@@ -229,100 +465,51 @@ export async function processFilesWithPatterns(
|
|
|
229
465
|
filesToProcess = filteredFiles;
|
|
230
466
|
}
|
|
231
467
|
|
|
232
|
-
// Process
|
|
233
|
-
const
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
// Convert file path to a potential route path
|
|
246
|
-
const relativePath = path.relative(baseDir, filePath)
|
|
247
|
-
.replace(/\\/g, '/')
|
|
248
|
-
.replace(/\.mdx?$/, '')
|
|
249
|
-
.replace(/\/index$/, '');
|
|
250
|
-
|
|
251
|
-
// Function to remove numbered prefixes from path segments
|
|
252
|
-
const removeNumberedPrefixes = (path: string): string => {
|
|
253
|
-
return path.split('/').map(segment => {
|
|
254
|
-
// Remove numbered prefixes like "01-", "1-", "001-" from each segment
|
|
255
|
-
return segment.replace(/^\d+-/, '');
|
|
256
|
-
}).join('/');
|
|
257
|
-
};
|
|
258
|
-
|
|
259
|
-
// Check various possible route patterns
|
|
260
|
-
const cleanPath = removeNumberedPrefixes(relativePath);
|
|
261
|
-
const possiblePaths = [
|
|
262
|
-
`/${pathPrefix}/${cleanPath}`,
|
|
263
|
-
`/${cleanPath}`,
|
|
264
|
-
`/${pathPrefix}/${relativePath}`, // Try with original path
|
|
265
|
-
`/${relativePath}`, // Try without prefix
|
|
266
|
-
];
|
|
267
|
-
|
|
268
|
-
// Also handle nested folder structures with numbered prefixes
|
|
269
|
-
const segments = relativePath.split('/');
|
|
270
|
-
if (segments.length > 1) {
|
|
271
|
-
// Try removing numbered prefixes from different levels
|
|
272
|
-
for (let i = 0; i < segments.length; i++) {
|
|
273
|
-
const modifiedSegments = [...segments];
|
|
274
|
-
modifiedSegments[i] = modifiedSegments[i].replace(/^\d+-/, '');
|
|
275
|
-
const modifiedPath = modifiedSegments.join('/');
|
|
276
|
-
possiblePaths.push(`/${pathPrefix}/${modifiedPath}`);
|
|
277
|
-
possiblePaths.push(`/${modifiedPath}`);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
// Try to find a match in the route map
|
|
282
|
-
for (const possiblePath of possiblePaths) {
|
|
283
|
-
if (context.routeMap.has(possiblePath)) {
|
|
284
|
-
resolvedUrl = context.routeMap.get(possiblePath);
|
|
285
|
-
break;
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
// If still not found, try to find the best match using the routesPaths array
|
|
290
|
-
if (!resolvedUrl && context.routesPaths) {
|
|
291
|
-
const normalizedCleanPath = cleanPath.toLowerCase();
|
|
292
|
-
const matchingRoute = context.routesPaths.find(routePath => {
|
|
293
|
-
const normalizedRoute = routePath.toLowerCase();
|
|
294
|
-
return normalizedRoute.endsWith(`/${normalizedCleanPath}`) ||
|
|
295
|
-
normalizedRoute === `/${pathPrefix}/${normalizedCleanPath}` ||
|
|
296
|
-
normalizedRoute === `/${normalizedCleanPath}`;
|
|
297
|
-
});
|
|
298
|
-
if (matchingRoute) {
|
|
299
|
-
resolvedUrl = matchingRoute;
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
|
|
468
|
+
// Process files in parallel using Promise.allSettled
|
|
469
|
+
const results = await Promise.allSettled(
|
|
470
|
+
filesToProcess.map(async (filePath) => {
|
|
471
|
+
try {
|
|
472
|
+
// Determine if this is a blog or docs file
|
|
473
|
+
const isBlogFile = filePath.includes(path.join(siteDir, 'blog'));
|
|
474
|
+
// Use siteDir as baseDir to preserve full directory structure (docs/path/file.md instead of just path/file.md)
|
|
475
|
+
const baseDir = siteDir;
|
|
476
|
+
const pathPrefix = isBlogFile ? 'blog' : 'docs';
|
|
477
|
+
|
|
478
|
+
// Try to find the resolved URL for this file from the route map
|
|
479
|
+
const resolvedUrl = resolveDocumentUrl(filePath, baseDir, pathPrefix, context);
|
|
480
|
+
|
|
303
481
|
// Log when we successfully resolve a URL using Docusaurus routes
|
|
304
|
-
if (resolvedUrl &&
|
|
305
|
-
|
|
482
|
+
if (resolvedUrl && context.routeMap) {
|
|
483
|
+
const relativePath = normalizePath(path.relative(baseDir, filePath))
|
|
484
|
+
.replace(/\.mdx?$/, '')
|
|
485
|
+
.replace(/\/index$/, '');
|
|
486
|
+
if (resolvedUrl !== `/${pathPrefix}/${relativePath}`) {
|
|
487
|
+
logger.verbose(`Resolved URL for ${path.basename(filePath)}: ${resolvedUrl} (was: /${pathPrefix}/${relativePath})`);
|
|
488
|
+
}
|
|
306
489
|
}
|
|
490
|
+
|
|
491
|
+
const docInfo = await processMarkdownFile(
|
|
492
|
+
filePath,
|
|
493
|
+
baseDir,
|
|
494
|
+
siteUrl,
|
|
495
|
+
pathPrefix,
|
|
496
|
+
context.options.pathTransformation,
|
|
497
|
+
context.options.excludeImports || false,
|
|
498
|
+
context.options.removeDuplicateHeadings || false,
|
|
499
|
+
resolvedUrl
|
|
500
|
+
);
|
|
501
|
+
return docInfo;
|
|
502
|
+
} catch (err: unknown) {
|
|
503
|
+
logger.warn(`Error processing ${filePath}: ${getErrorMessage(err)}`);
|
|
504
|
+
return null;
|
|
307
505
|
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
context.options.removeDuplicateHeadings || false,
|
|
317
|
-
resolvedUrl
|
|
318
|
-
);
|
|
319
|
-
if (docInfo !== null) {
|
|
320
|
-
processedDocs.push(docInfo);
|
|
321
|
-
}
|
|
322
|
-
} catch (err: any) {
|
|
323
|
-
console.warn(`Error processing ${filePath}: ${err.message}`);
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
|
|
506
|
+
})
|
|
507
|
+
);
|
|
508
|
+
|
|
509
|
+
// Filter successful results and non-null DocInfo objects
|
|
510
|
+
const processedDocs = results
|
|
511
|
+
.filter((r): r is PromiseFulfilledResult<DocInfo | null> => r.status === 'fulfilled' && r.value !== null)
|
|
512
|
+
.map(r => r.value as DocInfo);
|
|
513
|
+
|
|
327
514
|
return processedDocs;
|
|
328
|
-
}
|
|
515
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -13,6 +13,7 @@ export interface DocInfo {
|
|
|
13
13
|
url: string;
|
|
14
14
|
content: string;
|
|
15
15
|
description: string;
|
|
16
|
+
frontMatter?: Record<string, any>;
|
|
16
17
|
}
|
|
17
18
|
|
|
18
19
|
/**
|
|
@@ -109,12 +110,30 @@ export interface PluginOptions {
|
|
|
109
110
|
|
|
110
111
|
/** Whether to generate individual markdown files and link to them from llms.txt instead of original docs (default: false) */
|
|
111
112
|
generateMarkdownFiles?: boolean;
|
|
112
|
-
|
|
113
|
+
|
|
114
|
+
/** Array of frontmatter keys to preserve in generated individual markdown files (only used when generateMarkdownFiles is true) */
|
|
115
|
+
keepFrontMatter?: string[];
|
|
116
|
+
|
|
113
117
|
/** Custom content to include at the root level of llms.txt (after title/description, before TOC) */
|
|
114
118
|
rootContent?: string;
|
|
115
119
|
|
|
116
120
|
/** Custom content to include at the root level of llms-full.txt (after title/description, before content sections) */
|
|
117
121
|
fullRootContent?: string;
|
|
122
|
+
|
|
123
|
+
/** Whether to preserve directory structure in generated markdown files (default: true) */
|
|
124
|
+
preserveDirectoryStructure?: boolean;
|
|
125
|
+
|
|
126
|
+
/** Batch size for processing large document sets to prevent memory issues (default: 100) */
|
|
127
|
+
processingBatchSize?: number;
|
|
128
|
+
|
|
129
|
+
/** Logging level for plugin output (default: 'normal'). Options: 'quiet', 'normal', 'verbose' */
|
|
130
|
+
logLevel?: 'quiet' | 'normal' | 'verbose';
|
|
131
|
+
|
|
132
|
+
/** Whether to warn about files that are ignored (no extension or unsupported extension) (default: false) */
|
|
133
|
+
warnOnIgnoredFiles?: boolean;
|
|
134
|
+
|
|
135
|
+
/** Index signature for Docusaurus plugin compatibility */
|
|
136
|
+
[key: string]: unknown;
|
|
118
137
|
}
|
|
119
138
|
|
|
120
139
|
/**
|