docusaurus-plugin-llms 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -15
- package/lib/generator-current.d.ts +44 -0
- package/lib/generator-current.js +398 -0
- package/lib/generator.d.ts +6 -2
- package/lib/generator.js +200 -120
- package/lib/index.js +175 -10
- package/lib/null-handling-guide.d.ts +47 -0
- package/lib/null-handling-guide.js +290 -0
- package/lib/processor.d.ts +0 -10
- package/lib/processor.js +230 -83
- package/lib/types.d.ts +13 -0
- package/lib/utils.d.ts +165 -6
- package/lib/utils.js +481 -28
- package/package.json +5 -3
- package/src/generator.ts +270 -128
- package/src/index.ts +204 -14
- package/src/null-handling-guide.ts +321 -0
- package/src/processor.ts +314 -127
- package/src/types.ts +20 -1
- package/src/utils.ts +594 -48
package/src/utils.ts
CHANGED
|
@@ -4,10 +4,277 @@
|
|
|
4
4
|
|
|
5
5
|
import * as fs from 'fs/promises';
|
|
6
6
|
import * as path from 'path';
|
|
7
|
+
import * as crypto from 'crypto';
|
|
7
8
|
import { minimatch } from 'minimatch';
|
|
8
9
|
import matter from 'gray-matter';
|
|
10
|
+
import * as YAML from 'yaml';
|
|
9
11
|
import { PluginOptions } from './types';
|
|
10
12
|
|
|
13
|
+
/**
|
|
14
|
+
* Null/Undefined Handling Guidelines:
|
|
15
|
+
*
|
|
16
|
+
* 1. For required parameters: Throw early if null/undefined
|
|
17
|
+
* 2. For optional parameters: Use optional chaining `value?.property`
|
|
18
|
+
* 3. For explicit null checks: Use `!== null` and `!== undefined` or the isDefined type guard
|
|
19
|
+
* 4. For string validation: Use isNonEmptyString() type guard
|
|
20
|
+
* 5. For truthy checks on booleans: Use explicit comparison or Boolean(value)
|
|
21
|
+
*
|
|
22
|
+
* Avoid: `if (value)` when value could be 0, '', or false legitimately
|
|
23
|
+
* Use: Type guards for consistent, type-safe checks
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * Type guard reporting whether a value is present.
 *
 * Only `null` and `undefined` count as absent; falsy values such as `0`, `''`
 * and `false` are treated as defined.
 *
 * @param value - Value to check
 * @returns True when the value is neither null nor undefined
 */
export function isDefined<T>(value: T | null | undefined): value is T {
  const isAbsent = value === undefined || value === null;
  return !isAbsent;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Type guard for strings that carry visible content.
 *
 * @param value - Value to check
 * @returns True when the value is a string that still has characters after trimming whitespace
 */
export function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== 'string') {
    return false;
  }
  // Whitespace-only strings are treated the same as the empty string.
  return value.trim().length > 0;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Type guard for arrays that contain at least one element.
 *
 * The element type `T` is asserted, not verified: only "is an array" and
 * "has length > 0" are checked at runtime.
 *
 * @param value - Value to check
 * @returns True when the value is an array with one or more elements
 */
export function isNonEmptyArray<T>(value: unknown): value is T[] {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.length > 0;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Turn any thrown value into a printable message without throwing itself.
 *
 * Strings are returned as-is, `Error` instances yield their `message`, and
 * anything else is serialized with `JSON.stringify`. Values that cannot be
 * serialized (or serialize to `undefined`) produce `'Unknown error'`.
 *
 * @param error - The caught value (Error, string, or any other type)
 * @returns A string representation of the error
 */
export function getErrorMessage(error: unknown): string {
  const fallback = 'Unknown error';

  if (typeof error === 'string') {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }

  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(error);
  } catch {
    // e.g. circular structures or BigInt values
    return fallback;
  }

  // JSON.stringify yields undefined for undefined, functions and symbols.
  return serialized ?? fallback;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Pull the stack trace off a caught value when it is a real `Error`.
 *
 * @param error - The caught value (Error or any other type)
 * @returns The error's `stack` for `Error` instances, otherwise undefined
 */
export function getErrorStack(error: unknown): string | undefined {
  return error instanceof Error ? error.stack : undefined;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Error raised by the validation helpers in this module when an argument
 * fails a check. Carries the name `'ValidationError'` so it can be told apart
 * from other errors in logs and `catch` blocks.
 */
export class ValidationError extends Error {
  /**
   * @param message - Human-readable description of the failed validation
   */
  constructor(message: string) {
    super(message);
    // Override the inherited "Error" label.
    this.name = 'ValidationError';
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Assert that a required value was actually supplied.
 *
 * @param value - The value to validate
 * @param paramName - Parameter name used in the error message
 * @returns The same value, typed without null/undefined
 * @throws ValidationError if the value is null or undefined
 */
export function validateRequired<T>(
  value: T | null | undefined,
  paramName: string
): T {
  if (value !== null && value !== undefined) {
    return value;
  }
  throw new ValidationError(`Required parameter '${paramName}' is null or undefined`);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Validates that a value is a string and optionally checks its length and shape.
 *
 * Checks run in order: type, `minLength`, `maxLength`, `pattern`; the first
 * failing check throws.
 *
 * @param value - The value to validate
 * @param paramName - Parameter name used in error messages
 * @param options - Optional constraints: `minLength` / `maxLength` (compared
 *   against `value.length`) and a `pattern` the string must match
 * @returns The validated string
 * @throws ValidationError if any check fails
 */
export function validateString(
  value: unknown,
  paramName: string,
  options: { minLength?: number; maxLength?: number; pattern?: RegExp } = {}
): string {
  if (typeof value !== 'string') {
    throw new ValidationError(`Parameter '${paramName}' must be a string, got ${typeof value}`);
  }

  const { minLength, maxLength, pattern } = options;

  if (minLength !== undefined && value.length < minLength) {
    throw new ValidationError(`Parameter '${paramName}' must be at least ${minLength} characters`);
  }

  if (maxLength !== undefined && value.length > maxLength) {
    throw new ValidationError(`Parameter '${paramName}' exceeds maximum length of ${maxLength}`);
  }

  if (pattern) {
    // A global or sticky RegExp remembers where its last match ended, so a
    // reused pattern would start searching mid-string and reject valid input
    // on alternating calls. Always test from the start, and leave the
    // caller's regex reset afterwards.
    pattern.lastIndex = 0;
    const matches = pattern.test(value);
    pattern.lastIndex = 0;

    if (!matches) {
      throw new ValidationError(`Parameter '${paramName}' does not match required pattern`);
    }
  }

  return value;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Validates that a value is an array and, when a validator is supplied, that
 * every element passes it.
 *
 * The element type `T` is asserted on return; only `elementValidator` (if
 * given) inspects the elements at runtime.
 *
 * @param value - The value to validate
 * @param paramName - Parameter name used in error messages
 * @param elementValidator - Optional predicate applied to each element
 * @returns The validated array
 * @throws ValidationError if the value is not an array or an element fails validation
 */
export function validateArray<T>(
  value: unknown,
  paramName: string,
  elementValidator?: (item: unknown) => boolean
): T[] {
  if (!Array.isArray(value)) {
    throw new ValidationError(`Parameter '${paramName}' must be an array`);
  }

  if (elementValidator) {
    // Stop at the first rejected element and remember where it was.
    let failedIndex = -1;
    value.some((item, index) => {
      if (elementValidator(item)) {
        return false;
      }
      failedIndex = index;
      return true;
    });

    if (failedIndex !== -1) {
      throw new ValidationError(`Element at index ${failedIndex} in '${paramName}' failed validation`);
    }
  }

  return value as T[];
}
|
|
172
|
+
|
|
173
|
+
/**
 * Verbosity levels for the plugin logger, ordered from least to most output.
 * The logger compares these numerically (`currentLogLevel >= LogLevel.X`).
 */
export enum LogLevel {
  /** Only errors are printed. */
  QUIET = 0,
  /** Errors, warnings and info messages are printed. */
  NORMAL = 1,
  /** Everything, including verbose diagnostics, is printed. */
  VERBOSE = 2
}
|
|
181
|
+
|
|
182
|
+
// Module-wide verbosity consulted by `logger`; starts at NORMAL.
let currentLogLevel: LogLevel = LogLevel.NORMAL;

/**
 * Change how much the plugin logger prints from now on.
 *
 * @param level - The logging level to use
 */
export function setLogLevel(level: LogLevel): void {
  currentLogLevel = level;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Shared logger for the plugin. Every message is prefixed with
 * `[docusaurus-plugin-llms]`; which channels print depends on the level set
 * via `setLogLevel`.
 */
export const logger = {
  /** Always printed (to stderr), regardless of the current level. */
  error(message: string): void {
    console.error(`[docusaurus-plugin-llms] ERROR: ${message}`);
  },

  /** Printed at NORMAL and above. */
  warn(message: string): void {
    if (currentLogLevel < LogLevel.NORMAL) {
      return;
    }
    console.warn(`[docusaurus-plugin-llms] ${message}`);
  },

  /** Printed at NORMAL and above. */
  info(message: string): void {
    if (currentLogLevel < LogLevel.NORMAL) {
      return;
    }
    console.log(`[docusaurus-plugin-llms] ${message}`);
  },

  /** Printed only at VERBOSE. */
  verbose(message: string): void {
    if (currentLogLevel < LogLevel.VERBOSE) {
      return;
    }
    console.log(`[docusaurus-plugin-llms] ${message}`);
  }
};
|
|
215
|
+
|
|
216
|
+
/**
 * Upper bounds on path length, in characters, used by `validatePathLength`.
 * NOTE(review): 260 and 4096 look like the usual Win32 MAX_PATH and Linux
 * PATH_MAX values — confirm these are the intended limits.
 */
const MAX_PATH_LENGTH_WINDOWS = 260;
const MAX_PATH_LENGTH_UNIX = 4096;
|
|
221
|
+
|
|
222
|
+
/**
 * Rewrite every backslash in a path as a forward slash so that paths compare
 * and match the same way regardless of which separator produced them.
 *
 * @param filePath - The file path to normalize
 * @returns The path with all `\` replaced by `/`
 * @throws ValidationError if filePath is not a string
 */
export function normalizePath(filePath: string): string {
  validateString(filePath, 'filePath');
  const segments = filePath.split('\\');
  return segments.join('/');
}
|
|
234
|
+
|
|
235
|
+
/**
 * Check a path against the maximum length for the current platform
 * (`MAX_PATH_LENGTH_WINDOWS` on win32, `MAX_PATH_LENGTH_UNIX` elsewhere).
 * Logs an error when the limit is exceeded.
 *
 * @param filePath - The file path to validate
 * @returns True if the path is within the limit, false otherwise
 */
export function validatePathLength(filePath: string): boolean {
  const onWindows = process.platform === 'win32';
  const maxLength = onWindows ? MAX_PATH_LENGTH_WINDOWS : MAX_PATH_LENGTH_UNIX;
  const withinLimit = filePath.length <= maxLength;

  if (!withinLimit) {
    logger.error(`Path exceeds maximum length (${maxLength}): ${filePath}`);
  }
  return withinLimit;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Return a usable output path, substituting a short hash-named file when the
 * requested path is longer than the platform allows.
 *
 * The replacement is `<outputDir>/<hash>.md`, where the hash is the first
 * eight hex characters of the MD5 digest of `relativePath`.
 *
 * @param fullPath - The full file path that may be too long
 * @param outputDir - The output directory base path
 * @param relativePath - The relative path from the output directory (hash input)
 * @returns `fullPath` when it is within limits, otherwise the shortened path
 */
export function shortenPathIfNeeded(
  fullPath: string,
  outputDir: string,
  relativePath: string
): string {
  const fitsAsIs = validatePathLength(fullPath);
  if (fitsAsIs) {
    return fullPath;
  }

  // Derive the replacement name from the relative path so the same document
  // always maps to the same shortened file.
  const digest = crypto.createHash('md5').update(relativePath).digest('hex');
  const shortName = `${digest.substring(0, 8)}.md`;
  const shortenedPath = path.join(outputDir, shortName);

  logger.warn(`Path too long, using shortened path: ${shortenedPath}`);
  logger.verbose(`Original path: ${fullPath}`);

  return shortenedPath;
}
|
|
277
|
+
|
|
11
278
|
/**
|
|
12
279
|
* Write content to a file
|
|
13
280
|
* @param filePath - Path to write the file to
|
|
@@ -20,63 +287,146 @@ export async function writeFile(filePath: string, data: string): Promise<void> {
|
|
|
20
287
|
/**
 * Read a file as UTF-8 text, dropping a leading byte-order mark if there is one.
 *
 * @param filePath - Path of the file to read
 * @returns Content of the file with BOM removed if present
 */
export async function readFile(filePath: string): Promise<string> {
  const raw = await fs.readFile(filePath, 'utf8');

  // Decoded as UTF-8, a BOM shows up as a single U+FEFF character at index 0.
  const BOM = '\uFEFF';
  return raw.startsWith(BOM) ? raw.slice(1) : raw;
}
|
|
28
303
|
|
|
29
304
|
/**
 * Check if a file should be ignored based on glob patterns.
 *
 * Each pattern is tried (with minimatch `matchBase: true`) against two forms
 * of the path: relative to the site root (e.g. `docs/quickstart/file.md`) and,
 * when the file lives under the docs directory, relative to that directory
 * (e.g. `quickstart/file.md`).
 *
 * @param filePath - Path to the file
 * @param baseDir - Base directory (site root) for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @param docsDir - Docs directory name (e.g., 'docs')
 * @returns Whether the file should be ignored
 */
export function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[], docsDir: string = 'docs'): boolean {
  if (!isNonEmptyArray(ignorePatterns)) {
    return false;
  }

  const minimatchOptions = { matchBase: true };

  // Site-relative path (e.g., "docs/quickstart/file.md")
  const siteRelativePath = normalizePath(path.relative(baseDir, filePath));

  // Docs-relative path (e.g., "quickstart/file.md"), only for files that are
  // genuinely inside the docs directory. Containment is decided from the
  // relative path rather than a string-prefix test, so a sibling such as
  // "<site>/docs-old/file.md" is not mistaken for a child of "<site>/docs".
  const docsBaseDir = path.resolve(path.join(baseDir, docsDir));
  const relativeToDocs = path.relative(docsBaseDir, path.resolve(filePath));
  const isInsideDocs =
    relativeToDocs !== '' &&
    relativeToDocs !== '..' &&
    !relativeToDocs.startsWith(`..${path.sep}`) &&
    // On Windows, a path on another drive comes back absolute.
    !path.isAbsolute(relativeToDocs);
  const docsRelativePath = isInsideDocs ? normalizePath(relativeToDocs) : null;

  return ignorePatterns.some(pattern =>
    minimatch(siteRelativePath, pattern, minimatchOptions) ||
    (docsRelativePath !== null && minimatch(docsRelativePath, pattern, minimatchOptions))
  );
}
|
|
47
344
|
|
|
48
345
|
/**
 * Recursively collect the Markdown (`.md` / `.mdx`) files under a directory.
 *
 * Entries matching `ignorePatterns` are skipped, symlinked directories are
 * followed, and files whose name starts with `_` (partials) are left out.
 * Directories are tracked by real path so that each is scanned at most once,
 * which also stops symlink loops.
 *
 * @param dir - Directory to scan
 * @param baseDir - Base directory (site root) for relative paths
 * @param ignorePatterns - Glob patterns for files to ignore
 * @param docsDir - Docs directory name (e.g., 'docs')
 * @param warnOnIgnoredFiles - Whether to warn about non-Markdown files that are skipped
 * @param visitedPaths - Real paths already scanned; shared across the recursion (internal use)
 * @returns Array of file paths
 */
export async function readMarkdownFiles(
  dir: string,
  baseDir: string,
  ignorePatterns: string[] = [],
  docsDir: string = 'docs',
  warnOnIgnoredFiles: boolean = false,
  visitedPaths: Set<string> = new Set()
): Promise<string[]> {
  // Resolve symlinks first so the loop check compares canonical locations.
  let canonicalDir: string;
  try {
    canonicalDir = await fs.realpath(dir);
  } catch (error: unknown) {
    logger.warn(`Failed to resolve real path for ${dir}: ${getErrorMessage(error)}`);
    return [];
  }

  if (visitedPaths.has(canonicalDir)) {
    logger.warn(`Skipping already visited path (possible symlink loop): ${dir}`);
    return [];
  }
  visitedPaths.add(canonicalDir);

  const files: string[] = [];
  const entries = await fs.readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);

    if (shouldIgnoreFile(fullPath, baseDir, ignorePatterns, docsDir)) {
      continue;
    }

    // A symlink is treated as a directory when its target is one.
    let isDir = entry.isDirectory();
    if (!isDir && entry.isSymbolicLink()) {
      try {
        isDir = (await fs.stat(fullPath)).isDirectory();
      } catch {
        // stat follows the link, so failure means the target is missing.
        logger.warn(`Skipping broken symlink: ${fullPath}`);
        continue;
      }
    }

    if (isDir) {
      const subDirFiles = await readMarkdownFiles(
        fullPath,
        baseDir,
        ignorePatterns,
        docsDir,
        warnOnIgnoredFiles,
        visitedPaths
      );
      files.push(...subDirFiles);
      continue;
    }

    if (!entry.name.includes('.')) {
      if (warnOnIgnoredFiles) {
        logger.warn(`Ignoring file without extension: ${fullPath}`);
      }
      continue;
    }

    const isMarkdown = entry.name.endsWith('.md') || entry.name.endsWith('.mdx');
    if (isMarkdown) {
      // Partials (leading underscore) are inlined elsewhere, not listed.
      if (!entry.name.startsWith('_')) {
        files.push(fullPath);
      }
      continue;
    }

    if (warnOnIgnoredFiles) {
      logger.warn(`Ignoring file with unsupported extension: ${fullPath}`);
    }
  }

  return files;
}
|
|
79
428
|
|
|
429
|
+
|
|
80
430
|
/**
|
|
81
431
|
* Extract title from content or use the filename
|
|
82
432
|
* @param data - Frontmatter data
|
|
@@ -85,79 +435,145 @@ export async function readMarkdownFiles(dir: string, baseDir: string, ignorePatt
|
|
|
85
435
|
* @returns Extracted title
|
|
86
436
|
*/
|
|
87
437
|
export function extractTitle(data: any, content: string, filePath: string): string {
  // 1. Frontmatter title, if it is a non-blank string. Optional chaining keeps
  //    this safe when no frontmatter object was produced at all.
  const frontmatterTitle: unknown = data?.title;
  if (isNonEmptyString(frontmatterTitle)) {
    return frontmatterTitle;
  }

  // 2. First level-one heading. The separator after `#` is restricted to
  //    spaces/tabs so an empty "#" line cannot swallow the newline and report
  //    the following paragraph as the title.
  const headingText = content.match(/^#[ \t]+(.*)/m)?.[1];
  if (isNonEmptyString(headingText)) {
    return headingText.trim();
  }

  // 3. Fall back to the file name: strip the extension, turn dashes into
  //    spaces and capitalize each word.
  return path.basename(filePath, path.extname(filePath))
    .replace(/-/g, ' ')
    .replace(/\b\w/g, (c: string) => c.toUpperCase());
}
|
|
454
|
+
|
|
455
|
+
/**
 * Backslash-escape every regex metacharacter in a string so it can be
 * embedded in a `RegExp` source and match literally.
 *
 * @param str - String to escape
 * @returns Escaped string safe for use in regex
 */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, (metachar) => `\\${metachar}`);
}
|
|
104
463
|
|
|
105
464
|
/**
 * Build a regex matching the import statement (default or `{ named }` form)
 * that brings `componentName` in from `importPath`.
 */
function buildPartialImportRegex(componentName: string, importPath: string): RegExp {
  const name = escapeRegex(componentName);
  const source = escapeRegex(importPath);
  return new RegExp(`^\\s*import\\s+(?:${name}|{\\s*${name}\\s*})\\s+from\\s+['"]${source}['"];?\\s*$`, 'gm');
}

/**
 * Build a regex matching JSX usage of `componentName`: either a self-closing
 * tag (`<Name />`) or an open/close pair with arbitrary children. The tag name
 * must be followed by whitespace, `/>` or `>`, so `<Name` never matches a
 * longer component name such as `<NameOther />`.
 */
function buildPartialJsxRegex(componentName: string): RegExp {
  const name = escapeRegex(componentName);
  return new RegExp(`<${name}(?:\\s[^>]*?)?(?:\\/>|>[\\s\\S]*?<\\/${name}>)`, 'g');
}

/**
 * Resolve and inline partial imports in markdown content.
 *
 * Every `import X from '…_partial.md(x)'` (or `import { X } from …`) is
 * removed and each `<X />` / `<X>…</X>` usage is replaced with the partial's
 * body (frontmatter stripped, nested partials resolved recursively). When a
 * partial cannot be read, or importing it would form a cycle, the import and
 * its usages are removed instead so no dangling reference is left behind.
 *
 * @param content - The markdown content with import statements
 * @param filePath - The path of the file containing the imports
 * @param importChain - Set of partial paths in the current import chain (for circular dependency detection)
 * @returns Content with partials resolved
 */
export async function resolvePartialImports(
  content: string,
  filePath: string,
  importChain: Set<string> = new Set()
): Promise<string> {
  let resolved = content;

  // Pattern 1: import PartialName from './_partial.mdx'
  // Pattern 2: import { PartialName } from './_partial.mdx'
  // The path must contain an underscore, which is how partial files are
  // recognized. The regex is created per call, so its lastIndex never leaks
  // between invocations.
  const importRegex = /^\s*import\s+(?:(\w+)|{\s*(\w+)\s*})\s+from\s+['"]([^'"]+_[^'"]+\.mdx?)['"];?\s*$/gm;

  // First pass: collect component name -> import path.
  const imports = new Map<string, string>();
  let match: RegExpExecArray | null;
  while ((match = importRegex.exec(content)) !== null) {
    const componentName = match[1] || match[2];
    imports.set(componentName, match[3]);
  }

  // Second pass: inline (or strip) each partial.
  for (const [componentName, importPath] of imports) {
    // What the JSX usage is replaced with; stays empty unless the partial
    // is successfully resolved.
    let inlined = '';

    try {
      const partialPath = path.resolve(path.dirname(filePath), importPath);

      if (importChain.has(partialPath)) {
        const chain = Array.from(importChain).join(' -> ');
        logger.error(`Circular import detected: ${chain} -> ${partialPath}`);
      } else {
        // Copy the chain so sibling imports do not see each other's entries.
        const newChain = new Set(importChain);
        newChain.add(partialPath);

        const partialContent = await readFile(partialPath);
        const { content: partialMarkdown } = matter(partialContent);
        const resolvedPartial = await resolvePartialImports(partialMarkdown, partialPath, newChain);
        inlined = resolvedPartial.trim();
      }
    } catch (error: unknown) {
      logger.warn(`Failed to resolve partial import from ${importPath}: ${getErrorMessage(error)}`);
    }

    resolved = resolved
      .replace(buildPartialImportRegex(componentName, importPath), '')
      // A replacer function is used so that `$&`, `$1`, `$$` etc. occurring in
      // the partial's markdown are inserted verbatim instead of being
      // interpreted as replacement patterns.
      .replace(buildPartialJsxRegex(componentName), () => inlined);
  }

  return resolved;
}
|
|
163
579
|
|
|
@@ -253,18 +669,20 @@ export function applyPathTransformations(
|
|
|
253
669
|
urlPath: string,
|
|
254
670
|
pathTransformation?: PluginOptions['pathTransformation']
|
|
255
671
|
): string {
|
|
256
|
-
if (!pathTransformation) {
|
|
672
|
+
if (!isDefined(pathTransformation)) {
|
|
257
673
|
return urlPath;
|
|
258
674
|
}
|
|
259
675
|
|
|
260
676
|
let transformedPath = urlPath;
|
|
261
|
-
|
|
677
|
+
|
|
262
678
|
// Remove ignored path segments
|
|
263
|
-
if (pathTransformation.ignorePaths
|
|
679
|
+
if (isNonEmptyArray(pathTransformation.ignorePaths)) {
|
|
264
680
|
for (const ignorePath of pathTransformation.ignorePaths) {
|
|
265
681
|
// Create a regex that matches the ignore path at the beginning, middle, or end of the path
|
|
266
682
|
// We use word boundaries to ensure we match complete path segments
|
|
267
|
-
|
|
683
|
+
// Escape special regex characters in ignorePath to prevent regex injection
|
|
684
|
+
const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
685
|
+
const ignoreRegex = new RegExp(`(^|/)(${escapedIgnorePath})(/|$)`, 'g');
|
|
268
686
|
transformedPath = transformedPath.replace(ignoreRegex, '$1$3');
|
|
269
687
|
}
|
|
270
688
|
|
|
@@ -276,7 +694,7 @@ export function applyPathTransformations(
|
|
|
276
694
|
}
|
|
277
695
|
|
|
278
696
|
// Add path segments if they're not already present
|
|
279
|
-
if (pathTransformation.addPaths
|
|
697
|
+
if (isNonEmptyArray(pathTransformation.addPaths)) {
|
|
280
698
|
// Process in reverse order to maintain the specified order in the final path
|
|
281
699
|
// This is because each path is prepended to the front
|
|
282
700
|
const pathsToAdd = [...pathTransformation.addPaths].reverse();
|
|
@@ -290,4 +708,132 @@ export function applyPathTransformations(
|
|
|
290
708
|
}
|
|
291
709
|
|
|
292
710
|
return transformedPath;
|
|
293
|
-
}
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
/**
 * Sanitize a string to create a safe filename.
 *
 * Unsafe characters and whitespace become dashes, runs of dashes collapse to
 * one, and leading dots/dashes and trailing dashes are removed so the result
 * is never a hidden file.
 *
 * @param input - Input string (typically a title)
 * @param fallback - Returned when the input is blank or nothing survives sanitization
 * @param options - `preserveUnicode` (default true): keep everything except
 *   filesystem-unsafe characters and whitespace; when false, keep only ASCII
 *   letters, digits, `_`, `.` and `-`. `preserveCase` (default false): skip
 *   lower-casing.
 * @returns Sanitized filename (without extension)
 * @throws ValidationError if input or fallback are not strings, or fallback is empty
 */
export function sanitizeForFilename(
  input: string,
  fallback: string = 'untitled',
  options: {
    preserveUnicode?: boolean;
    preserveCase?: boolean;
  } = {}
): string {
  validateString(input, 'input');
  validateString(fallback, 'fallback', { minLength: 1 });

  if (!isNonEmptyString(input)) return fallback;

  const { preserveUnicode = true, preserveCase = false } = options;

  const cased = preserveCase ? input : input.toLowerCase();

  const dashed = preserveUnicode
    // Only filesystem-unsafe characters ( / \ : * ? " < > | ) and whitespace go.
    ? cased.replace(/[/\\:*?"<>|\s]+/g, '-')
    // ASCII-only mode. Upper-case letters are allowed here so that
    // preserveCase is honored; lower-cased input is unaffected.
    : cased.replace(/[^a-zA-Z0-9_.-]+/g, '-');

  const sanitized = dashed
    .replace(/-+/g, '-')
    // Strip leading dots and dashes together: removing them in two passes
    // let inputs like "/.hidden" come out as ".hidden".
    .replace(/^[.-]+/, '')
    .replace(/-+$/, '');

  return sanitized || fallback;
}
|
|
758
|
+
|
|
759
|
+
/**
 * Reserve an identifier that is not yet in `usedIdentifiers`.
 *
 * Identifiers are compared and stored lower-cased. On a clash, suffixes are
 * appended starting from counter 2 (`base(2)`, `base(3)`, … with the default
 * suffix). After 10000 failed attempts a timestamp-plus-random suffix is used
 * instead. The chosen identifier is added to the set before returning.
 *
 * @param baseIdentifier - Base identifier to make unique
 * @param usedIdentifiers - Set of already used identifiers (mutated)
 * @param suffix - Builds the suffix from the counter and base (default: number in parentheses)
 * @returns Unique identifier
 * @throws ValidationError if baseIdentifier is not a non-empty string or usedIdentifiers is not a Set
 */
export function ensureUniqueIdentifier(
  baseIdentifier: string,
  usedIdentifiers: Set<string>,
  suffix: (counter: number, base: string) => string = (counter) => `(${counter})`
): string {
  validateString(baseIdentifier, 'baseIdentifier', { minLength: 1 });
  validateRequired(usedIdentifiers, 'usedIdentifiers');

  if (!(usedIdentifiers instanceof Set)) {
    throw new ValidationError(`Parameter 'usedIdentifiers' must be a Set`);
  }

  const MAX_ITERATIONS = 10000;
  let candidate = baseIdentifier;

  // Attempt n tries the suffix for counter n + 1, so the first alternative
  // offered after the bare identifier is numbered 2.
  for (let attempt = 1; usedIdentifiers.has(candidate.toLowerCase()); attempt++) {
    candidate = `${baseIdentifier}${suffix(attempt + 1, baseIdentifier)}`;

    if (attempt >= MAX_ITERATIONS) {
      // Give up on sequential suffixes and fall back to a timestamp-based one.
      const timestamp = Date.now().toString(36);
      const random = Math.random().toString(36).substring(2, 8);
      candidate = `${baseIdentifier}-${timestamp}-${random}`;
      logger.warn(`Maximum iterations reached for unique identifier. Using fallback: ${candidate}`);
      break;
    }
  }

  usedIdentifiers.add(candidate.toLowerCase());
  return candidate;
}
|
|
803
|
+
|
|
804
|
+
/**
 * Assemble a markdown document from its parts.
 *
 * Layout: an optional YAML frontmatter block, a `# title` heading, an
 * optional `> description` blockquote, then the content. The heading-to-
 * content section is trimmed and terminated with a single newline.
 *
 * @param title - Document title
 * @param description - Document description
 * @param content - Document content
 * @param includeMetadata - Whether to emit the description blockquote
 * @param frontMatter - Optional frontmatter; emitted only when it has at least one key
 * @returns Formatted markdown content
 */
export function createMarkdownContent(
  title: string,
  description: string = '',
  content: string = '',
  includeMetadata: boolean = true,
  frontMatter?: Record<string, any>
): string {
  let frontMatterBlock = '';
  if (isDefined(frontMatter) && Object.keys(frontMatter).length > 0) {
    const yamlBody = YAML.stringify(frontMatter, {
      lineWidth: 0,
      defaultStringType: 'QUOTE_DOUBLE',
      defaultKeyType: 'PLAIN'
    });
    frontMatterBlock = `---\n${yamlBody}---\n\n`;
  }

  let descriptionLine = '\n';
  if (includeMetadata && description) {
    descriptionLine = `\n\n> ${description}\n`;
  }

  const documentBody = `# ${title}${descriptionLine}\n${content}`.trim();

  return `${frontMatterBlock}${documentBody}\n`;
}
|