docusaurus-plugin-llms 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +306 -17
- package/lib/generator.d.ts +32 -0
- package/lib/generator.js +212 -0
- package/lib/index.d.ts +1 -24
- package/lib/index.js +39 -288
- package/lib/processor.d.ts +28 -0
- package/lib/processor.js +211 -0
- package/lib/utils.d.ts +53 -0
- package/lib/utils.js +177 -0
- package/package.json +4 -2
- package/src/generator.ts +266 -0
- package/src/index.ts +48 -348
- package/src/processor.ts +236 -0
- package/src/types.ts +113 -0
- package/src/utils.ts +165 -0
package/src/types.ts
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
/**
|
2
|
+
* Type definitions for the docusaurus-plugin-llms plugin
|
3
|
+
*/
|
4
|
+
|
5
|
+
import type { LoadContext } from '@docusaurus/types';
|
6
|
+
|
7
|
+
/**
 * Shape of a single document after it has been processed.
 */
export interface DocInfo {
  /** Document title. */
  title: string;
  /** Path of the document (presumably its source file location; confirm against the processor). */
  path: string;
  /** URL of the document. */
  url: string;
  /** Document content. */
  content: string;
  /** Document description. */
  description: string;
}
|
17
|
+
|
18
|
+
/**
 * Configuration describing one additional, custom LLM output file.
 */
export interface CustomLLMFile {
  /** Output file name, for example 'llms-python.txt'. */
  filename: string;

  /** Glob patterns selecting the files to include. */
  includePatterns: Array<string>;

  /** `true` to emit the full content, `false` to emit links only. */
  fullContent: boolean;

  /** Title override for this file; the site title is used when omitted. */
  title?: string;

  /** Description override for this file; the site description is used when omitted. */
  description?: string;

  /** Extra exclusion patterns, applied in addition to the global `ignoreFiles`. */
  ignorePatterns?: Array<string>;

  /** Patterns that control file ordering (analogous to `includeOrder`). */
  orderPatterns?: Array<string>;

  /** Whether files matching no order pattern are appended at the end (default: false). */
  includeUnmatchedLast?: boolean;

  /** Version information for this LLM file. */
  version?: string;
}
|
49
|
+
|
50
|
+
/**
 * User-facing options accepted by the plugin. Every option is optional.
 */
export interface PluginOptions {
  /** Generate the llms.txt file (default: true). */
  generateLLMsTxt?: boolean;

  /** Generate the llms-full.txt file (default: true). */
  generateLLMsFullTxt?: boolean;

  /** Base directory holding the documentation files (default: 'docs'). */
  docsDir?: string;

  /** Glob patterns for files that should be ignored. */
  ignoreFiles?: Array<string>;

  /** Title used in generated files; falls back to the site title. */
  title?: string;

  /** Description used in generated files; falls back to the site tagline. */
  description?: string;

  /** File name of the links file (default: 'llms.txt'). */
  llmsTxtFilename?: string;

  /** File name of the full-content file (default: 'llms-full.txt'). */
  llmsFullTxtFilename?: string;

  /** Include blog content as well (default: false). */
  includeBlog?: boolean;

  /** Controls how URL paths are rewritten when URLs are constructed. */
  pathTransformation?: {
    /** Path segments dropped from the URL when they occur. */
    ignorePaths?: Array<string>;
    /** Path segments prepended to the URL unless already present. */
    addPaths?: Array<string>;
  };

  /** Glob patterns defining file order; files are processed in pattern order. */
  includeOrder?: Array<string>;

  /** Append files matching no `includeOrder` pattern at the end (default: true). */
  includeUnmatchedLast?: boolean;

  /** Configurations for additional custom LLM files. */
  customLLMFiles?: Array<CustomLLMFile>;

  /** Version applied globally to all generated LLM files. */
  version?: string;
}
|
101
|
+
|
102
|
+
/**
 * Resolved values handed around inside the plugin, together with the
 * plugin options they were produced from.
 */
export interface PluginContext {
  /** Site directory. */
  siteDir: string;
  /** Output directory. */
  outDir: string;
  /** Site URL. */
  siteUrl: string;
  /** Documentation directory. */
  docsDir: string;
  /** Title used for the generated documents. */
  docTitle: string;
  /** Description used for the generated documents. */
  docDescription: string;
  /** Plugin options. */
  options: PluginOptions;
}
|
package/src/utils.ts
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
/**
|
2
|
+
* Utility functions for the docusaurus-plugin-llms plugin
|
3
|
+
*/
|
4
|
+
|
5
|
+
import * as fs from 'fs/promises';
|
6
|
+
import * as path from 'path';
|
7
|
+
import { minimatch } from 'minimatch';
|
8
|
+
import { PluginOptions } from './types';
|
9
|
+
|
10
|
+
/**
 * Store text in a file using UTF-8 encoding.
 * @param filePath - Destination path of the file
 * @param data - Text to store
 */
export async function writeFile(filePath: string, data: string): Promise<void> {
  await fs.writeFile(filePath, data, { encoding: 'utf8' });
}
|
18
|
+
|
19
|
+
/**
 * Load a file and decode it as UTF-8 text.
 * @param filePath - Location of the file to load
 * @returns The decoded text of the file
 */
export async function readFile(filePath: string): Promise<string> {
  const text = await fs.readFile(filePath, { encoding: 'utf8' });
  return text;
}
|
27
|
+
|
28
|
+
/**
 * Decide whether a file is excluded by any of the given glob patterns.
 *
 * The file path is made relative to `baseDir` before matching, and matching
 * uses minimatch's `matchBase` option.
 *
 * @param filePath - Path to the file
 * @param baseDir - Directory the file path is made relative to
 * @param ignorePatterns - Glob patterns describing ignored files
 * @returns `true` when at least one pattern matches, otherwise `false`
 */
export function shouldIgnoreFile(filePath: string, baseDir: string, ignorePatterns: string[]): boolean {
  // Nothing to match against: skip the path computation entirely.
  if (!ignorePatterns.length) {
    return false;
  }

  const candidate = path.relative(baseDir, filePath);
  const matchesCandidate = (glob: string): boolean =>
    minimatch(candidate, glob, { matchBase: true });

  return ignorePatterns.some(matchesCandidate);
}
|
46
|
+
|
47
|
+
/**
 * Walk a directory tree and collect the paths of its Markdown files.
 *
 * Entries rejected by `shouldIgnoreFile` are skipped; for directories this
 * means the whole subtree is skipped. Files are collected when their name
 * ends with `.md` or `.mdx`.
 *
 * @param dir - Directory to scan
 * @param baseDir - Base directory the ignore patterns are evaluated against
 * @param ignorePatterns - Glob patterns for entries to skip
 * @returns Paths of all collected Markdown files
 */
export async function readMarkdownFiles(dir: string, baseDir: string, ignorePatterns: string[] = []): Promise<string[]> {
  const collected: string[] = [];
  const dirents = await fs.readdir(dir, { withFileTypes: true });

  for (const dirent of dirents) {
    const entryPath = path.join(dir, dirent.name);

    if (shouldIgnoreFile(entryPath, baseDir, ignorePatterns)) {
      continue;
    }

    if (dirent.isDirectory()) {
      // Descend and keep the nested results in the order they were found.
      const nested = await readMarkdownFiles(entryPath, baseDir, ignorePatterns);
      for (const nestedPath of nested) {
        collected.push(nestedPath);
      }
      continue;
    }

    const isMarkdown = ['.md', '.mdx'].some(extension => dirent.name.endsWith(extension));
    if (isMarkdown) {
      collected.push(entryPath);
    }
  }

  return collected;
}
|
75
|
+
|
76
|
+
/**
 * Determine a title for a Markdown document.
 *
 * Sources are tried in this order:
 * 1. the frontmatter `title` (a non-blank string, or a number converted to a string);
 * 2. the first level-one heading (`# Heading`) found in the content;
 * 3. the file name without extension, with dashes turned into spaces and
 *    each word capitalized.
 *
 * @param data - Frontmatter data; may be null/undefined or have no usable `title`
 * @param content - Markdown content
 * @param filePath - Path to the file, used for the file-name fallback
 * @returns Extracted title
 */
export function extractTitle(data: any, content: string, filePath: string): string {
  // First try frontmatter. `data` may be missing entirely, and YAML can yield
  // non-string values, so narrow before trusting it.
  const frontmatterTitle: unknown = data?.title;
  if (typeof frontmatterTitle === 'string') {
    if (frontmatterTitle.trim() !== '') {
      return frontmatterTitle;
    }
  } else if (typeof frontmatterTitle === 'number' || typeof frontmatterTitle === 'bigint') {
    // e.g. `title: 2024` is parsed as a number; honour the declared string return type.
    return String(frontmatterTitle);
  }

  // Then try the first heading. Only spaces/tabs may separate `#` from the
  // text, and the text must contain a non-whitespace character, so an empty
  // `#` line cannot capture the following paragraph as the title.
  const heading = /^#[ \t]+(\S.*)/m.exec(content)?.[1]?.trim();
  if (heading) {
    return heading;
  }

  // Finally derive a title from the file name.
  return path.basename(filePath, path.extname(filePath))
    .replace(/-/g, ' ')
    .replace(/\b\w/g, c => c.toUpperCase());
}
|
100
|
+
|
101
|
+
/**
 * Clean markdown content for LLM consumption.
 *
 * Removes HTML comments and anything shaped like an HTML/JSX tag
 * (`<div ...>`, `</div>`, `<br/>`, `<>`, `</>`, `<!DOCTYPE ...>`), then
 * normalizes line endings to `\n`, collapses runs of three or more newlines
 * into a single blank line, and trims the result.
 *
 * A `<` that is not followed by a tag name is left alone, so text such as
 * `x < 5 and y > 3` is preserved. Tag-shaped text inside code (for example
 * `Array<string>`) is still removed.
 *
 * @param content - Raw markdown content
 * @returns Cleaned content
 */
export function cleanMarkdownContent(content: string): string {
  // Whole comments first (they may contain `>`), then tags that start with a
  // name character, `!` or `?`, plus the empty fragment forms `<>` and `</>`.
  const htmlMarkup = /<!--[\s\S]*?-->|<\/?(?:[A-Za-z!?][^>]*)?>/g;

  return content
    .replace(htmlMarkup, '')
    // Normalize whitespace
    .replace(/\r\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
117
|
+
|
118
|
+
/**
 * Apply path transformations according to configuration.
 *
 * When `ignorePaths` is non-empty, every occurrence of each entry that forms
 * a complete path segment (delimited by `/` or the start/end of the path) is
 * removed, repeated slashes are collapsed and a leading slash is dropped.
 * Entries are matched literally: regular-expression metacharacters such as
 * `.` or `+` have no special meaning.
 *
 * When `addPaths` is non-empty, each entry is prepended unless the path
 * already starts with it; the configured order is kept in the final path.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path (the input unchanged when no configuration is given)
 */
export function applyPathTransformations(
  urlPath: string,
  pathTransformation?: PluginOptions['pathTransformation']
): string {
  if (!pathTransformation) {
    return urlPath;
  }

  let transformedPath = urlPath;

  // Remove ignored path segments
  if (pathTransformation.ignorePaths?.length) {
    for (const ignorePath of pathTransformation.ignorePaths) {
      if (!ignorePath) {
        continue;
      }

      // The segment must start at the beginning of the path or after a slash
      // and end at a slash or the end of the path. The trailing boundary is a
      // lookahead so it is not consumed, which lets directly repeated
      // segments (e.g. `docs/docs/x`) all be matched.
      const ignoreRegex = new RegExp(`(^|/)${escapeRegExp(ignorePath)}(?=/|$)`, 'g');
      transformedPath = transformedPath.replace(ignoreRegex, '$1');
    }

    // Clean up any double slashes that might have been created
    transformedPath = transformedPath.replace(/\/+/g, '/');

    // Remove leading slash if present
    transformedPath = transformedPath.replace(/^\//, '');
  }

  // Add path segments if they're not already present
  if (pathTransformation.addPaths?.length) {
    // Each path is prepended to the front, so process in reverse order to
    // keep the configured order in the final path.
    const pathsToAdd = [...pathTransformation.addPaths].reverse();

    for (const addPath of pathsToAdd) {
      // Only add if not already present at the beginning
      if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
        transformedPath = `${addPath}/${transformedPath}`;
      }
    }
  }

  return transformedPath;
}

/**
 * Escape regular-expression metacharacters so that `text` matches literally
 * when embedded in a `RegExp` source.
 */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|