docusaurus-plugin-llms 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -3
- package/lib/index.d.ts +7 -0
- package/lib/index.js +53 -6
- package/package.json +4 -2
- package/src/index.ts +80 -5
package/README.md
CHANGED
@@ -39,6 +39,13 @@ module.exports = {
|
|
39
39
|
title: 'My Project Documentation',
|
40
40
|
description: 'Complete reference documentation for My Project',
|
41
41
|
includeBlog: true,
|
42
|
+
// Path transformation options
|
43
|
+
pathTransformation: {
|
44
|
+
// Paths to ignore when constructing URLs (will be removed if found)
|
45
|
+
ignorePaths: ['docs'],
|
46
|
+
// Paths to add when constructing URLs (will be prepended if not already present)
|
47
|
+
addPaths: ['api'],
|
48
|
+
},
|
42
49
|
},
|
43
50
|
],
|
44
51
|
// ... your other plugins
|
@@ -59,6 +66,40 @@ module.exports = {
|
|
59
66
|
| `llmsTxtFilename` | string | `'llms.txt'` | Custom filename for the links file |
|
60
67
|
| `llmsFullTxtFilename` | string | `'llms-full.txt'` | Custom filename for the full content file |
|
61
68
|
| `includeBlog` | boolean | `false` | Whether to include blog content |
|
69
|
+
| `pathTransformation` | object | `undefined` | Path transformation options for URL construction |
|
70
|
+
| `pathTransformation.ignorePaths` | string[] | `[]` | Path segments to ignore when constructing URLs |
|
71
|
+
| `pathTransformation.addPaths` | string[] | `[]` | Path segments to add when constructing URLs |
|
72
|
+
|
73
|
+
### Path Transformation Examples
|
74
|
+
|
75
|
+
The path transformation feature allows you to manipulate how URLs are constructed from file paths:
|
76
|
+
|
77
|
+
**Example 1**: Remove 'docs' from the URL path
|
78
|
+
```js
|
79
|
+
pathTransformation: {
|
80
|
+
ignorePaths: ['docs'],
|
81
|
+
}
|
82
|
+
```
|
83
|
+
File path: `/content/docs/manual/decorators.md` → URL: `https://example.com/manual/decorators`
|
84
|
+
|
85
|
+
**Example 2**: Add 'api' to the URL path
|
86
|
+
```js
|
87
|
+
pathTransformation: {
|
88
|
+
addPaths: ['api'],
|
89
|
+
}
|
90
|
+
```
|
91
|
+
File path: `/content/manual/decorators.md` → URL: `https://example.com/api/manual/decorators`
|
92
|
+
|
93
|
+
**Example 3**: Combine both transformations
|
94
|
+
```js
|
95
|
+
pathTransformation: {
|
96
|
+
ignorePaths: ['docs'],
|
97
|
+
addPaths: ['api'],
|
98
|
+
}
|
99
|
+
```
|
100
|
+
File path: `/content/docs/manual/decorators.md` → URL: `https://example.com/api/manual/decorators`
|
101
|
+
|
102
|
+
The configuration supports multiple path segments in both arrays.
|
62
103
|
|
63
104
|
## How It Works
|
64
105
|
|
@@ -79,6 +120,7 @@ These files follow the [llmstxt standard](https://llmstxt.org/), making your doc
|
|
79
120
|
- 🧹 Cleans HTML and normalizes content for optimal LLM consumption
|
80
121
|
- 📊 Provides statistics about generated documentation
|
81
122
|
- 📚 Option to include blog posts
|
123
|
+
- 🔄 Path transformation to customize URL construction
|
82
124
|
|
83
125
|
## Implementation Details
|
84
126
|
|
@@ -88,9 +130,32 @@ The plugin:
|
|
88
130
|
2. Optionally includes blog content
|
89
131
|
3. Extracts metadata, titles, and content from each file
|
90
132
|
4. Creates proper URL links to each document section
|
91
|
-
5. Generates a table of contents in `llms.txt`
|
92
|
-
6. Combines all documentation content in `llms-full.txt`
|
93
|
-
7. Provides statistics about the generated documentation
|
133
|
+
5. Applies path transformations according to configuration (removing or adding path segments)
|
134
|
+
6. Generates a table of contents in `llms.txt`
|
135
|
+
7. Combines all documentation content in `llms-full.txt`
|
136
|
+
8. Provides statistics about the generated documentation
|
137
|
+
|
138
|
+
## Testing
|
139
|
+
|
140
|
+
The plugin includes comprehensive tests in the `tests` directory:
|
141
|
+
|
142
|
+
- **Unit tests**: Test the path transformation functionality in isolation
|
143
|
+
- **Integration tests**: Simulate a Docusaurus build with various configurations
|
144
|
+
|
145
|
+
To run the tests:
|
146
|
+
|
147
|
+
```bash
|
148
|
+
# Run all tests
|
149
|
+
npm test
|
150
|
+
|
151
|
+
# Run just the unit tests
|
152
|
+
npm run test:unit
|
153
|
+
|
154
|
+
# Run just the integration tests
|
155
|
+
npm run test:integration
|
156
|
+
```
|
157
|
+
|
158
|
+
For more detailed testing instructions, see [tests/TESTING.md](tests/TESTING.md).
|
94
159
|
|
95
160
|
## Future Enhancements
|
96
161
|
|
package/lib/index.d.ts
CHANGED
@@ -30,6 +30,13 @@ interface PluginOptions {
|
|
30
30
|
llmsFullTxtFilename?: string;
|
31
31
|
/** Whether to include blog content (default: false) */
|
32
32
|
includeBlog?: boolean;
|
33
|
+
/** Path transformation options for URL construction */
|
34
|
+
pathTransformation?: {
|
35
|
+
/** Path segments to ignore when constructing URLs (will be removed if found) */
|
36
|
+
ignorePaths?: string[];
|
37
|
+
/** Path segments to add when constructing URLs (will be prepended if not already present) */
|
38
|
+
addPaths?: string[];
|
39
|
+
};
|
33
40
|
}
|
34
41
|
/**
|
35
42
|
* A Docusaurus plugin to generate LLM-friendly documentation following
|
package/lib/index.js
CHANGED
@@ -141,15 +141,54 @@ function cleanMarkdownContent(content) {
|
|
141
141
|
.trim();
|
142
142
|
return cleaned;
|
143
143
|
}
|
144
|
+
/**
 * Apply path transformations according to configuration.
 *
 * Entries in `ignorePaths` are treated as literal text (never as regular
 * expression syntax) and are removed wherever they occur as one or more
 * complete path segments. Entries in `addPaths` are then prepended, in the
 * order given, unless the path already starts with that entry.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path (returned unchanged when no configuration is given)
 */
function applyPathTransformations(urlPath, pathTransformation) {
    if (!pathTransformation) {
        return urlPath;
    }
    let transformedPath = urlPath;
    // Remove ignored path segments
    if (pathTransformation.ignorePaths?.length) {
        for (const ignorePath of pathTransformation.ignorePaths) {
            // An empty entry can never name a segment; skip it.
            if (!ignorePath) {
                continue;
            }
            // Escape regex metacharacters so entries such as 'v1.0' or 'c++'
            // match literally instead of being interpreted as a pattern.
            const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Match the entry only as complete segment(s): preceded by the start
            // of the path or a '/', and followed by a '/' or the end of the path.
            // The trailing delimiter is a lookahead so it is not consumed; this
            // lets directly adjacent occurrences ('docs/docs/x') all be removed.
            const ignoreRegex = new RegExp(`(^|/)${escapedIgnorePath}(?=/|$)`, 'g');
            transformedPath = transformedPath.replace(ignoreRegex, '$1');
        }
        // Clean up any double slashes that might have been created
        transformedPath = transformedPath.replace(/\/+/g, '/');
        // Remove leading slash if present
        transformedPath = transformedPath.replace(/^\//, '');
    }
    // Add path segments if they're not already present
    if (pathTransformation.addPaths?.length) {
        // Each entry is prepended to the front, so walk the list in reverse to
        // keep the configured order in the final path.
        const pathsToAdd = [...pathTransformation.addPaths].reverse();
        for (const addPath of pathsToAdd) {
            // Only add if not already present at the beginning
            if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
                transformedPath = `${addPath}/${transformedPath}`;
            }
        }
    }
    return transformedPath;
}
|
144
182
|
/**
|
145
183
|
* Process a markdown file and extract its metadata and content
|
146
184
|
* @param filePath - Path to the markdown file
|
147
185
|
* @param baseDir - Base directory
|
148
186
|
* @param siteUrl - Base URL of the site
|
149
187
|
* @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
|
188
|
+
* @param pathTransformation - Path transformation configuration
|
150
189
|
* @returns Processed file data
|
151
190
|
*/
|
152
|
-
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs') {
|
191
|
+
async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'docs', pathTransformation) {
|
153
192
|
const content = await readFile(filePath);
|
154
193
|
const { data, content: markdownContent } = (0, gray_matter_1.default)(content);
|
155
194
|
const relativePath = path.relative(baseDir, filePath);
|
@@ -161,8 +200,16 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
161
200
|
const linkPath = linkPathBase.endsWith('index')
|
162
201
|
? linkPathBase.replace(/\/index$/, '')
|
163
202
|
: linkPathBase;
|
164
|
-
//
|
165
|
-
const
|
203
|
+
// Apply path transformations to the link path
|
204
|
+
const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
|
205
|
+
// Also apply path transformations to the pathPrefix if it's not empty
|
206
|
+
// This allows removing 'docs' from the path when specified in ignorePaths
|
207
|
+
let transformedPathPrefix = pathPrefix;
|
208
|
+
if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
|
209
|
+
transformedPathPrefix = '';
|
210
|
+
}
|
211
|
+
// Generate full URL with transformed path and path prefix
|
212
|
+
const fullUrl = new URL(`${transformedPathPrefix ? `${transformedPathPrefix}/` : ''}${transformedLinkPath}`, siteUrl).toString();
|
166
213
|
// Extract title
|
167
214
|
const title = extractTitle(data, markdownContent, filePath);
|
168
215
|
// Get description from frontmatter or first paragraph
|
@@ -196,7 +243,7 @@ async function processMarkdownFile(filePath, baseDir, siteUrl, pathPrefix = 'doc
|
|
196
243
|
*/
|
197
244
|
function docusaurusPluginLLMs(context, options = {}) {
|
198
245
|
// Set default options
|
199
|
-
const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, } = options;
|
246
|
+
const { generateLLMsTxt = true, generateLLMsFullTxt = true, docsDir = 'docs', ignoreFiles = [], title, description, llmsTxtFilename = 'llms.txt', llmsFullTxtFilename = 'llms-full.txt', includeBlog = false, pathTransformation, } = options;
|
200
247
|
const { siteDir, siteConfig, outDir, } = context;
|
201
248
|
return {
|
202
249
|
name: 'docusaurus-plugin-llms',
|
@@ -225,7 +272,7 @@ function docusaurusPluginLLMs(context, options = {}) {
|
|
225
272
|
// Process each file
|
226
273
|
for (const filePath of docFiles) {
|
227
274
|
try {
|
228
|
-
const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs');
|
275
|
+
const docInfo = await processMarkdownFile(filePath, fullDocsDir, siteUrl, 'docs', pathTransformation);
|
229
276
|
allDocs.push(docInfo);
|
230
277
|
}
|
231
278
|
catch (err) {
|
@@ -252,7 +299,7 @@ function docusaurusPluginLLMs(context, options = {}) {
|
|
252
299
|
// Process each file
|
253
300
|
for (const filePath of blogFiles) {
|
254
301
|
try {
|
255
|
-
const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog');
|
302
|
+
const docInfo = await processMarkdownFile(filePath, blogDir, siteUrl, 'blog', pathTransformation);
|
256
303
|
allDocs.push(docInfo);
|
257
304
|
}
|
258
305
|
catch (err) {
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "docusaurus-plugin-llms",
|
3
|
-
"version": "0.1.
|
3
|
+
"version": "0.1.2",
|
4
4
|
"description": "Docusaurus plugin for generating LLM-friendly documentation following the llmtxt.org standard",
|
5
5
|
"main": "lib/index.js",
|
6
6
|
"scripts": {
|
@@ -8,7 +8,9 @@
|
|
8
8
|
"watch": "tsc --watch",
|
9
9
|
"cleanup": "node cleanup.js",
|
10
10
|
"prepublishOnly": "npm run build && npm run cleanup",
|
11
|
-
"test": "
|
11
|
+
"test:unit": "node tests/test-path-transforms.js",
|
12
|
+
"test:integration": "node tests/test-path-transformation.js",
|
13
|
+
"test": "npm run build && npm run test:unit && npm run test:integration"
|
12
14
|
},
|
13
15
|
"files": [
|
14
16
|
"lib",
|
package/src/index.ts
CHANGED
@@ -55,6 +55,14 @@ interface PluginOptions {
|
|
55
55
|
|
56
56
|
/** Whether to include blog content (default: false) */
|
57
57
|
includeBlog?: boolean;
|
58
|
+
|
59
|
+
/** Path transformation options for URL construction */
|
60
|
+
pathTransformation?: {
|
61
|
+
/** Path segments to ignore when constructing URLs (will be removed if found) */
|
62
|
+
ignorePaths?: string[];
|
63
|
+
/** Path segments to add when constructing URLs (will be prepended if not already present) */
|
64
|
+
addPaths?: string[];
|
65
|
+
};
|
58
66
|
}
|
59
67
|
|
60
68
|
/**
|
@@ -165,19 +173,70 @@ function cleanMarkdownContent(content: string): string {
|
|
165
173
|
return cleaned;
|
166
174
|
}
|
167
175
|
|
176
|
+
/**
 * Apply path transformations according to configuration.
 *
 * Entries in `ignorePaths` are treated as literal text (never as regular
 * expression syntax) and are removed wherever they occur as one or more
 * complete path segments. Entries in `addPaths` are then prepended, in the
 * order given, unless the path already starts with that entry.
 *
 * @param urlPath - Original URL path
 * @param pathTransformation - Path transformation configuration
 * @returns Transformed URL path (returned unchanged when no configuration is given)
 */
function applyPathTransformations(
  urlPath: string,
  pathTransformation?: PluginOptions['pathTransformation']
): string {
  if (!pathTransformation) {
    return urlPath;
  }

  let transformedPath = urlPath;

  // Remove ignored path segments
  if (pathTransformation.ignorePaths?.length) {
    for (const ignorePath of pathTransformation.ignorePaths) {
      // An empty entry can never name a segment; skip it.
      if (!ignorePath) {
        continue;
      }

      // Escape regex metacharacters so entries such as 'v1.0' or 'c++'
      // match literally instead of being interpreted as a pattern.
      const escapedIgnorePath = ignorePath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

      // Match the entry only as complete segment(s): preceded by the start
      // of the path or a '/', and followed by a '/' or the end of the path.
      // The trailing delimiter is a lookahead so it is not consumed; this
      // lets directly adjacent occurrences ('docs/docs/x') all be removed.
      const ignoreRegex = new RegExp(`(^|/)${escapedIgnorePath}(?=/|$)`, 'g');
      transformedPath = transformedPath.replace(ignoreRegex, '$1');
    }

    // Clean up any double slashes that might have been created
    transformedPath = transformedPath.replace(/\/+/g, '/');

    // Remove leading slash if present
    transformedPath = transformedPath.replace(/^\//, '');
  }

  // Add path segments if they're not already present
  if (pathTransformation.addPaths?.length) {
    // Each entry is prepended to the front, so walk the list in reverse to
    // keep the configured order in the final path.
    const pathsToAdd = [...pathTransformation.addPaths].reverse();

    for (const addPath of pathsToAdd) {
      // Only add if not already present at the beginning
      if (!transformedPath.startsWith(addPath + '/') && transformedPath !== addPath) {
        transformedPath = `${addPath}/${transformedPath}`;
      }
    }
  }

  return transformedPath;
}
|
224
|
+
|
168
225
|
/**
|
169
226
|
* Process a markdown file and extract its metadata and content
|
170
227
|
* @param filePath - Path to the markdown file
|
171
228
|
* @param baseDir - Base directory
|
172
229
|
* @param siteUrl - Base URL of the site
|
173
230
|
* @param pathPrefix - Path prefix for URLs (e.g., 'docs' or 'blog')
|
231
|
+
* @param pathTransformation - Path transformation configuration
|
174
232
|
* @returns Processed file data
|
175
233
|
*/
|
176
234
|
async function processMarkdownFile(
|
177
235
|
filePath: string,
|
178
236
|
baseDir: string,
|
179
237
|
siteUrl: string,
|
180
|
-
pathPrefix: string = 'docs'
|
238
|
+
pathPrefix: string = 'docs',
|
239
|
+
pathTransformation?: PluginOptions['pathTransformation']
|
181
240
|
): Promise<DocInfo> {
|
182
241
|
const content = await readFile(filePath);
|
183
242
|
const { data, content: markdownContent } = matter(content);
|
@@ -194,8 +253,21 @@ async function processMarkdownFile(
|
|
194
253
|
? linkPathBase.replace(/\/index$/, '')
|
195
254
|
: linkPathBase;
|
196
255
|
|
197
|
-
//
|
198
|
-
const
|
256
|
+
// Apply path transformations to the link path
|
257
|
+
const transformedLinkPath = applyPathTransformations(linkPath, pathTransformation);
|
258
|
+
|
259
|
+
// Also apply path transformations to the pathPrefix if it's not empty
|
260
|
+
// This allows removing 'docs' from the path when specified in ignorePaths
|
261
|
+
let transformedPathPrefix = pathPrefix;
|
262
|
+
if (pathPrefix && pathTransformation?.ignorePaths?.includes(pathPrefix)) {
|
263
|
+
transformedPathPrefix = '';
|
264
|
+
}
|
265
|
+
|
266
|
+
// Generate full URL with transformed path and path prefix
|
267
|
+
const fullUrl = new URL(
|
268
|
+
`${transformedPathPrefix ? `${transformedPathPrefix}/` : ''}${transformedLinkPath}`,
|
269
|
+
siteUrl
|
270
|
+
).toString();
|
199
271
|
|
200
272
|
// Extract title
|
201
273
|
const title = extractTitle(data, markdownContent, filePath);
|
@@ -247,6 +319,7 @@ export default function docusaurusPluginLLMs(
|
|
247
319
|
llmsTxtFilename = 'llms.txt',
|
248
320
|
llmsFullTxtFilename = 'llms-full.txt',
|
249
321
|
includeBlog = false,
|
322
|
+
pathTransformation,
|
250
323
|
} = options;
|
251
324
|
|
252
325
|
const {
|
@@ -296,7 +369,8 @@ export default function docusaurusPluginLLMs(
|
|
296
369
|
filePath,
|
297
370
|
fullDocsDir,
|
298
371
|
siteUrl,
|
299
|
-
'docs'
|
372
|
+
'docs',
|
373
|
+
pathTransformation
|
300
374
|
);
|
301
375
|
allDocs.push(docInfo);
|
302
376
|
} catch (err: any) {
|
@@ -329,7 +403,8 @@ export default function docusaurusPluginLLMs(
|
|
329
403
|
filePath,
|
330
404
|
blogDir,
|
331
405
|
siteUrl,
|
332
|
-
'blog'
|
406
|
+
'blog',
|
407
|
+
pathTransformation
|
333
408
|
);
|
334
409
|
allDocs.push(docInfo);
|
335
410
|
} catch (err: any) {
|