mdream 0.15.1 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -92,6 +92,7 @@ function generateLlmsTxtContent(files, options) {
|
|
|
92
92
|
const { siteName = "Site", description, origin = "", sections, notes } = options;
|
|
93
93
|
let content = `# ${siteName}\n\n`;
|
|
94
94
|
if (description) content += `> ${description}\n\n`;
|
|
95
|
+
if (origin) content += `Canonical Origin: ${origin}\n\n`;
|
|
95
96
|
if (sections) for (const section of sections) content += formatSection(section);
|
|
96
97
|
if (files.length > 0) {
|
|
97
98
|
content += `## Pages\n\n`;
|
|
@@ -102,7 +103,7 @@ function generateLlmsTxtContent(files, options) {
|
|
|
102
103
|
const relativePath = relative(options.outputDir, file.filePath);
|
|
103
104
|
content += `- [${file.title}](${relativePath})${descText}\n`;
|
|
104
105
|
} else {
|
|
105
|
-
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
|
|
106
|
+
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
|
|
106
107
|
content += `- [${file.title}](${url})${descText}\n`;
|
|
107
108
|
}
|
|
108
109
|
}
|
|
@@ -150,6 +151,7 @@ function generateLlmsFullTxtContent(files, options) {
|
|
|
150
151
|
const { siteName = "Site", description, origin = "", sections, notes } = options;
|
|
151
152
|
let content = `# ${siteName}\n\n`;
|
|
152
153
|
if (description) content += `> ${description}\n\n`;
|
|
154
|
+
if (origin) content += `Canonical Origin: ${origin}\n\n`;
|
|
153
155
|
if (sections) for (const section of sections) content += formatSection(section);
|
|
154
156
|
if (files.length > 0) {
|
|
155
157
|
content += `## Table of Contents\n\n`;
|
|
@@ -285,39 +287,76 @@ function formatNotes(notes) {
|
|
|
285
287
|
* @param options - Configuration options
|
|
286
288
|
* @returns WritableStream that accepts ProcessedFile objects
|
|
287
289
|
*/
|
|
290
|
+
/**
|
|
291
|
+
* Get the group key for a URL (up to 2 segments deep)
|
|
292
|
+
*/
|
|
293
|
+
/**
 * Sort pages by URL path in hierarchical (directory-tree) order.
 *
 * Pages are grouped by their first path segment. A page with at most one
 * segment, whose first segment has no deeper page anywhere in `pages`, goes
 * into the root group (key ""), which sorts before every named group. Named
 * groups are ordered with `localeCompare`. Inside a group, paths are compared
 * segment by segment, and a path that is a prefix of another sorts first.
 *
 * The array is sorted in place and the same array instance is returned.
 *
 * NOTE(review): URLs are split on "/" verbatim, so for an absolute URL the
 * first segment is the scheme (e.g. "https:"). Confirm callers only pass
 * site-relative paths if per-section grouping is expected.
 *
 * @param pages - Objects exposing a string `url` property
 * @returns The same `pages` array, sorted
 */
function sortPagesByPath(pages) {
  // Split every URL once up front instead of on each comparator call.
  const segmentsByPage = new Map();
  // First segment -> true when at least one page lives below it (2+ segments).
  const segmentHasNested = new Map();
  for (const page of pages) {
    const segments = page.url.split("/").filter(Boolean);
    segmentsByPage.set(page, segments);
    const firstSegment = segments[0] ?? "";
    if (!segmentHasNested.has(firstSegment)) segmentHasNested.set(firstSegment, false);
    if (segments.length > 1) segmentHasNested.set(firstSegment, true);
  }

  // Root-level pages whose segment has no children share the "" group.
  const groupKeyOf = (segments) => {
    const firstSegment = segments[0] ?? "";
    const isRootLevel = segments.length <= 1;
    return isRootLevel && !segmentHasNested.get(firstSegment) ? "" : firstSegment;
  };

  return pages.sort((a, b) => {
    const segmentsA = segmentsByPage.get(a);
    const segmentsB = segmentsByPage.get(b);
    const groupKeyA = groupKeyOf(segmentsA);
    const groupKeyB = groupKeyOf(segmentsB);
    if (groupKeyA !== groupKeyB) {
      // The root group always precedes named groups.
      if (groupKeyA === "") return -1;
      if (groupKeyB === "") return 1;
      return groupKeyA.localeCompare(groupKeyB);
    }
    const minLen = Math.min(segmentsA.length, segmentsB.length);
    for (let i = 0; i < minLen; i++) {
      const cmp = segmentsA[i].localeCompare(segmentsB[i]);
      if (cmp !== 0) return cmp;
    }
    // Shorter (parent) paths first. This also places a zero-segment URL ahead
    // of its group and yields 0 for two zero-segment URLs, keeping the
    // comparator consistent so equal pages retain their insertion order.
    return segmentsA.length - segmentsB.length;
  });
}
|
|
288
329
|
function createLlmsTxtStream(options = {}) {
|
|
289
330
|
const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd(), sections, notes } = options;
|
|
290
331
|
let llmsTxtHandle;
|
|
291
332
|
let llmsFullTxtHandle;
|
|
333
|
+
const bufferedPages = [];
|
|
292
334
|
return new WritableStream({
|
|
293
335
|
async start() {
|
|
294
336
|
await mkdir(outputDir, { recursive: true });
|
|
295
337
|
llmsTxtHandle = await open(join(outputDir, "llms.txt"), "w");
|
|
296
338
|
let header = `# ${siteName}\n\n`;
|
|
297
339
|
if (description) header += `> ${description}\n\n`;
|
|
340
|
+
if (origin) header += `Canonical Origin: ${origin}\n\n`;
|
|
298
341
|
if (sections) for (const section of sections) header += formatSection(section);
|
|
299
|
-
header += `## Pages\n\n`;
|
|
300
342
|
await llmsTxtHandle.write(header);
|
|
301
343
|
if (generateFull) {
|
|
302
344
|
llmsFullTxtHandle = await open(join(outputDir, "llms-full.txt"), "w");
|
|
303
345
|
let fullHeader = `# ${siteName}\n\n`;
|
|
304
346
|
if (description) fullHeader += `> ${description}\n\n`;
|
|
347
|
+
if (origin) fullHeader += `Canonical Origin: ${origin}\n\n`;
|
|
305
348
|
if (sections) for (const section of sections) fullHeader += formatSection(section);
|
|
306
349
|
await llmsFullTxtHandle.write(fullHeader);
|
|
307
350
|
}
|
|
308
351
|
},
|
|
309
352
|
async write(file) {
|
|
310
353
|
const desc = file.metadata?.description;
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
}
|
|
317
|
-
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
|
|
318
|
-
chunk = `- [${file.title}](${url})${descText}\n`;
|
|
319
|
-
}
|
|
320
|
-
await llmsTxtHandle?.write(chunk);
|
|
354
|
+
bufferedPages.push({
|
|
355
|
+
url: file.url,
|
|
356
|
+
title: file.title,
|
|
357
|
+
description: desc,
|
|
358
|
+
filePath: file.filePath
|
|
359
|
+
});
|
|
321
360
|
if (generateFull && llmsFullTxtHandle) {
|
|
322
361
|
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
|
|
323
362
|
const { frontmatter, body } = parseFrontmatter(file.content);
|
|
@@ -343,6 +382,45 @@ function createLlmsTxtStream(options = {}) {
|
|
|
343
382
|
}
|
|
344
383
|
},
|
|
345
384
|
async close() {
|
|
385
|
+
const sortedPages = sortPagesByPath(bufferedPages);
|
|
386
|
+
const segmentHasNested = /* @__PURE__ */ new Map();
|
|
387
|
+
for (const page of sortedPages) {
|
|
388
|
+
const segments = page.url.split("/").filter(Boolean);
|
|
389
|
+
const firstSegment = segments.length > 0 ? segments[0] : "";
|
|
390
|
+
if (!segmentHasNested.has(firstSegment)) segmentHasNested.set(firstSegment, false);
|
|
391
|
+
if (segments.length > 1) segmentHasNested.set(firstSegment, true);
|
|
392
|
+
}
|
|
393
|
+
await llmsTxtHandle?.write(`## Pages\n\n`);
|
|
394
|
+
let currentGroup = "";
|
|
395
|
+
let segmentGroupIndex = 0;
|
|
396
|
+
let urlsInCurrentGroup = 0;
|
|
397
|
+
for (let i = 0; i < sortedPages.length; i++) {
|
|
398
|
+
const page = sortedPages[i];
|
|
399
|
+
const segments = page.url.split("/").filter(Boolean);
|
|
400
|
+
const firstSegment = segments.length > 0 ? segments[0] : "";
|
|
401
|
+
const isRootLevel = segments.length <= 1;
|
|
402
|
+
const hasNested = segmentHasNested.get(firstSegment);
|
|
403
|
+
const groupKey = isRootLevel && !hasNested ? "" : firstSegment;
|
|
404
|
+
if (groupKey !== currentGroup) {
|
|
405
|
+
if (urlsInCurrentGroup > 0) {
|
|
406
|
+
if (segmentGroupIndex === 0 || segmentGroupIndex >= 1 && segmentGroupIndex <= 2 && urlsInCurrentGroup > 1) await llmsTxtHandle?.write("\n");
|
|
407
|
+
}
|
|
408
|
+
currentGroup = groupKey;
|
|
409
|
+
segmentGroupIndex++;
|
|
410
|
+
urlsInCurrentGroup = 0;
|
|
411
|
+
}
|
|
412
|
+
urlsInCurrentGroup++;
|
|
413
|
+
const descText = page.description ? `: ${page.description.substring(0, 160)}${page.description.length > 160 ? "..." : ""}` : "";
|
|
414
|
+
let chunk = "";
|
|
415
|
+
if (page.filePath && page.filePath.endsWith(".md")) {
|
|
416
|
+
const relativePath = relative(outputDir, page.filePath);
|
|
417
|
+
chunk = `- [${page.title}](${relativePath})${descText}\n`;
|
|
418
|
+
} else {
|
|
419
|
+
const url = page.url.startsWith("http://") || page.url.startsWith("https://") ? page.url : origin ? origin + page.url : page.url;
|
|
420
|
+
chunk = `- [${page.title}](${url})${descText}\n`;
|
|
421
|
+
}
|
|
422
|
+
await llmsTxtHandle?.write(chunk);
|
|
423
|
+
}
|
|
346
424
|
if (notes) {
|
|
347
425
|
const notesContent = formatNotes(notes);
|
|
348
426
|
await llmsTxtHandle?.write(`\n${notesContent}`);
|
package/dist/cli.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import "./_chunks/markdown-processor-D26Uo5td.mjs";
|
|
2
2
|
import { n as streamHtmlToMarkdown } from "./_chunks/src-BJpipdul.mjs";
|
|
3
|
-
import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-
|
|
3
|
+
import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-Czb_M48B.mjs";
|
|
4
4
|
import "./_chunks/plugins-DJnqR2fA.mjs";
|
|
5
5
|
import { t as withMinimalPreset } from "./_chunks/minimal-BiDhcwif.mjs";
|
|
6
6
|
import { readFileSync } from "node:fs";
|
package/dist/llms-txt.d.mts
CHANGED
|
@@ -79,45 +79,6 @@ interface CreateLlmsTxtStreamOptions extends Omit<LlmsTxtArtifactsOptions, 'patt
|
|
|
79
79
|
/** Notes to write at the end */
|
|
80
80
|
notes?: string | string[];
|
|
81
81
|
}
|
|
82
|
-
/**
|
|
83
|
-
* Create a WritableStream that generates llms.txt artifacts by streaming pages to disk
|
|
84
|
-
*
|
|
85
|
-
* Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written,
|
|
86
|
-
* never keeping full content in memory. Creates outputDir recursively if needed.
|
|
87
|
-
*
|
|
88
|
-
* @example
|
|
89
|
-
* ```typescript
|
|
90
|
-
* const stream = createLlmsTxtStream({
|
|
91
|
-
* siteName: 'My Docs',
|
|
92
|
-
* description: 'Documentation site',
|
|
93
|
-
* origin: 'https://example.com',
|
|
94
|
-
* generateFull: true,
|
|
95
|
-
* outputDir: './dist',
|
|
96
|
-
* sections: [
|
|
97
|
-
* {
|
|
98
|
-
* title: 'Getting Started',
|
|
99
|
-
* description: 'Quick start guide',
|
|
100
|
-
* links: [
|
|
101
|
-
* { title: 'Installation', href: '/install', description: 'How to install' },
|
|
102
|
-
* { title: 'Quick Start', href: '/quickstart' },
|
|
103
|
-
* ],
|
|
104
|
-
* },
|
|
105
|
-
* ],
|
|
106
|
-
* notes: ['Generated by mdream', 'Last updated: 2024'],
|
|
107
|
-
* })
|
|
108
|
-
*
|
|
109
|
-
* const writer = stream.getWriter()
|
|
110
|
-
* await writer.write({
|
|
111
|
-
* title: 'Home',
|
|
112
|
-
* content: '# Welcome\n\nHome page content.',
|
|
113
|
-
* url: '/',
|
|
114
|
-
* })
|
|
115
|
-
* await writer.close()
|
|
116
|
-
* ```
|
|
117
|
-
*
|
|
118
|
-
* @param options - Configuration options
|
|
119
|
-
* @returns WritableStream that accepts ProcessedFile objects
|
|
120
|
-
*/
|
|
121
82
|
declare function createLlmsTxtStream(options?: CreateLlmsTxtStreamOptions): WritableStream<ProcessedFile>;
|
|
122
83
|
//#endregion
|
|
123
84
|
export { CreateLlmsTxtStreamOptions, LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, LlmsTxtLink, LlmsTxtSection, ProcessedFile, createLlmsTxtStream, generateLlmsTxtArtifacts };
|
package/dist/llms-txt.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "./_chunks/markdown-processor-D26Uo5td.mjs";
|
|
2
2
|
import "./_chunks/src-BJpipdul.mjs";
|
|
3
|
-
import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt-
|
|
3
|
+
import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt-Czb_M48B.mjs";
|
|
4
4
|
|
|
5
5
|
export { createLlmsTxtStream, generateLlmsTxtArtifacts };
|