mdream 0.15.1 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,6 +92,7 @@ function generateLlmsTxtContent(files, options) {
92
92
  const { siteName = "Site", description, origin = "", sections, notes } = options;
93
93
  let content = `# ${siteName}\n\n`;
94
94
  if (description) content += `> ${description}\n\n`;
95
+ if (origin) content += `Canonical Origin: ${origin}\n\n`;
95
96
  if (sections) for (const section of sections) content += formatSection(section);
96
97
  if (files.length > 0) {
97
98
  content += `## Pages\n\n`;
@@ -102,7 +103,7 @@ function generateLlmsTxtContent(files, options) {
102
103
  const relativePath = relative(options.outputDir, file.filePath);
103
104
  content += `- [${file.title}](${relativePath})${descText}\n`;
104
105
  } else {
105
- const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
106
+ const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
106
107
  content += `- [${file.title}](${url})${descText}\n`;
107
108
  }
108
109
  }
@@ -150,6 +151,7 @@ function generateLlmsFullTxtContent(files, options) {
150
151
  const { siteName = "Site", description, origin = "", sections, notes } = options;
151
152
  let content = `# ${siteName}\n\n`;
152
153
  if (description) content += `> ${description}\n\n`;
154
+ if (origin) content += `Canonical Origin: ${origin}\n\n`;
153
155
  if (sections) for (const section of sections) content += formatSection(section);
154
156
  if (files.length > 0) {
155
157
  content += `## Table of Contents\n\n`;
@@ -285,39 +287,76 @@ function formatNotes(notes) {
285
287
  * @param options - Configuration options
286
288
  * @returns WritableStream that accepts ProcessedFile objects
287
289
  */
290
/**
 * Sort pages by URL path in hierarchical (directory-tree) order.
 *
 * Each page is assigned a group key:
 * - `""` when its URL has at most one path segment AND no other page nests
 *   deeper under that same first segment (e.g. `/`, `/about`);
 * - otherwise its first path segment (e.g. `docs` for `/docs` and `/docs/a`).
 *
 * Ordering rules:
 * 1. The `""` group comes first.
 * 2. Remaining groups are ordered by `localeCompare` of their key.
 * 3. Inside a group, pages are compared segment by segment with
 *    `localeCompare`; when one path is a prefix of the other, the shorter
 *    path comes first.
 *
 * The input array is not modified; a new sorted array holding the same page
 * objects is returned. Pages that compare equal keep their input order.
 *
 * NOTE(review): URLs are split on "/" as-is, so for an absolute URL the scheme
 * (e.g. "https:") becomes the first segment — confirm whether callers ever
 * pass absolute URLs here and whether that grouping is intended.
 *
 * @param pages - Array of objects with a string `url` property
 * @returns A new array with the same page objects in sorted order
 */
function sortPagesByPath(pages) {
  // Split every URL exactly once instead of on each comparison.
  const segmentLists = pages.map((page) => page.url.split("/").filter(Boolean));

  // First segments that have at least one page nested below them.
  const nestedFirstSegments = new Set();
  for (const segments of segmentLists) {
    if (segments.length > 1) nestedFirstSegments.add(segments[0]);
  }

  const entries = pages.map((page, index) => {
    const segments = segmentLists[index];
    const firstSegment = segments.length > 0 ? segments[0] : "";
    const isRootLevel = segments.length <= 1;
    const groupKey = isRootLevel && !nestedFirstSegments.has(firstSegment) ? "" : firstSegment;
    return { page, segments, groupKey };
  });

  entries.sort((a, b) => {
    if (a.groupKey !== b.groupKey) {
      // The un-nested root-level group always sorts ahead of named groups.
      if (a.groupKey === "") return -1;
      if (b.groupKey === "") return 1;
      return a.groupKey.localeCompare(b.groupKey);
    }
    const minLen = Math.min(a.segments.length, b.segments.length);
    for (let i = 0; i < minLen; i++) {
      const cmp = a.segments[i].localeCompare(b.segments[i]);
      if (cmp !== 0) return cmp;
    }
    // Shorter path (including the empty root path) first; identical paths
    // yield 0 so the comparator stays consistent and the sort stays stable.
    return a.segments.length - b.segments.length;
  });

  return entries.map((entry) => entry.page);
}
288
329
  function createLlmsTxtStream(options = {}) {
289
330
  const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd(), sections, notes } = options;
290
331
  let llmsTxtHandle;
291
332
  let llmsFullTxtHandle;
333
+ const bufferedPages = [];
292
334
  return new WritableStream({
293
335
  async start() {
294
336
  await mkdir(outputDir, { recursive: true });
295
337
  llmsTxtHandle = await open(join(outputDir, "llms.txt"), "w");
296
338
  let header = `# ${siteName}\n\n`;
297
339
  if (description) header += `> ${description}\n\n`;
340
+ if (origin) header += `Canonical Origin: ${origin}\n\n`;
298
341
  if (sections) for (const section of sections) header += formatSection(section);
299
- header += `## Pages\n\n`;
300
342
  await llmsTxtHandle.write(header);
301
343
  if (generateFull) {
302
344
  llmsFullTxtHandle = await open(join(outputDir, "llms-full.txt"), "w");
303
345
  let fullHeader = `# ${siteName}\n\n`;
304
346
  if (description) fullHeader += `> ${description}\n\n`;
347
+ if (origin) fullHeader += `Canonical Origin: ${origin}\n\n`;
305
348
  if (sections) for (const section of sections) fullHeader += formatSection(section);
306
349
  await llmsFullTxtHandle.write(fullHeader);
307
350
  }
308
351
  },
309
352
  async write(file) {
310
353
  const desc = file.metadata?.description;
311
- const descText = desc ? `: ${desc.substring(0, 100)}${desc.length > 100 ? "..." : ""}` : "";
312
- let chunk = "";
313
- if (file.filePath && file.filePath.endsWith(".md")) {
314
- const relativePath = relative(outputDir, file.filePath);
315
- chunk = `- [${file.title}](${relativePath})${descText}\n`;
316
- } else {
317
- const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
318
- chunk = `- [${file.title}](${url})${descText}\n`;
319
- }
320
- await llmsTxtHandle?.write(chunk);
354
+ bufferedPages.push({
355
+ url: file.url,
356
+ title: file.title,
357
+ description: desc,
358
+ filePath: file.filePath
359
+ });
321
360
  if (generateFull && llmsFullTxtHandle) {
322
361
  const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
323
362
  const { frontmatter, body } = parseFrontmatter(file.content);
@@ -343,6 +382,45 @@ function createLlmsTxtStream(options = {}) {
343
382
  }
344
383
  },
345
384
  async close() {
385
+ const sortedPages = sortPagesByPath(bufferedPages);
386
+ const segmentHasNested = /* @__PURE__ */ new Map();
387
+ for (const page of sortedPages) {
388
+ const segments = page.url.split("/").filter(Boolean);
389
+ const firstSegment = segments.length > 0 ? segments[0] : "";
390
+ if (!segmentHasNested.has(firstSegment)) segmentHasNested.set(firstSegment, false);
391
+ if (segments.length > 1) segmentHasNested.set(firstSegment, true);
392
+ }
393
+ await llmsTxtHandle?.write(`## Pages\n\n`);
394
+ let currentGroup = "";
395
+ let segmentGroupIndex = 0;
396
+ let urlsInCurrentGroup = 0;
397
+ for (let i = 0; i < sortedPages.length; i++) {
398
+ const page = sortedPages[i];
399
+ const segments = page.url.split("/").filter(Boolean);
400
+ const firstSegment = segments.length > 0 ? segments[0] : "";
401
+ const isRootLevel = segments.length <= 1;
402
+ const hasNested = segmentHasNested.get(firstSegment);
403
+ const groupKey = isRootLevel && !hasNested ? "" : firstSegment;
404
+ if (groupKey !== currentGroup) {
405
+ if (urlsInCurrentGroup > 0) {
406
+ if (segmentGroupIndex === 0 || segmentGroupIndex >= 1 && segmentGroupIndex <= 2 && urlsInCurrentGroup > 1) await llmsTxtHandle?.write("\n");
407
+ }
408
+ currentGroup = groupKey;
409
+ segmentGroupIndex++;
410
+ urlsInCurrentGroup = 0;
411
+ }
412
+ urlsInCurrentGroup++;
413
+ const descText = page.description ? `: ${page.description.substring(0, 160)}${page.description.length > 160 ? "..." : ""}` : "";
414
+ let chunk = "";
415
+ if (page.filePath && page.filePath.endsWith(".md")) {
416
+ const relativePath = relative(outputDir, page.filePath);
417
+ chunk = `- [${page.title}](${relativePath})${descText}\n`;
418
+ } else {
419
+ const url = page.url.startsWith("http://") || page.url.startsWith("https://") ? page.url : origin ? origin + page.url : page.url;
420
+ chunk = `- [${page.title}](${url})${descText}\n`;
421
+ }
422
+ await llmsTxtHandle?.write(chunk);
423
+ }
346
424
  if (notes) {
347
425
  const notesContent = formatNotes(notes);
348
426
  await llmsTxtHandle?.write(`\n${notesContent}`);
package/dist/cli.mjs CHANGED
@@ -1,6 +1,6 @@
1
1
  import "./_chunks/markdown-processor-D26Uo5td.mjs";
2
2
  import { n as streamHtmlToMarkdown } from "./_chunks/src-BJpipdul.mjs";
3
- import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-BXtLmgK6.mjs";
3
+ import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-Czb_M48B.mjs";
4
4
  import "./_chunks/plugins-DJnqR2fA.mjs";
5
5
  import { t as withMinimalPreset } from "./_chunks/minimal-BiDhcwif.mjs";
6
6
  import { readFileSync } from "node:fs";
@@ -79,45 +79,6 @@ interface CreateLlmsTxtStreamOptions extends Omit<LlmsTxtArtifactsOptions, 'patt
79
79
  /** Notes to write at the end */
80
80
  notes?: string | string[];
81
81
  }
82
- /**
83
- * Create a WritableStream that generates llms.txt artifacts by streaming pages to disk
84
- *
85
- * Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written,
86
- * never keeping full content in memory. Creates outputDir recursively if needed.
87
- *
88
- * @example
89
- * ```typescript
90
- * const stream = createLlmsTxtStream({
91
- * siteName: 'My Docs',
92
- * description: 'Documentation site',
93
- * origin: 'https://example.com',
94
- * generateFull: true,
95
- * outputDir: './dist',
96
- * sections: [
97
- * {
98
- * title: 'Getting Started',
99
- * description: 'Quick start guide',
100
- * links: [
101
- * { title: 'Installation', href: '/install', description: 'How to install' },
102
- * { title: 'Quick Start', href: '/quickstart' },
103
- * ],
104
- * },
105
- * ],
106
- * notes: ['Generated by mdream', 'Last updated: 2024'],
107
- * })
108
- *
109
- * const writer = stream.getWriter()
110
- * await writer.write({
111
- * title: 'Home',
112
- * content: '# Welcome\n\nHome page content.',
113
- * url: '/',
114
- * })
115
- * await writer.close()
116
- * ```
117
- *
118
- * @param options - Configuration options
119
- * @returns WritableStream that accepts ProcessedFile objects
120
- */
121
82
  declare function createLlmsTxtStream(options?: CreateLlmsTxtStreamOptions): WritableStream<ProcessedFile>;
122
83
  //#endregion
123
84
  export { CreateLlmsTxtStreamOptions, LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, LlmsTxtLink, LlmsTxtSection, ProcessedFile, createLlmsTxtStream, generateLlmsTxtArtifacts };
package/dist/llms-txt.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import "./_chunks/markdown-processor-D26Uo5td.mjs";
2
2
  import "./_chunks/src-BJpipdul.mjs";
3
- import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt-BXtLmgK6.mjs";
3
+ import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt-Czb_M48B.mjs";
4
4
 
5
5
  export { createLlmsTxtStream, generateLlmsTxtArtifacts };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "0.15.1",
4
+ "version": "0.15.2",
5
5
  "description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",