mdream 0.14.0 → 0.15.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { t as htmlToMarkdown } from "./src-BJpipdul.mjs";
2
2
  import { t as extractionPlugin } from "./extraction-BA9MDtq3.mjs";
3
- import { readFile } from "node:fs/promises";
4
- import { basename, dirname, relative, sep } from "pathe";
3
+ import { mkdir, open, readFile } from "node:fs/promises";
4
+ import { basename, dirname, join, relative, sep } from "pathe";
5
5
  import { glob } from "tinyglobby";
6
6
 
7
7
  //#region src/llms-txt.ts
@@ -215,6 +215,99 @@ async function generateLlmsTxtArtifacts(options) {
215
215
  processedFiles: files
216
216
  };
217
217
  }
218
+ /**
219
+ * Create a WritableStream that generates llms.txt artifacts by streaming pages to disk
220
+ *
221
+ * Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written,
222
+ * never keeping full content in memory. Creates outputDir recursively if needed.
223
+ *
224
+ * @example
225
+ * ```typescript
226
+ * const stream = createLlmsTxtStream({
227
+ * siteName: 'My Docs',
228
+ * description: 'Documentation site',
229
+ * origin: 'https://example.com',
230
+ * generateFull: true,
231
+ * outputDir: './dist',
232
+ * })
233
+ *
234
+ * const writer = stream.getWriter()
235
+ * await writer.write({
236
+ * title: 'Home',
237
+ * content: '# Welcome\n\nHome page content.',
238
+ * url: '/',
239
+ * })
240
+ * await writer.close()
241
+ * ```
242
+ *
243
+ * @param options - Configuration options
244
+ * @returns WritableStream that accepts ProcessedFile objects
245
+ */
246
+ function createLlmsTxtStream(options = {}) {
247
+ const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd() } = options;
248
+ let llmsTxtHandle;
249
+ let llmsFullTxtHandle;
250
+ return new WritableStream({
251
+ async start() {
252
+ await mkdir(outputDir, { recursive: true });
253
+ llmsTxtHandle = await open(join(outputDir, "llms.txt"), "w");
254
+ let header = `# ${siteName}\n\n`;
255
+ if (description) header += `> ${description}\n\n`;
256
+ header += `## Pages\n\n`;
257
+ await llmsTxtHandle.write(header);
258
+ if (generateFull) {
259
+ llmsFullTxtHandle = await open(join(outputDir, "llms-full.txt"), "w");
260
+ let fullHeader = `# ${siteName}\n\n`;
261
+ if (description) fullHeader += `> ${description}\n\n`;
262
+ await llmsFullTxtHandle.write(fullHeader);
263
+ }
264
+ },
265
+ async write(file) {
266
+ const desc = file.metadata?.description;
267
+ const descText = desc ? `: ${desc.substring(0, 100)}${desc.length > 100 ? "..." : ""}` : "";
268
+ let chunk = "";
269
+ if (file.filePath && file.filePath.endsWith(".md")) {
270
+ const relativePath = relative(outputDir, file.filePath);
271
+ chunk = `- [${file.title}](${relativePath})${descText}\n`;
272
+ } else {
273
+ const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
274
+ chunk = `- [${file.title}](${url})${descText}\n`;
275
+ }
276
+ await llmsTxtHandle?.write(chunk);
277
+ if (generateFull && llmsFullTxtHandle) {
278
+ const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
279
+ const { frontmatter, body } = parseFrontmatter(file.content);
280
+ const metadata = {
281
+ title: file.title,
282
+ url
283
+ };
284
+ if (file.filePath) metadata.file = relative(outputDir, file.filePath);
285
+ if (file.metadata) {
286
+ if (file.metadata.description) metadata.description = file.metadata.description;
287
+ if (file.metadata.keywords) metadata.keywords = file.metadata.keywords;
288
+ if (file.metadata.author) metadata.author = file.metadata.author;
289
+ }
290
+ const frontmatterString = serializeFrontmatter(frontmatter ? {
291
+ ...frontmatter,
292
+ ...metadata
293
+ } : metadata);
294
+ let contentBody = frontmatter ? body : file.content;
295
+ const titleLine = contentBody.trim().split("\n")[0];
296
+ if (titleLine === file.title || titleLine === `# ${file.title}`) contentBody = contentBody.trim().split("\n").slice(1).join("\n").trimStart();
297
+ const fullChunk = `---\n${frontmatterString}\n---\n\n${contentBody}\n\n---\n\n`;
298
+ await llmsFullTxtHandle.write(fullChunk);
299
+ }
300
+ },
301
+ async close() {
302
+ await llmsTxtHandle?.close();
303
+ await llmsFullTxtHandle?.close();
304
+ },
305
+ async abort(reason) {
306
+ await llmsTxtHandle?.close();
307
+ await llmsFullTxtHandle?.close();
308
+ }
309
+ });
310
+ }
218
311
 
219
312
  //#endregion
220
- export { generateLlmsTxtArtifacts as t };
313
+ export { generateLlmsTxtArtifacts as n, createLlmsTxtStream as t };
package/dist/cli.mjs CHANGED
@@ -3,7 +3,7 @@ import "./_chunks/markdown-processor-D26Uo5td.mjs";
3
3
  import "./_chunks/plugin-CjWWQTuL.mjs";
4
4
  import { n as streamHtmlToMarkdown } from "./_chunks/src-BJpipdul.mjs";
5
5
  import "./_chunks/extraction-BA9MDtq3.mjs";
6
- import { t as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-D7Hduhij.mjs";
6
+ import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-T79S7X24.mjs";
7
7
  import "./_chunks/plugins-DJnqR2fA.mjs";
8
8
  import { t as withMinimalPreset } from "./_chunks/minimal-BiDhcwif.mjs";
9
9
  import { readFileSync } from "node:fs";
@@ -34,5 +34,49 @@ interface LlmsTxtArtifactsResult {
34
34
  * Main function to process files and generate llms.txt artifacts
35
35
  */
36
36
  declare function generateLlmsTxtArtifacts(options: LlmsTxtArtifactsOptions): Promise<LlmsTxtArtifactsResult>;
37
+ /**
38
+ * Options for creating an llms.txt stream
39
+ */
40
+ interface CreateLlmsTxtStreamOptions extends Omit<LlmsTxtArtifactsOptions, 'patterns' | 'files' | 'outputDir' | 'generateMarkdown'> {
41
+ /** Directory to write files to (defaults to process.cwd()) */
42
+ outputDir?: string;
43
+ /** Site name for the header (defaults to 'Site') */
44
+ siteName?: string;
45
+ /** Site description for the header */
46
+ description?: string;
47
+ /** Origin URL to prepend to relative URLs */
48
+ origin?: string;
49
+ /** Generate llms-full.txt with complete page content (defaults to false) */
50
+ generateFull?: boolean;
51
+ }
52
+ /**
53
+ * Create a WritableStream that generates llms.txt artifacts by streaming pages to disk
54
+ *
55
+ * Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written,
56
+ * never keeping full content in memory. Creates outputDir recursively if needed.
57
+ *
58
+ * @example
59
+ * ```typescript
60
+ * const stream = createLlmsTxtStream({
61
+ * siteName: 'My Docs',
62
+ * description: 'Documentation site',
63
+ * origin: 'https://example.com',
64
+ * generateFull: true,
65
+ * outputDir: './dist',
66
+ * })
67
+ *
68
+ * const writer = stream.getWriter()
69
+ * await writer.write({
70
+ * title: 'Home',
71
+ * content: '# Welcome\n\nHome page content.',
72
+ * url: '/',
73
+ * })
74
+ * await writer.close()
75
+ * ```
76
+ *
77
+ * @param options - Configuration options
78
+ * @returns WritableStream that accepts ProcessedFile objects
79
+ */
80
+ declare function createLlmsTxtStream(options?: CreateLlmsTxtStreamOptions): WritableStream<ProcessedFile>;
37
81
  //#endregion
38
- export { LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, ProcessedFile, generateLlmsTxtArtifacts };
82
+ export { CreateLlmsTxtStreamOptions, LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, ProcessedFile, createLlmsTxtStream, generateLlmsTxtArtifacts };
package/dist/llms-txt.mjs CHANGED
@@ -3,6 +3,6 @@ import "./_chunks/markdown-processor-D26Uo5td.mjs";
3
3
  import "./_chunks/plugin-CjWWQTuL.mjs";
4
4
  import "./_chunks/src-BJpipdul.mjs";
5
5
  import "./_chunks/extraction-BA9MDtq3.mjs";
6
- import { t as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-D7Hduhij.mjs";
6
+ import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt-T79S7X24.mjs";
7
7
 
8
- export { generateLlmsTxtArtifacts };
8
+ export { createLlmsTxtStream, generateLlmsTxtArtifacts };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "0.14.0",
4
+ "version": "0.15.0",
5
5
  "description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",