mdream 0.15.1 → 0.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/{extraction-BA9MDtq3.mjs → extraction.mjs} +1 -1
- package/dist/_chunks/{llms-txt-BXtLmgK6.mjs → llms-txt.mjs} +116 -14
- package/dist/_chunks/{markdown-processor-D26Uo5td.mjs → markdown-processor.mjs} +1 -1
- package/dist/_chunks/{minimal-BiDhcwif.mjs → minimal.mjs} +2 -2
- package/dist/_chunks/{plugin-D5soyEXm.d.mts → plugin.d.mts} +1 -1
- package/dist/_chunks/{plugins-DJnqR2fA.mjs → plugins.mjs} +3 -3
- package/dist/_chunks/{src-BJpipdul.mjs → src.mjs} +1 -1
- package/dist/cli.mjs +7 -6
- package/dist/index.d.mts +2 -2
- package/dist/index.mjs +4 -4
- package/dist/llms-txt.d.mts +0 -39
- package/dist/llms-txt.mjs +3 -3
- package/dist/plugins.d.mts +2 -2
- package/dist/plugins.mjs +3 -3
- package/dist/preset/minimal.d.mts +1 -1
- package/dist/preset/minimal.mjs +2 -2
- package/dist/splitter.d.mts +1 -1
- package/dist/splitter.mjs +2 -2
- package/package.json +1 -1
- /package/dist/_chunks/{const-Bf_XN9U9.mjs → const.mjs} +0 -0
- /package/dist/_chunks/{plugin-CjWWQTuL.mjs → plugin.mjs} +0 -0
- /package/dist/_chunks/{types-CT4ZxeOH.d.mts → types.d.mts} +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { t as htmlToMarkdown } from "./src
|
|
2
|
-
import { t as extractionPlugin } from "./extraction
|
|
1
|
+
import { t as htmlToMarkdown } from "./src.mjs";
|
|
2
|
+
import { t as extractionPlugin } from "./extraction.mjs";
|
|
3
3
|
import { mkdir, open, readFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, join, relative, sep } from "pathe";
|
|
5
5
|
import { glob } from "tinyglobby";
|
|
@@ -92,6 +92,7 @@ function generateLlmsTxtContent(files, options) {
|
|
|
92
92
|
const { siteName = "Site", description, origin = "", sections, notes } = options;
|
|
93
93
|
let content = `# ${siteName}\n\n`;
|
|
94
94
|
if (description) content += `> ${description}\n\n`;
|
|
95
|
+
if (origin) content += `Canonical Origin: ${origin}\n\n`;
|
|
95
96
|
if (sections) for (const section of sections) content += formatSection(section);
|
|
96
97
|
if (files.length > 0) {
|
|
97
98
|
content += `## Pages\n\n`;
|
|
@@ -102,7 +103,7 @@ function generateLlmsTxtContent(files, options) {
|
|
|
102
103
|
const relativePath = relative(options.outputDir, file.filePath);
|
|
103
104
|
content += `- [${file.title}](${relativePath})${descText}\n`;
|
|
104
105
|
} else {
|
|
105
|
-
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
|
|
106
|
+
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
|
|
106
107
|
content += `- [${file.title}](${url})${descText}\n`;
|
|
107
108
|
}
|
|
108
109
|
}
|
|
@@ -150,6 +151,7 @@ function generateLlmsFullTxtContent(files, options) {
|
|
|
150
151
|
const { siteName = "Site", description, origin = "", sections, notes } = options;
|
|
151
152
|
let content = `# ${siteName}\n\n`;
|
|
152
153
|
if (description) content += `> ${description}\n\n`;
|
|
154
|
+
if (origin) content += `Canonical Origin: ${origin}\n\n`;
|
|
153
155
|
if (sections) for (const section of sections) content += formatSection(section);
|
|
154
156
|
if (files.length > 0) {
|
|
155
157
|
content += `## Table of Contents\n\n`;
|
|
@@ -285,39 +287,93 @@ function formatNotes(notes) {
|
|
|
285
287
|
* @param options - Configuration options
|
|
286
288
|
* @returns WritableStream that accepts ProcessedFile objects
|
|
287
289
|
*/
|
|
290
|
+
/**
|
|
291
|
+
* Get group prefix for a URL (up to 2 segments)
|
|
292
|
+
*/
|
|
293
|
+
function getGroupPrefix(url, depth) {
|
|
294
|
+
const segments = url.split("/").filter(Boolean);
|
|
295
|
+
if (segments.length === 0) return "/";
|
|
296
|
+
if (depth === 1 || segments.length === 1) return `/${segments[0]}`;
|
|
297
|
+
return `/${segments[0]}/${segments[1]}`;
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Sort pages by URL path in hierarchical order (directory tree structure)
|
|
301
|
+
* Groups by up to 2 segments, with root-level pages without nesting grouped together
|
|
302
|
+
*/
|
|
303
|
+
function sortPagesByPath(pages) {
|
|
304
|
+
const twoSegmentCount = /* @__PURE__ */ new Map();
|
|
305
|
+
for (const page of pages) {
|
|
306
|
+
const prefix = getGroupPrefix(page.url, 2);
|
|
307
|
+
twoSegmentCount.set(prefix, (twoSegmentCount.get(prefix) || 0) + 1);
|
|
308
|
+
}
|
|
309
|
+
const segmentHasNested = /* @__PURE__ */ new Map();
|
|
310
|
+
for (const page of pages) {
|
|
311
|
+
const segments = page.url.split("/").filter(Boolean);
|
|
312
|
+
const firstSegment = segments.length > 0 ? segments[0] : "";
|
|
313
|
+
if (!segmentHasNested.has(firstSegment)) segmentHasNested.set(firstSegment, false);
|
|
314
|
+
if (segments.length > 1) segmentHasNested.set(firstSegment, true);
|
|
315
|
+
}
|
|
316
|
+
return pages.sort((a, b) => {
|
|
317
|
+
const segmentsA = a.url.split("/").filter(Boolean);
|
|
318
|
+
const segmentsB = b.url.split("/").filter(Boolean);
|
|
319
|
+
const firstSegmentA = segmentsA.length > 0 ? segmentsA[0] : "";
|
|
320
|
+
const firstSegmentB = segmentsB.length > 0 ? segmentsB[0] : "";
|
|
321
|
+
const twoSegPrefixA = getGroupPrefix(a.url, 2);
|
|
322
|
+
const twoSegPrefixB = getGroupPrefix(b.url, 2);
|
|
323
|
+
const twoSegCountA = twoSegmentCount.get(twoSegPrefixA) || 0;
|
|
324
|
+
const twoSegCountB = twoSegmentCount.get(twoSegPrefixB) || 0;
|
|
325
|
+
let groupKeyA = twoSegCountA > 1 ? twoSegPrefixA : `/${firstSegmentA}`;
|
|
326
|
+
let groupKeyB = twoSegCountB > 1 ? twoSegPrefixB : `/${firstSegmentB}`;
|
|
327
|
+
const isRootLevelA = segmentsA.length <= 1;
|
|
328
|
+
const isRootLevelB = segmentsB.length <= 1;
|
|
329
|
+
const hasNestedA = segmentHasNested.get(firstSegmentA);
|
|
330
|
+
const hasNestedB = segmentHasNested.get(firstSegmentB);
|
|
331
|
+
if (isRootLevelA && !hasNestedA) groupKeyA = "";
|
|
332
|
+
if (isRootLevelB && !hasNestedB) groupKeyB = "";
|
|
333
|
+
if (groupKeyA === "" && groupKeyB !== "") return -1;
|
|
334
|
+
if (groupKeyA !== "" && groupKeyB === "") return 1;
|
|
335
|
+
if (groupKeyA !== groupKeyB) return groupKeyA.localeCompare(groupKeyB);
|
|
336
|
+
if (segmentsA.length === 0) return -1;
|
|
337
|
+
if (segmentsB.length === 0) return 1;
|
|
338
|
+
const minLen = Math.min(segmentsA.length, segmentsB.length);
|
|
339
|
+
for (let i = 0; i < minLen; i++) {
|
|
340
|
+
const cmp = segmentsA[i].localeCompare(segmentsB[i]);
|
|
341
|
+
if (cmp !== 0) return cmp;
|
|
342
|
+
}
|
|
343
|
+
return segmentsA.length - segmentsB.length;
|
|
344
|
+
});
|
|
345
|
+
}
|
|
288
346
|
function createLlmsTxtStream(options = {}) {
|
|
289
347
|
const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd(), sections, notes } = options;
|
|
290
348
|
let llmsTxtHandle;
|
|
291
349
|
let llmsFullTxtHandle;
|
|
350
|
+
const bufferedPages = [];
|
|
292
351
|
return new WritableStream({
|
|
293
352
|
async start() {
|
|
294
353
|
await mkdir(outputDir, { recursive: true });
|
|
295
354
|
llmsTxtHandle = await open(join(outputDir, "llms.txt"), "w");
|
|
296
355
|
let header = `# ${siteName}\n\n`;
|
|
297
356
|
if (description) header += `> ${description}\n\n`;
|
|
357
|
+
if (origin) header += `Canonical Origin: ${origin}\n\n`;
|
|
298
358
|
if (sections) for (const section of sections) header += formatSection(section);
|
|
299
|
-
header += `## Pages\n\n`;
|
|
300
359
|
await llmsTxtHandle.write(header);
|
|
301
360
|
if (generateFull) {
|
|
302
361
|
llmsFullTxtHandle = await open(join(outputDir, "llms-full.txt"), "w");
|
|
303
362
|
let fullHeader = `# ${siteName}\n\n`;
|
|
304
363
|
if (description) fullHeader += `> ${description}\n\n`;
|
|
364
|
+
if (origin) fullHeader += `Canonical Origin: ${origin}\n\n`;
|
|
305
365
|
if (sections) for (const section of sections) fullHeader += formatSection(section);
|
|
306
366
|
await llmsFullTxtHandle.write(fullHeader);
|
|
307
367
|
}
|
|
308
368
|
},
|
|
309
369
|
async write(file) {
|
|
310
370
|
const desc = file.metadata?.description;
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
}
|
|
317
|
-
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin + file.url;
|
|
318
|
-
chunk = `- [${file.title}](${url})${descText}\n`;
|
|
319
|
-
}
|
|
320
|
-
await llmsTxtHandle?.write(chunk);
|
|
371
|
+
bufferedPages.push({
|
|
372
|
+
url: file.url,
|
|
373
|
+
title: file.title,
|
|
374
|
+
description: desc,
|
|
375
|
+
filePath: file.filePath
|
|
376
|
+
});
|
|
321
377
|
if (generateFull && llmsFullTxtHandle) {
|
|
322
378
|
const url = file.url.startsWith("http://") || file.url.startsWith("https://") ? file.url : origin ? origin + file.url : file.url;
|
|
323
379
|
const { frontmatter, body } = parseFrontmatter(file.content);
|
|
@@ -343,6 +399,52 @@ function createLlmsTxtStream(options = {}) {
|
|
|
343
399
|
}
|
|
344
400
|
},
|
|
345
401
|
async close() {
|
|
402
|
+
const sortedPages = sortPagesByPath(bufferedPages);
|
|
403
|
+
const twoSegmentCount = /* @__PURE__ */ new Map();
|
|
404
|
+
for (const page of sortedPages) {
|
|
405
|
+
const prefix = getGroupPrefix(page.url, 2);
|
|
406
|
+
twoSegmentCount.set(prefix, (twoSegmentCount.get(prefix) || 0) + 1);
|
|
407
|
+
}
|
|
408
|
+
const segmentHasNested = /* @__PURE__ */ new Map();
|
|
409
|
+
for (const page of sortedPages) {
|
|
410
|
+
const segments = page.url.split("/").filter(Boolean);
|
|
411
|
+
const firstSegment = segments.length > 0 ? segments[0] : "";
|
|
412
|
+
if (!segmentHasNested.has(firstSegment)) segmentHasNested.set(firstSegment, false);
|
|
413
|
+
if (segments.length > 1) segmentHasNested.set(firstSegment, true);
|
|
414
|
+
}
|
|
415
|
+
await llmsTxtHandle?.write(`## Pages\n\n`);
|
|
416
|
+
let currentGroup = "";
|
|
417
|
+
let segmentGroupIndex = 0;
|
|
418
|
+
let urlsInCurrentGroup = 0;
|
|
419
|
+
for (let i = 0; i < sortedPages.length; i++) {
|
|
420
|
+
const page = sortedPages[i];
|
|
421
|
+
const segments = page.url.split("/").filter(Boolean);
|
|
422
|
+
const firstSegment = segments.length > 0 ? segments[0] : "";
|
|
423
|
+
const twoSegPrefix = getGroupPrefix(page.url, 2);
|
|
424
|
+
let groupKey = (twoSegmentCount.get(twoSegPrefix) || 0) > 1 ? twoSegPrefix : `/${firstSegment}`;
|
|
425
|
+
const isRootLevel = segments.length <= 1;
|
|
426
|
+
const hasNested = segmentHasNested.get(firstSegment);
|
|
427
|
+
if (isRootLevel && !hasNested) groupKey = "";
|
|
428
|
+
if (groupKey !== currentGroup) {
|
|
429
|
+
if (urlsInCurrentGroup > 0) {
|
|
430
|
+
if (segmentGroupIndex === 0 || segmentGroupIndex >= 1 && segmentGroupIndex <= 2 && urlsInCurrentGroup > 1) await llmsTxtHandle?.write("\n");
|
|
431
|
+
}
|
|
432
|
+
currentGroup = groupKey;
|
|
433
|
+
segmentGroupIndex++;
|
|
434
|
+
urlsInCurrentGroup = 0;
|
|
435
|
+
}
|
|
436
|
+
urlsInCurrentGroup++;
|
|
437
|
+
const descText = page.description ? `: ${page.description.substring(0, 160)}${page.description.length > 160 ? "..." : ""}` : "";
|
|
438
|
+
let chunk = "";
|
|
439
|
+
if (page.filePath && page.filePath.endsWith(".md")) {
|
|
440
|
+
const relativePath = relative(outputDir, page.filePath);
|
|
441
|
+
chunk = `- [${page.title}](${relativePath})${descText}\n`;
|
|
442
|
+
} else {
|
|
443
|
+
const url = page.url.startsWith("http://") || page.url.startsWith("https://") ? page.url : origin ? origin + page.url : page.url;
|
|
444
|
+
chunk = `- [${page.title}](${url})${descText}\n`;
|
|
445
|
+
}
|
|
446
|
+
await llmsTxtHandle?.write(chunk);
|
|
447
|
+
}
|
|
346
448
|
if (notes) {
|
|
347
449
|
const notesContent = formatNotes(notes);
|
|
348
450
|
await llmsTxtHandle?.write(`\n${notesContent}`);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, B as TAG_DFN, Bt as TAG_SCRIPT, C as TAG_AUDIO, Ct as TAG_METER, D as TAG_BLOCKQUOTE, E as TAG_BDO, Et as TAG_NOSCRIPT, F as TAG_CODE, Ft as TAG_Q, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, I as TAG_COL, It as TAG_RP, Jt as TAG_SUB, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, Lt as TAG_RT, Mt as TAG_PLAINTEXT, N as TAG_CENTER, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, P as TAG_CITE, Pt as TAG_PROGRESS, Q as TAG_H1, Qt as TAG_TABLE, R as TAG_DEL, Rt as TAG_RUBY, S as TAG_ASIDE, St as TAG_META, T as TAG_BASE, Tt as TAG_NOFRAMES, U as TAG_DL, Ut as TAG_SMALL, V as TAG_DIALOG, W as TAG_DT, Wt as TAG_SOURCE, X as TAG_FOOTER, Xt as TAG_SUP, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _n as TagIdMap, _t as TAG_LI, a as LIST_ITEM_SPACING, an as TAG_THEAD, b as TAG_AREA, bt as TAG_MAP, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_TR, ct as TAG_I, d as MARKDOWN_STRONG, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, f as MAX_TAG_ID, fn as TAG_VAR, ft as TAG_INS, g as TABLE_ROW_SPACING, gn as TEXT_NODE, gt as TAG_LEGEND, h as NodeEventExit, hn as TAG_XMP, ht as TAG_LABEL, i as HTML_ENTITIES, in as TAG_TH, it as TAG_HEAD, j as TAG_CANVAS, jt as TAG_PARAM, k as TAG_BR, kt as TAG_OPTION, l as MARKDOWN_INLINE_CODE, ln as TAG_TRACK, lt as TAG_IFRAME, m as NodeEventEnter, mn as TAG_WBR, mt as TAG_KEYGEN, n as DEFAULT_BLOCK_SPACING, nn as TAG_TEXTAREA, nt as TAG_H5, o as MARKDOWN_CODE_BLOCK, on as TAG_TIME, ot as TAG_HR, p as NO_SPACING, pn as TAG_VIDEO, pt as TAG_KBD, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, s as MARKDOWN_EMPHASIS, sn as TAG_TITLE, t as BLOCKQUOTE_SPACING, tn as TAG_TEMPLATE, tt as TAG_H4, u as MARKDOWN_STRIKETHROUGH, un as TAG_U, ut as TAG_IMG, v as TAG_ABBR, vn as assembleBufferedContent, vt as TAG_LINK, w as TAG_B, wt as TAG_NAV, xt as TAG_MARK, y as TAG_ADDRESS, yn as collectNodeContent, z as TAG_DETAILS, zt as TAG_SAMP } from "./const
|
|
1
|
+
import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, B as TAG_DFN, Bt as TAG_SCRIPT, C as TAG_AUDIO, Ct as TAG_METER, D as TAG_BLOCKQUOTE, E as TAG_BDO, Et as TAG_NOSCRIPT, F as TAG_CODE, Ft as TAG_Q, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, I as TAG_COL, It as TAG_RP, Jt as TAG_SUB, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, Lt as TAG_RT, Mt as TAG_PLAINTEXT, N as TAG_CENTER, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, P as TAG_CITE, Pt as TAG_PROGRESS, Q as TAG_H1, Qt as TAG_TABLE, R as TAG_DEL, Rt as TAG_RUBY, S as TAG_ASIDE, St as TAG_META, T as TAG_BASE, Tt as TAG_NOFRAMES, U as TAG_DL, Ut as TAG_SMALL, V as TAG_DIALOG, W as TAG_DT, Wt as TAG_SOURCE, X as TAG_FOOTER, Xt as TAG_SUP, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _n as TagIdMap, _t as TAG_LI, a as LIST_ITEM_SPACING, an as TAG_THEAD, b as TAG_AREA, bt as TAG_MAP, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_TR, ct as TAG_I, d as MARKDOWN_STRONG, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, f as MAX_TAG_ID, fn as TAG_VAR, ft as TAG_INS, g as TABLE_ROW_SPACING, gn as TEXT_NODE, gt as TAG_LEGEND, h as NodeEventExit, hn as TAG_XMP, ht as TAG_LABEL, i as HTML_ENTITIES, in as TAG_TH, it as TAG_HEAD, j as TAG_CANVAS, jt as TAG_PARAM, k as TAG_BR, kt as TAG_OPTION, l as MARKDOWN_INLINE_CODE, ln as TAG_TRACK, lt as TAG_IFRAME, m as NodeEventEnter, mn as TAG_WBR, mt as TAG_KEYGEN, n as DEFAULT_BLOCK_SPACING, nn as TAG_TEXTAREA, nt as TAG_H5, o as MARKDOWN_CODE_BLOCK, on as TAG_TIME, ot as TAG_HR, p as NO_SPACING, pn as TAG_VIDEO, pt as TAG_KBD, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, s as MARKDOWN_EMPHASIS, sn as TAG_TITLE, t as BLOCKQUOTE_SPACING, tn as TAG_TEMPLATE, tt as TAG_H4, u as MARKDOWN_STRIKETHROUGH, un as TAG_U, ut as TAG_IMG, v as TAG_ABBR, vn as assembleBufferedContent, vt as TAG_LINK, w as TAG_B, wt as TAG_NAV, xt as TAG_MARK, y as TAG_ADDRESS, yn as collectNodeContent, z as TAG_DETAILS, zt as TAG_SAMP } from "./const.mjs";
|
|
2
2
|
|
|
3
3
|
//#region src/tags.ts
|
|
4
4
|
function resolveUrl(url, origin) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { A as TAG_BUTTON, Dt as TAG_OBJECT, Ht as TAG_SELECT, K as TAG_EMBED, S as TAG_ASIDE, X as TAG_FOOTER, Y as TAG_FIGURE, Z as TAG_FORM, dt as TAG_INPUT, lt as TAG_IFRAME, nn as TAG_TEXTAREA, q as TAG_FIELDSET, wt as TAG_NAV } from "./const
|
|
2
|
-
import { a as filterPlugin, i as frontmatterPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./plugins
|
|
1
|
+
import { A as TAG_BUTTON, Dt as TAG_OBJECT, Ht as TAG_SELECT, K as TAG_EMBED, S as TAG_ASIDE, X as TAG_FOOTER, Y as TAG_FIGURE, Z as TAG_FORM, dt as TAG_INPUT, lt as TAG_IFRAME, nn as TAG_TEXTAREA, q as TAG_FIELDSET, wt as TAG_NAV } from "./const.mjs";
|
|
2
|
+
import { a as filterPlugin, i as frontmatterPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./plugins.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/preset/minimal.ts
|
|
5
5
|
/**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, Bt as TAG_SCRIPT, C as TAG_AUDIO, D as TAG_BLOCKQUOTE, Dt as TAG_OBJECT, F as TAG_CODE, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, J as TAG_FIGCAPTION, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, M as TAG_CAPTION, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, Q as TAG_H1, Qt as TAG_TABLE, S as TAG_ASIDE, St as TAG_META, U as TAG_DL, Vt as TAG_SECTION, W as TAG_DT, X as TAG_FOOTER, Y as TAG_FIGURE, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _t as TAG_LI, an as TAG_THEAD, at as TAG_HEADER, bn as createBufferRegion, cn as TAG_TR, ct as TAG_I, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, gn as TEXT_NODE, in as TAG_TH, it as TAG_HEAD, k as TAG_BR, lt as TAG_IFRAME, nn as TAG_TEXTAREA, nt as TAG_H5, ot as TAG_HR, pn as TAG_VIDEO, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, sn as TAG_TITLE, st as TAG_HTML, tt as TAG_H4, ut as TAG_IMG, w as TAG_B, wt as TAG_NAV, x as TAG_ARTICLE, y as TAG_ADDRESS, yn as collectNodeContent, yt as TAG_MAIN, z as TAG_DETAILS } from "./const
|
|
2
|
-
import { t as createPlugin } from "./plugin
|
|
3
|
-
import { n as parseSelector } from "./extraction
|
|
1
|
+
import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, Bt as TAG_SCRIPT, C as TAG_AUDIO, D as TAG_BLOCKQUOTE, Dt as TAG_OBJECT, F as TAG_CODE, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, J as TAG_FIGCAPTION, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, M as TAG_CAPTION, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, Q as TAG_H1, Qt as TAG_TABLE, S as TAG_ASIDE, St as TAG_META, U as TAG_DL, Vt as TAG_SECTION, W as TAG_DT, X as TAG_FOOTER, Y as TAG_FIGURE, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _t as TAG_LI, an as TAG_THEAD, at as TAG_HEADER, bn as createBufferRegion, cn as TAG_TR, ct as TAG_I, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, gn as TEXT_NODE, in as TAG_TH, it as TAG_HEAD, k as TAG_BR, lt as TAG_IFRAME, nn as TAG_TEXTAREA, nt as TAG_H5, ot as TAG_HR, pn as TAG_VIDEO, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, sn as TAG_TITLE, st as TAG_HTML, tt as TAG_H4, ut as TAG_IMG, w as TAG_B, wt as TAG_NAV, x as TAG_ARTICLE, y as TAG_ADDRESS, yn as collectNodeContent, yt as TAG_MAIN, z as TAG_DETAILS } from "./const.mjs";
|
|
2
|
+
import { t as createPlugin } from "./plugin.mjs";
|
|
3
|
+
import { n as parseSelector } from "./extraction.mjs";
|
|
4
4
|
|
|
5
5
|
//#region src/plugins/filter.ts
|
|
6
6
|
/**
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./markdown-processor
|
|
1
|
+
import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./markdown-processor.mjs";
|
|
2
2
|
|
|
3
3
|
//#region src/stream.ts
|
|
4
4
|
/**
|
package/dist/cli.mjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import "./_chunks/markdown-processor
|
|
2
|
-
import { n as streamHtmlToMarkdown } from "./_chunks/src
|
|
3
|
-
import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt
|
|
4
|
-
import "./_chunks/plugins
|
|
5
|
-
import { t as withMinimalPreset } from "./_chunks/minimal
|
|
1
|
+
import "./_chunks/markdown-processor.mjs";
|
|
2
|
+
import { n as streamHtmlToMarkdown } from "./_chunks/src.mjs";
|
|
3
|
+
import { n as generateLlmsTxtArtifacts } from "./_chunks/llms-txt.mjs";
|
|
4
|
+
import "./_chunks/plugins.mjs";
|
|
5
|
+
import { t as withMinimalPreset } from "./_chunks/minimal.mjs";
|
|
6
6
|
import { readFileSync } from "node:fs";
|
|
7
7
|
import { mkdir, writeFile } from "node:fs/promises";
|
|
8
8
|
import { Readable } from "node:stream";
|
|
@@ -62,4 +62,5 @@ cli.command("llms <patterns...>", "Generate llms.txt artifacts from HTML files")
|
|
|
62
62
|
});
|
|
63
63
|
cli.help().version(version).parse();
|
|
64
64
|
|
|
65
|
-
//#endregion
|
|
65
|
+
//#endregion
|
|
66
|
+
export { };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { _ as TagHandler, a as HandlerContext, b as ExtractedElement, c as MdreamRuntimeState, d as Plugin, f as PluginContext, g as TEXT_NODE, h as SplitterOptions, i as HTMLToMarkdownOptions, l as Node, m as ReadabilityContext, n as ELEMENT_NODE, o as MarkdownChunk, p as PluginCreationOptions, r as ElementNode, s as MdreamProcessingState, t as BufferRegion, u as NodeEvent, v as TailwindContext, y as TextNode } from "./_chunks/types
|
|
2
|
-
import { t as createPlugin } from "./_chunks/plugin
|
|
1
|
+
import { _ as TagHandler, a as HandlerContext, b as ExtractedElement, c as MdreamRuntimeState, d as Plugin, f as PluginContext, g as TEXT_NODE, h as SplitterOptions, i as HTMLToMarkdownOptions, l as Node, m as ReadabilityContext, n as ELEMENT_NODE, o as MarkdownChunk, p as PluginCreationOptions, r as ElementNode, s as MdreamProcessingState, t as BufferRegion, u as NodeEvent, v as TailwindContext, y as TextNode } from "./_chunks/types.mjs";
|
|
2
|
+
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
3
3
|
import { ReadableStream } from "node:stream/web";
|
|
4
4
|
|
|
5
5
|
//#region src/const.d.ts
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { _n as TagIdMap } from "./_chunks/const
|
|
2
|
-
import { i as parseHtml, t as MarkdownProcessor } from "./_chunks/markdown-processor
|
|
3
|
-
import { t as createPlugin } from "./_chunks/plugin
|
|
4
|
-
import { n as streamHtmlToMarkdown, t as htmlToMarkdown } from "./_chunks/src
|
|
1
|
+
import { _n as TagIdMap } from "./_chunks/const.mjs";
|
|
2
|
+
import { i as parseHtml, t as MarkdownProcessor } from "./_chunks/markdown-processor.mjs";
|
|
3
|
+
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
4
|
+
import { n as streamHtmlToMarkdown, t as htmlToMarkdown } from "./_chunks/src.mjs";
|
|
5
5
|
|
|
6
6
|
export { MarkdownProcessor, TagIdMap, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
|
package/dist/llms-txt.d.mts
CHANGED
|
@@ -79,45 +79,6 @@ interface CreateLlmsTxtStreamOptions extends Omit<LlmsTxtArtifactsOptions, 'patt
|
|
|
79
79
|
/** Notes to write at the end */
|
|
80
80
|
notes?: string | string[];
|
|
81
81
|
}
|
|
82
|
-
/**
|
|
83
|
-
* Create a WritableStream that generates llms.txt artifacts by streaming pages to disk
|
|
84
|
-
*
|
|
85
|
-
* Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written,
|
|
86
|
-
* never keeping full content in memory. Creates outputDir recursively if needed.
|
|
87
|
-
*
|
|
88
|
-
* @example
|
|
89
|
-
* ```typescript
|
|
90
|
-
* const stream = createLlmsTxtStream({
|
|
91
|
-
* siteName: 'My Docs',
|
|
92
|
-
* description: 'Documentation site',
|
|
93
|
-
* origin: 'https://example.com',
|
|
94
|
-
* generateFull: true,
|
|
95
|
-
* outputDir: './dist',
|
|
96
|
-
* sections: [
|
|
97
|
-
* {
|
|
98
|
-
* title: 'Getting Started',
|
|
99
|
-
* description: 'Quick start guide',
|
|
100
|
-
* links: [
|
|
101
|
-
* { title: 'Installation', href: '/install', description: 'How to install' },
|
|
102
|
-
* { title: 'Quick Start', href: '/quickstart' },
|
|
103
|
-
* ],
|
|
104
|
-
* },
|
|
105
|
-
* ],
|
|
106
|
-
* notes: ['Generated by mdream', 'Last updated: 2024'],
|
|
107
|
-
* })
|
|
108
|
-
*
|
|
109
|
-
* const writer = stream.getWriter()
|
|
110
|
-
* await writer.write({
|
|
111
|
-
* title: 'Home',
|
|
112
|
-
* content: '# Welcome\n\nHome page content.',
|
|
113
|
-
* url: '/',
|
|
114
|
-
* })
|
|
115
|
-
* await writer.close()
|
|
116
|
-
* ```
|
|
117
|
-
*
|
|
118
|
-
* @param options - Configuration options
|
|
119
|
-
* @returns WritableStream that accepts ProcessedFile objects
|
|
120
|
-
*/
|
|
121
82
|
declare function createLlmsTxtStream(options?: CreateLlmsTxtStreamOptions): WritableStream<ProcessedFile>;
|
|
122
83
|
//#endregion
|
|
123
84
|
export { CreateLlmsTxtStreamOptions, LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, LlmsTxtLink, LlmsTxtSection, ProcessedFile, createLlmsTxtStream, generateLlmsTxtArtifacts };
|
package/dist/llms-txt.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "./_chunks/markdown-processor
|
|
2
|
-
import "./_chunks/src
|
|
3
|
-
import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt
|
|
1
|
+
import "./_chunks/markdown-processor.mjs";
|
|
2
|
+
import "./_chunks/src.mjs";
|
|
3
|
+
import { n as generateLlmsTxtArtifacts, t as createLlmsTxtStream } from "./_chunks/llms-txt.mjs";
|
|
4
4
|
|
|
5
5
|
export { createLlmsTxtStream, generateLlmsTxtArtifacts };
|
package/dist/plugins.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { d as Plugin, x as extractionPlugin } from "./_chunks/types
|
|
2
|
-
import { t as createPlugin } from "./_chunks/plugin
|
|
1
|
+
import { d as Plugin, x as extractionPlugin } from "./_chunks/types.mjs";
|
|
2
|
+
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/plugins/filter.d.ts
|
|
5
5
|
|
package/dist/plugins.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { t as createPlugin } from "./_chunks/plugin
|
|
2
|
-
import { t as extractionPlugin } from "./_chunks/extraction
|
|
3
|
-
import { a as filterPlugin, i as frontmatterPlugin, n as readabilityPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./_chunks/plugins
|
|
1
|
+
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
2
|
+
import { t as extractionPlugin } from "./_chunks/extraction.mjs";
|
|
3
|
+
import { a as filterPlugin, i as frontmatterPlugin, n as readabilityPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./_chunks/plugins.mjs";
|
|
4
4
|
|
|
5
5
|
export { createPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/preset/minimal.mjs
CHANGED
package/dist/splitter.d.mts
CHANGED
package/dist/splitter.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { $ as TAG_H2, F as TAG_CODE, Nt as TAG_PRE, Q as TAG_H1, et as TAG_H3, gn as TEXT_NODE, h as NodeEventExit, m as NodeEventEnter, nt as TAG_H5, ot as TAG_HR, r as ELEMENT_NODE, rt as TAG_H6, tt as TAG_H4 } from "./_chunks/const
|
|
2
|
-
import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./_chunks/markdown-processor
|
|
1
|
+
import { $ as TAG_H2, F as TAG_CODE, Nt as TAG_PRE, Q as TAG_H1, et as TAG_H3, gn as TEXT_NODE, h as NodeEventExit, m as NodeEventEnter, nt as TAG_H5, ot as TAG_HR, r as ELEMENT_NODE, rt as TAG_H6, tt as TAG_H4 } from "./_chunks/const.mjs";
|
|
2
|
+
import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./_chunks/markdown-processor.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/splitter.ts
|
|
5
5
|
const DEFAULT_HEADERS_TO_SPLIT_ON = [
|
package/package.json
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|