mdream 0.13.3 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -311,6 +311,117 @@ htmlToMarkdown(html, { plugins: [plugin] })
311
311
 
312
312
  The extraction plugin provides memory-efficient element extraction with full text content and attributes, perfect for SEO analysis, content discovery, and data mining.
313
313
 
314
+ ## Markdown Splitting
315
+
316
+ Split HTML into chunks during conversion for LLM context windows, vector databases, or document processing.
317
+
318
+ ### Basic Chunking
319
+
320
+ ```ts
321
+ import { TAG_H2 } from 'mdream'
322
+ import { htmlToMarkdownSplitChunks } from 'mdream/splitter'
323
+
324
+ const html = `
325
+ <h1>Documentation</h1>
326
+ <h2>Installation</h2>
327
+ <p>Install via npm...</p>
328
+ <h2>Usage</h2>
329
+ <p>Use it like this...</p>
330
+ `
331
+
332
+ const chunks = htmlToMarkdownSplitChunks(html, {
333
+ headersToSplitOn: [TAG_H2], // Split on h2 headers
334
+ chunkSize: 1000, // Max chars per chunk
335
+ chunkOverlap: 200, // Overlap for context
336
+ stripHeaders: true // Remove headers from content
337
+ })
338
+
339
+ // Each chunk includes content and metadata
340
+ chunks.forEach((chunk) => {
341
+ console.log(chunk.content)
342
+ console.log(chunk.metadata.headers) // { h1: "Documentation", h2: "Installation" }
343
+ console.log(chunk.metadata.code) // Language if chunk contains code
344
+ console.log(chunk.metadata.loc) // Line numbers
345
+ })
346
+ ```
347
+
348
+ ### Streaming Chunks (Memory Efficient)
349
+
350
+ For large documents, use the generator version to process chunks one at a time:
351
+
352
+ ```ts
353
+ import { htmlToMarkdownSplitChunksStream } from 'mdream/splitter'
354
+
355
+ // Process chunks incrementally - lower memory usage
356
+ for (const chunk of htmlToMarkdownSplitChunksStream(html, options)) {
357
+ await processChunk(chunk) // Handle each chunk as it's generated
358
+
359
+ // Can break early if you found what you need
360
+ if (foundTarget)
361
+ break
362
+ }
363
+ ```
364
+
365
+ **Benefits of streaming:**
366
+ - Lower memory usage - chunks aren't stored in an array
367
+ - Early termination - stop processing when you find what you need
368
+ - Better for large documents
369
+
370
+ ### Splitting Options
371
+
372
+ ```ts
373
+ interface SplitterOptions {
374
+ // Structural splitting
375
+ headersToSplitOn?: number[] // TAG_H1, TAG_H2, etc. Default: TAG_H2 through TAG_H6
376
+
377
+ // Size-based splitting
378
+ chunkSize?: number // Max chunk size. Default: 1000
379
+ chunkOverlap?: number // Overlap between chunks. Default: 200
380
+ lengthFunction?: (text: string) => number // Custom length (e.g., token count)
381
+
382
+ // Output formatting
383
+ stripHeaders?: boolean // Remove headers from content. Default: true
384
+ returnEachLine?: boolean // Split into individual lines. Default: false
385
+
386
+ // Standard options
387
+ origin?: string // Base URL for links/images
388
+ plugins?: Plugin[] // Apply plugins during conversion
389
+ }
390
+ ```
391
+
392
+ ### Chunk Metadata
393
+
394
+ Each chunk includes rich metadata for context:
395
+
396
+ ```ts
397
+ interface MarkdownChunk {
398
+ content: string
399
+ metadata: {
400
+ headers?: Record<string, string> // Header hierarchy: { h1: "Title", h2: "Section" }
401
+ code?: string // Code block language if present
402
+ loc?: { // Line number range
403
+ lines: { from: number, to: number }
404
+ }
405
+ }
406
+ }
407
+ ```
408
+
409
+ ### Use with Presets
410
+
411
+ Combine splitting with presets for optimized output:
412
+
413
+ ```ts
414
+ import { TAG_H2 } from 'mdream'
415
+ import { withMinimalPreset } from 'mdream/preset/minimal'
416
+ import { htmlToMarkdownSplitChunks } from 'mdream/splitter'
417
+
418
+ const chunks = htmlToMarkdownSplitChunks(html, withMinimalPreset({
419
+ headersToSplitOn: [TAG_H2],
420
+ chunkSize: 500,
421
+ origin: 'https://example.com'
422
+ }))
423
+ ```
424
+
314
425
  ## Credits
315
426
 
316
427
  - [ultrahtml](https://github.com/natemoo-re/ultrahtml): HTML parsing inspiration
@@ -29,10 +29,7 @@ function collectNodeContent(node, content, state) {
29
29
  */
30
30
  function assembleBufferedContent(state) {
31
31
  const fragments = [];
32
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) {
33
- const include = state.regionToggles.get(regionId);
34
- if (include) fragments.push(...content);
35
- }
32
+ for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
36
33
  state.regionToggles.clear();
37
34
  state.regionContentBuffers.clear();
38
35
  return fragments.join("").trimStart();
@@ -285,4 +282,4 @@ const LIST_ITEM_SPACING = [1, 0];
285
282
  const TABLE_ROW_SPACING = [0, 1];
286
283
 
287
284
  //#endregion
288
- export { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ARTICLE, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CAPTION, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HR, TAG_HTML, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAIN, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OBJECT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SECTION, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent, createBufferRegion };
285
+ export { TAG_H2 as $, TAG_TBODY as $t, TAG_BUTTON as A, TAG_P as At, TAG_DFN as B, TAG_SCRIPT as Bt, TAG_AUDIO as C, TAG_METER as Ct, TAG_BLOCKQUOTE as D, TAG_OBJECT as Dt, TAG_BDO as E, TAG_NOSCRIPT as Et, TAG_CODE as F, TAG_Q as Ft, TAG_EM as G, TAG_SPAN as Gt, TAG_DIV as H, TAG_SELECT as Ht, TAG_COL as I, TAG_RP as It, TAG_FIGCAPTION as J, TAG_SUB as Jt, TAG_EMBED as K, TAG_STRONG as Kt, TAG_DD as L, TAG_RT as Lt, TAG_CAPTION as M, TAG_PLAINTEXT as Mt, TAG_CENTER as N, TAG_PRE as Nt, TAG_BODY as O, TAG_OL as Ot, TAG_CITE as P, TAG_PROGRESS as Pt, TAG_H1 as Q, TAG_TABLE as Qt, TAG_DEL as R, TAG_RUBY as Rt, TAG_ASIDE as S, TAG_META as St, TAG_BASE as T, TAG_NOFRAMES as Tt, TAG_DL as U, TAG_SMALL as Ut, TAG_DIALOG as V, TAG_SECTION as Vt, TAG_DT as W, TAG_SOURCE as Wt, TAG_FOOTER as X, TAG_SUP as Xt, TAG_FIGURE as Y, TAG_SUMMARY as Yt, TAG_FORM as Z, TAG_SVG as Zt, TAG_A as _, TagIdMap as _n, TAG_LI as _t, LIST_ITEM_SPACING as a, TAG_THEAD as an, TAG_HEADER as at, TAG_AREA as b, createBufferRegion as bn, TAG_MAP as bt, MARKDOWN_HORIZONTAL_RULE as c, TAG_TR as cn, TAG_I as ct, MARKDOWN_STRONG as d, TAG_UL as dn, TAG_INPUT as dt, TAG_TD as en, TAG_H3 as et, MAX_TAG_ID as f, TAG_VAR as fn, TAG_INS as ft, TABLE_ROW_SPACING as g, TEXT_NODE as gn, TAG_LEGEND as gt, NodeEventExit as h, TAG_XMP as hn, TAG_LABEL as ht, HTML_ENTITIES as i, TAG_TH as in, TAG_HEAD as it, TAG_CANVAS as j, TAG_PARAM as jt, TAG_BR as k, TAG_OPTION as kt, MARKDOWN_INLINE_CODE as l, TAG_TRACK as ln, TAG_IFRAME as lt, NodeEventEnter as m, TAG_WBR as mn, TAG_KEYGEN as mt, DEFAULT_BLOCK_SPACING as n, TAG_TEXTAREA as nn, TAG_H5 as nt, MARKDOWN_CODE_BLOCK as o, TAG_TIME as on, TAG_HR as ot, NO_SPACING as p, TAG_VIDEO as pn, TAG_KBD as pt, TAG_FIELDSET as q, TAG_STYLE as qt, ELEMENT_NODE as r, TAG_TFOOT as rn, TAG_H6 as rt, MARKDOWN_EMPHASIS as s, TAG_TITLE as sn, TAG_HTML as st, BLOCKQUOTE_SPACING as t, TAG_TEMPLATE as tn, TAG_H4 as tt, MARKDOWN_STRIKETHROUGH as u, TAG_U as un, TAG_IMG as ut, TAG_ABBR 
as v, assembleBufferedContent as vn, TAG_LINK as vt, TAG_B as w, TAG_NAV as wt, TAG_ARTICLE as x, TAG_MARK as xt, TAG_ADDRESS as y, collectNodeContent as yn, TAG_MAIN as yt, TAG_DETAILS as z, TAG_SAMP as zt };
@@ -1,4 +1,4 @@
1
- import { createPlugin } from "./plugin-DrovQriD.mjs";
1
+ import { t as createPlugin } from "./plugin-CjWWQTuL.mjs";
2
2
 
3
3
  //#region src/libs/query-selector.ts
4
4
  /**
@@ -28,8 +28,7 @@ function createClassSelector(selector) {
28
28
  return {
29
29
  matches: (element) => {
30
30
  if (!element.attributes?.class) return false;
31
- const classes = element.attributes.class.trim().split(" ").filter(Boolean);
32
- return classes.includes(className);
31
+ return element.attributes.class.trim().split(" ").filter(Boolean).includes(className);
33
32
  },
34
33
  toString: () => `.${className}`
35
34
  };
@@ -109,7 +108,7 @@ function extractionPlugin(selectors) {
109
108
  matcher: parseSelector(selector),
110
109
  callback
111
110
  }));
112
- const trackedElements = new Map();
111
+ const trackedElements = /* @__PURE__ */ new Map();
113
112
  return createPlugin({
114
113
  onNodeEnter(element) {
115
114
  matcherCallbacks.forEach(({ matcher, callback }) => {
@@ -126,7 +125,6 @@ function extractionPlugin(selectors) {
126
125
  if (tracked) tracked.textContent += textNode.value;
127
126
  currentParent = currentParent.parent;
128
127
  }
129
- return void 0;
130
128
  },
131
129
  onNodeExit(element, state) {
132
130
  const tracked = trackedElements.get(element);
@@ -143,4 +141,4 @@ function extractionPlugin(selectors) {
143
141
  }
144
142
 
145
143
  //#endregion
146
- export { extractionPlugin, parseSelector };
144
+ export { parseSelector as n, extractionPlugin as t };
@@ -1,7 +1,7 @@
1
- import { htmlToMarkdown } from "./src-C3QpB75q.mjs";
2
- import { extractionPlugin } from "./extraction-BPaDGYvv.mjs";
3
- import { readFile } from "node:fs/promises";
4
- import { basename, dirname, relative, sep } from "pathe";
1
+ import { t as htmlToMarkdown } from "./src-BJpipdul.mjs";
2
+ import { t as extractionPlugin } from "./extraction-BA9MDtq3.mjs";
3
+ import { mkdir, open, readFile } from "node:fs/promises";
4
+ import { basename, dirname, join, relative, sep } from "pathe";
5
5
  import { glob } from "tinyglobby";
6
6
 
7
7
  //#region src/llms-txt.ts
@@ -13,28 +13,27 @@ function extractMetadata(html, url) {
13
13
  let description = "";
14
14
  let keywords = "";
15
15
  let author = "";
16
- const extractionPluginInstance = extractionPlugin({
17
- "title": (element) => {
18
- if (!title && element.textContent) title = element.textContent.trim();
19
- },
20
- "meta[name=\"description\"]": (element) => {
21
- if (!description && element.attributes?.content) description = element.attributes.content.trim();
22
- },
23
- "meta[property=\"og:description\"]": (element) => {
24
- if (!description && element.attributes?.content) description = element.attributes.content.trim();
25
- },
26
- "meta[name=\"keywords\"]": (element) => {
27
- if (!keywords && element.attributes?.content) keywords = element.attributes.content.trim();
28
- },
29
- "meta[name=\"author\"]": (element) => {
30
- if (!author && element.attributes?.content) author = element.attributes.content.trim();
31
- },
32
- "meta[property=\"og:title\"]": (element) => {
33
- if (!title && element.attributes?.content) title = element.attributes.content.trim();
34
- }
35
- });
36
16
  htmlToMarkdown(html, {
37
- plugins: [extractionPluginInstance],
17
+ plugins: [extractionPlugin({
18
+ "title": (element) => {
19
+ if (!title && element.textContent) title = element.textContent.trim();
20
+ },
21
+ "meta[name=\"description\"]": (element) => {
22
+ if (!description && element.attributes?.content) description = element.attributes.content.trim();
23
+ },
24
+ "meta[property=\"og:description\"]": (element) => {
25
+ if (!description && element.attributes?.content) description = element.attributes.content.trim();
26
+ },
27
+ "meta[name=\"keywords\"]": (element) => {
28
+ if (!keywords && element.attributes?.content) keywords = element.attributes.content.trim();
29
+ },
30
+ "meta[name=\"author\"]": (element) => {
31
+ if (!author && element.attributes?.content) author = element.attributes.content.trim();
32
+ },
33
+ "meta[property=\"og:title\"]": (element) => {
34
+ if (!title && element.attributes?.content) title = element.attributes.content.trim();
35
+ }
36
+ })],
38
37
  origin: url
39
38
  });
40
39
  return {
@@ -113,8 +112,7 @@ function generateLlmsTxtContent(files, options) {
113
112
  * Parse frontmatter from markdown content
114
113
  */
115
114
  function parseFrontmatter(content) {
116
- const frontmatterRegex = /^---\n([\s\S]*?)\n---\n([\s\S]*)$/;
117
- const match = content.match(frontmatterRegex);
115
+ const match = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
118
116
  if (!match) return {
119
117
  frontmatter: null,
120
118
  body: content
@@ -127,8 +125,7 @@ function parseFrontmatter(content) {
127
125
  const colonIndex = line.indexOf(":");
128
126
  if (colonIndex > 0) {
129
127
  const key = line.substring(0, colonIndex).trim();
130
- const value = line.substring(colonIndex + 1).trim();
131
- frontmatter[key] = value;
128
+ frontmatter[key] = line.substring(colonIndex + 1).trim();
132
129
  }
133
130
  }
134
131
  return {
@@ -172,11 +169,10 @@ function generateLlmsFullTxtContent(files, options) {
172
169
  if (file.metadata.keywords) metadata.keywords = file.metadata.keywords;
173
170
  if (file.metadata.author) metadata.author = file.metadata.author;
174
171
  }
175
- const mergedFrontmatter = frontmatter ? {
172
+ const frontmatterString = serializeFrontmatter(frontmatter ? {
176
173
  ...frontmatter,
177
174
  ...metadata
178
- } : metadata;
179
- const frontmatterString = serializeFrontmatter(mergedFrontmatter);
175
+ } : metadata);
180
176
  let contentBody = frontmatter ? body : file.content;
181
177
  const titleLine = contentBody.trim().split("\n")[0];
182
178
  if (titleLine === file.title || titleLine === `# ${file.title}`) contentBody = contentBody.trim().split("\n").slice(1).join("\n").trimStart();
@@ -191,8 +187,7 @@ function generateLlmsFullTxtContent(files, options) {
191
187
  function generateMarkdownFilesContent(files) {
192
188
  const markdownFiles = [];
193
189
  for (const file of files) {
194
- const urlPath = file.url === "/" ? "index" : file.url.replace(/^\//, "").replace(/\/$/, "");
195
- const mdPath = `md/${urlPath}.md`;
190
+ const mdPath = `md/${file.url === "/" ? "index" : file.url.replace(/^\//, "").replace(/\/$/, "")}.md`;
196
191
  markdownFiles.push({
197
192
  path: mdPath,
198
193
  content: file.content
@@ -220,6 +215,99 @@ async function generateLlmsTxtArtifacts(options) {
220
215
  processedFiles: files
221
216
  };
222
217
  }
218
/**
 * Create a WritableStream that generates llms.txt artifacts by streaming pages to disk.
 *
 * Writes llms.txt (and, when `generateFull` is set, llms-full.txt) incrementally as
 * pages are written to the stream, so page content is never accumulated in memory.
 * `outputDir` is created recursively if it does not exist.
 *
 * Both output files are opened in `start()`. If opening or writing fails at any
 * point, every handle opened so far is closed before the error is propagated, so a
 * failed stream does not leak file descriptors.
 *
 * @example
 * ```typescript
 * const stream = createLlmsTxtStream({
 *   siteName: 'My Docs',
 *   description: 'Documentation site',
 *   origin: 'https://example.com',
 *   generateFull: true,
 *   outputDir: './dist',
 * })
 *
 * const writer = stream.getWriter()
 * await writer.write({
 *   title: 'Home',
 *   content: '# Welcome\n\nHome page content.',
 *   url: '/',
 * })
 * await writer.close()
 * ```
 *
 * @param options - Configuration options (`siteName`, `description`, `origin`,
 *   `generateFull`, `outputDir`); all are optional.
 * @returns WritableStream that accepts ProcessedFile objects
 */
function createLlmsTxtStream(options = {}) {
	const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd() } = options;
	let llmsTxtHandle;
	let llmsFullTxtHandle;

	/**
	 * Close whichever handles are currently open. The second handle is closed even
	 * if closing the first one throws; references are cleared first so a repeated
	 * call is a no-op.
	 */
	const closeHandles = async () => {
		const indexHandle = llmsTxtHandle;
		const fullHandle = llmsFullTxtHandle;
		llmsTxtHandle = void 0;
		llmsFullTxtHandle = void 0;
		try {
			await indexHandle?.close();
		} finally {
			await fullHandle?.close();
		}
	};

	/**
	 * Release the file handles and rethrow `error`. A secondary failure while
	 * closing is deliberately ignored so the original error is the one reported.
	 */
	const cleanupAndRethrow = async (error) => {
		await closeHandles().catch(() => {});
		throw error;
	};

	return new WritableStream({
		async start() {
			// The stream is not guaranteed to invoke close()/abort() on the sink after
			// start() rejects, so cleanup has to happen here.
			try {
				await mkdir(outputDir, { recursive: true });
				llmsTxtHandle = await open(join(outputDir, "llms.txt"), "w");
				// Both files share the same title/description preamble; only the index
				// file gets the "## Pages" heading.
				let preamble = `# ${siteName}\n\n`;
				if (description) preamble += `> ${description}\n\n`;
				await llmsTxtHandle.write(`${preamble}## Pages\n\n`);
				if (generateFull) {
					llmsFullTxtHandle = await open(join(outputDir, "llms-full.txt"), "w");
					await llmsFullTxtHandle.write(preamble);
				}
			} catch (error) {
				await cleanupAndRethrow(error);
			}
		},
		async write(file) {
			// Same reasoning as start(): a rejected write errors the stream, and the
			// handles would otherwise stay open.
			try {
				const desc = file.metadata?.description;
				// Descriptions in the index are capped at 100 characters.
				const descText = desc ? `: ${desc.substring(0, 100)}${desc.length > 100 ? "..." : ""}` : "";
				const isAbsoluteUrl = file.url.startsWith("http://") || file.url.startsWith("https://");
				// Guard against a falsy origin so the index never gets a "null/..." link.
				const url = isAbsoluteUrl || !origin ? file.url : origin + file.url;
				const relativePath = file.filePath ? relative(outputDir, file.filePath) : void 0;
				// Pages backed by a markdown file on disk link to that file; all others
				// link to their URL.
				const linkTarget = file.filePath && file.filePath.endsWith(".md") ? relativePath : url;
				await llmsTxtHandle?.write(`- [${file.title}](${linkTarget})${descText}\n`);

				if (generateFull && llmsFullTxtHandle) {
					const { frontmatter, body } = parseFrontmatter(file.content);
					const metadata = {
						title: file.title,
						url
					};
					if (file.filePath) metadata.file = relativePath;
					if (file.metadata) {
						if (file.metadata.description) metadata.description = file.metadata.description;
						if (file.metadata.keywords) metadata.keywords = file.metadata.keywords;
						if (file.metadata.author) metadata.author = file.metadata.author;
					}
					// Generated metadata wins over keys already present in the page's
					// own frontmatter.
					const frontmatterString = serializeFrontmatter(frontmatter ? {
						...frontmatter,
						...metadata
					} : metadata);
					let contentBody = frontmatter ? body : file.content;
					// Drop a leading line that merely repeats the title (plain or as an H1).
					const titleLine = contentBody.trim().split("\n")[0];
					if (titleLine === file.title || titleLine === `# ${file.title}`) contentBody = contentBody.trim().split("\n").slice(1).join("\n").trimStart();
					await llmsFullTxtHandle.write(`---\n${frontmatterString}\n---\n\n${contentBody}\n\n---\n\n`);
				}
			} catch (error) {
				await cleanupAndRethrow(error);
			}
		},
		async close() {
			await closeHandles();
		},
		async abort() {
			await closeHandles();
		}
	});
}
223
311
 
224
312
  //#endregion
225
- export { generateLlmsTxtArtifacts };
313
+ export { generateLlmsTxtArtifacts as n, createLlmsTxtStream as t };
@@ -1,4 +1,4 @@
1
- import { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HR, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent } from "./const-BOAJ1T5c.mjs";
1
+ import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, B as TAG_DFN, Bt as TAG_SCRIPT, C as TAG_AUDIO, Ct as TAG_METER, D as TAG_BLOCKQUOTE, E as TAG_BDO, Et as TAG_NOSCRIPT, F as TAG_CODE, Ft as TAG_Q, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, I as TAG_COL, It as TAG_RP, Jt as TAG_SUB, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, Lt as TAG_RT, Mt as TAG_PLAINTEXT, N as TAG_CENTER, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, P as TAG_CITE, Pt as TAG_PROGRESS, Q as TAG_H1, Qt as TAG_TABLE, R as TAG_DEL, Rt as TAG_RUBY, S as TAG_ASIDE, St as TAG_META, T as TAG_BASE, Tt as TAG_NOFRAMES, U as TAG_DL, Ut as TAG_SMALL, V as TAG_DIALOG, W as TAG_DT, Wt as TAG_SOURCE, X as TAG_FOOTER, Xt as TAG_SUP, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _n as TagIdMap, _t as TAG_LI, a as LIST_ITEM_SPACING, an as TAG_THEAD, b as TAG_AREA, bt as TAG_MAP, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_TR, ct as TAG_I, d as MARKDOWN_STRONG, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, f as MAX_TAG_ID, fn as TAG_VAR, ft as TAG_INS, g as TABLE_ROW_SPACING, gn as TEXT_NODE, gt as TAG_LEGEND, h as NodeEventExit, hn as TAG_XMP, ht as TAG_LABEL, i as HTML_ENTITIES, in as TAG_TH, it as TAG_HEAD, j as TAG_CANVAS, jt as TAG_PARAM, k as TAG_BR, kt as TAG_OPTION, l as MARKDOWN_INLINE_CODE, ln as TAG_TRACK, lt as TAG_IFRAME, m as NodeEventEnter, mn as TAG_WBR, mt as TAG_KEYGEN, n as DEFAULT_BLOCK_SPACING, nn as TAG_TEXTAREA, nt as TAG_H5, o as MARKDOWN_CODE_BLOCK, on as TAG_TIME, ot as TAG_HR, p as NO_SPACING, pn as TAG_VIDEO, pt as TAG_KBD, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, s as MARKDOWN_EMPHASIS, sn as TAG_TITLE, t as BLOCKQUOTE_SPACING, tn as TAG_TEMPLATE, tt as TAG_H4, u as MARKDOWN_STRIKETHROUGH, un as TAG_U, ut as TAG_IMG, v as TAG_ABBR, vn as assembleBufferedContent, vt as TAG_LINK, w as TAG_B, wt as TAG_NAV, xt as TAG_MARK, y as TAG_ADDRESS, yn as collectNodeContent, z as 
TAG_DETAILS, zt as TAG_SAMP } from "./const-Bf_XN9U9.mjs";
2
2
 
3
3
  //#region src/tags.ts
4
4
  function resolveUrl(url, origin) {
@@ -6,15 +6,9 @@ function resolveUrl(url, origin) {
6
6
  if (url.startsWith("//")) return `https:${url}`;
7
7
  if (url.startsWith("#")) return url;
8
8
  if (origin) {
9
- if (url.startsWith("/") && origin) {
10
- const cleanOrigin = origin.endsWith("/") ? origin.slice(0, -1) : origin;
11
- return `${cleanOrigin}${url}`;
12
- }
9
+ if (url.startsWith("/") && origin) return `${origin.endsWith("/") ? origin.slice(0, -1) : origin}${url}`;
13
10
  if (url.startsWith("./")) return `${origin}/${url.slice(2)}`;
14
- if (!url.startsWith("http")) {
15
- const cleanUrl = url.startsWith("/") ? url.slice(1) : url;
16
- return `${origin}/${cleanUrl}`;
17
- }
11
+ if (!url.startsWith("http")) return `${origin}/${url.startsWith("/") ? url.slice(1) : url}`;
18
12
  }
19
13
  return url;
20
14
  }
@@ -157,10 +151,7 @@ const tagHandlers = {
157
151
  },
158
152
  [TAG_CODE]: {
159
153
  enter: ({ node }) => {
160
- if ((node.depthMap[TAG_PRE] || 0) > 0) {
161
- const language = getLanguageFromClass(node.attributes?.class);
162
- return `${MARKDOWN_CODE_BLOCK}${language}\n`;
163
- }
154
+ if ((node.depthMap[TAG_PRE] || 0) > 0) return `${MARKDOWN_CODE_BLOCK}${getLanguageFromClass(node.attributes?.class)}\n`;
164
155
  return MARKDOWN_INLINE_CODE;
165
156
  },
166
157
  exit: ({ node }) => {
@@ -179,9 +170,7 @@ const tagHandlers = {
179
170
  if (isInsideTableCell(node)) return "<li>";
180
171
  const depth = (node.depthMap[TAG_UL] || 0) + (node.depthMap[TAG_OL] || 0) - 1;
181
172
  const isOrdered = node.parent?.tagId === TAG_OL;
182
- const indent = " ".repeat(Math.max(0, depth));
183
- const marker = isOrdered ? `${node.index + 1}. ` : "- ";
184
- return `${indent}${marker}`;
173
+ return `${" ".repeat(Math.max(0, depth))}${isOrdered ? `${node.index + 1}. ` : "- "}`;
185
174
  },
186
175
  exit: ({ node }) => isInsideTableCell(node) ? "</li>" : void 0,
187
176
  spacing: LIST_ITEM_SPACING
@@ -194,8 +183,7 @@ const tagHandlers = {
194
183
  if (!node.attributes?.href) return "";
195
184
  const href = resolveUrl(node.attributes?.href || "", state.options?.origin);
196
185
  let title = node.attributes?.title;
197
- const lastContent = state.lastContentCache;
198
- if (lastContent === title) title = "";
186
+ if (state.lastContentCache === title) title = "";
199
187
  return title ? `](${href} "${title}")` : `](${href})`;
200
188
  },
201
189
  collapsesInnerWhiteSpace: true,
@@ -204,9 +192,7 @@ const tagHandlers = {
204
192
  },
205
193
  [TAG_IMG]: {
206
194
  enter: ({ node, state }) => {
207
- const alt = node.attributes?.alt || "";
208
- const src = resolveUrl(node.attributes?.src || "", state.options?.origin);
209
- return `![${alt}](${src})`;
195
+ return `![${node.attributes?.alt || ""}](${resolveUrl(node.attributes?.src || "", state.options?.origin)})`;
210
196
  },
211
197
  collapsesInnerWhiteSpace: true,
212
198
  isSelfClosing: true,
@@ -241,15 +227,14 @@ const tagHandlers = {
241
227
  state.tableRenderedTable = true;
242
228
  const alignments = state.tableColumnAlignments;
243
229
  while (alignments.length < state.tableCurrentRowCells) alignments.push("");
244
- const alignmentMarkers = alignments.map((align) => {
230
+ return ` |\n| ${alignments.map((align) => {
245
231
  switch (align) {
246
232
  case "left": return ":---";
247
233
  case "center": return ":---:";
248
234
  case "right": return "---:";
249
235
  default: return "---";
250
236
  }
251
- });
252
- return ` |\n| ${alignmentMarkers.join(" | ")} |`;
237
+ }).join(" | ")} |`;
253
238
  }
254
239
  return " |";
255
240
  },
@@ -637,17 +622,15 @@ function isWhitespace(charCode) {
637
622
  */
638
623
  function parseHtml(html, options = {}) {
639
624
  const events = [];
640
- const state = {
641
- depthMap: new Uint8Array(MAX_TAG_ID),
642
- depth: 0,
643
- plugins: options.plugins || []
644
- };
645
- const remainingHtml = parseHtmlInternal(html, state, (event) => {
646
- events.push(event);
647
- });
648
625
  return {
649
626
  events,
650
- remainingHtml
627
+ remainingHtml: parseHtmlInternal(html, {
628
+ depthMap: new Uint8Array(MAX_TAG_ID),
629
+ depth: 0,
630
+ plugins: options.plugins || []
631
+ }, (event) => {
632
+ events.push(event);
633
+ })
651
634
  };
652
635
  }
653
636
  /**
@@ -840,8 +823,7 @@ function processClosingTag(htmlChunk, position, state, handleEvent) {
840
823
  const chunkLength = htmlChunk.length;
841
824
  let foundClose = false;
842
825
  while (i < chunkLength) {
843
- const charCode = htmlChunk.charCodeAt(i);
844
- if (charCode === GT_CHAR) {
826
+ if (htmlChunk.charCodeAt(i) === GT_CHAR) {
845
827
  foundClose = true;
846
828
  break;
847
829
  }
@@ -852,8 +834,7 @@ function processClosingTag(htmlChunk, position, state, handleEvent) {
852
834
  newPosition: position,
853
835
  remainingText: htmlChunk.substring(position)
854
836
  };
855
- const tagName = htmlChunk.substring(tagNameStart, i).toLowerCase();
856
- const tagId = TagIdMap[tagName] ?? -1;
837
+ const tagId = TagIdMap[htmlChunk.substring(tagNameStart, i).toLowerCase()] ?? -1;
857
838
  if (state.currentNode?.tagHandler?.isNonNesting && tagId !== state.currentNode.tagId) return {
858
839
  complete: false,
859
840
  newPosition: position,
@@ -885,16 +866,15 @@ function closeNode(node, state, handleEvent) {
885
866
  const prefix = node.attributes?.title || node.attributes?.["aria-label"] || "";
886
867
  if (prefix) {
887
868
  node.childTextNodeIndex = 1;
888
- const textNode = {
889
- type: TEXT_NODE,
890
- value: prefix,
891
- parent: node,
892
- index: 0,
893
- depth: node.depth + 1
894
- };
895
869
  handleEvent({
896
870
  type: NodeEventEnter,
897
- node: textNode
871
+ node: {
872
+ type: TEXT_NODE,
873
+ value: prefix,
874
+ parent: node,
875
+ index: 0,
876
+ depth: node.depth + 1
877
+ }
898
878
  });
899
879
  for (const parent of traverseUpToFirstBlockNode(node)) parent.childTextNodeIndex = (parent.childTextNodeIndex || 0) + 1;
900
880
  }
@@ -1262,8 +1242,8 @@ function calculateNewLineConfig(node) {
1262
1242
  function createMarkdownProcessor(options = {}) {
1263
1243
  const state = {
1264
1244
  options,
1265
- regionToggles: new Map(),
1266
- regionContentBuffers: new Map(),
1245
+ regionToggles: /* @__PURE__ */ new Map(),
1246
+ regionContentBuffers: /* @__PURE__ */ new Map(),
1267
1247
  depthMap: new Uint8Array(MAX_TAG_ID)
1268
1248
  };
1269
1249
  state.regionToggles.set(0, true);
@@ -1315,8 +1295,7 @@ function createMarkdownProcessor(options = {}) {
1315
1295
  const res = handler[eventFn](context);
1316
1296
  if (res) output.push(res);
1317
1297
  }
1318
- const newLineConfig = calculateNewLineConfig(node);
1319
- const configuredNewLines = newLineConfig[eventType] || 0;
1298
+ const configuredNewLines = calculateNewLineConfig(node)[eventType] || 0;
1320
1299
  const newLines = Math.max(0, configuredNewLines - lastNewLines);
1321
1300
  if (newLines > 0) {
1322
1301
  if (!buff.length) {
@@ -1332,13 +1311,10 @@ function createMarkdownProcessor(options = {}) {
1332
1311
  const isInlineElement = node.tagHandler?.isInline;
1333
1312
  const collapsesWhiteSpace = node.tagHandler?.collapsesInnerWhiteSpace;
1334
1313
  const hasSpacing = node.tagHandler?.spacing && Array.isArray(node.tagHandler.spacing);
1335
- const isBlockElement = !isInlineElement && !collapsesWhiteSpace && configuredNewLines > 0;
1336
- const shouldTrim = (!isInlineElement || eventType === NodeEventExit) && !isBlockElement && !(collapsesWhiteSpace && eventType === NodeEventEnter) && !(hasSpacing && eventType === NodeEventEnter);
1337
- if (shouldTrim) {
1314
+ if ((!isInlineElement || eventType === NodeEventExit) && !(!isInlineElement && !collapsesWhiteSpace && configuredNewLines > 0) && !(collapsesWhiteSpace && eventType === NodeEventEnter) && !(hasSpacing && eventType === NodeEventEnter)) {
1338
1315
  const originalLength = lastFragment.length;
1339
1316
  const trimmed = lastFragment.trimEnd();
1340
- const trimmedChars = originalLength - trimmed.length;
1341
- if (trimmedChars > 0) {
1317
+ if (originalLength - trimmed.length > 0) {
1342
1318
  if (buff?.length && buff[buff.length - 1] === lastFragment) buff[buff.length - 1] = trimmed;
1343
1319
  }
1344
1320
  }
@@ -1352,12 +1328,11 @@ function createMarkdownProcessor(options = {}) {
1352
1328
  * Process HTML string and generate events
1353
1329
  */
1354
1330
  function processHtml(html) {
1355
- const parseState = {
1331
+ parseHtmlStream(html, {
1356
1332
  depthMap: state.depthMap,
1357
1333
  depth: 0,
1358
1334
  plugins: state.options?.plugins || []
1359
- };
1360
- parseHtmlStream(html, parseState, (event) => {
1335
+ }, (event) => {
1361
1336
  processPluginsForEvent(event, state.options?.plugins, state, processEvent);
1362
1337
  });
1363
1338
  }
@@ -1365,18 +1340,14 @@ function createMarkdownProcessor(options = {}) {
1365
1340
  * Get the final markdown output
1366
1341
  */
1367
1342
  function getMarkdown() {
1368
- const assembledContent = assembleBufferedContent(state);
1369
- return assembledContent.trimEnd();
1343
+ return assembleBufferedContent(state).trimEnd();
1370
1344
  }
1371
1345
  /**
1372
1346
  * Get new markdown content since the last call (for streaming)
1373
1347
  */
1374
1348
  function getMarkdownChunk() {
1375
1349
  const fragments = [];
1376
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) {
1377
- const include = state.regionToggles.get(regionId);
1378
- if (include) fragments.push(...content);
1379
- }
1350
+ for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
1380
1351
  const currentContent = fragments.join("").trimStart();
1381
1352
  const newContent = currentContent.slice(lastYieldedLength);
1382
1353
  lastYieldedLength = currentContent.length;
@@ -1393,4 +1364,4 @@ function createMarkdownProcessor(options = {}) {
1393
1364
  const MarkdownProcessor = createMarkdownProcessor;
1394
1365
 
1395
1366
  //#endregion
1396
- export { MarkdownProcessor, createMarkdownProcessor, parseHtml, parseHtmlStream, processPluginsForEvent };
1367
+ export { parseHtmlStream as a, parseHtml as i, createMarkdownProcessor as n, processPluginsForEvent as r, MarkdownProcessor as t };
@@ -1,5 +1,5 @@
1
- import { TAG_ASIDE, TAG_BUTTON, TAG_EMBED, TAG_FIELDSET, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_IFRAME, TAG_INPUT, TAG_NAV, TAG_OBJECT, TAG_SELECT, TAG_TEXTAREA } from "./const-BOAJ1T5c.mjs";
2
- import { filterPlugin, frontmatterPlugin, isolateMainPlugin, tailwindPlugin } from "./plugins-C5_irVJs.mjs";
1
+ import { A as TAG_BUTTON, Dt as TAG_OBJECT, Ht as TAG_SELECT, K as TAG_EMBED, S as TAG_ASIDE, X as TAG_FOOTER, Y as TAG_FIGURE, Z as TAG_FORM, dt as TAG_INPUT, lt as TAG_IFRAME, nn as TAG_TEXTAREA, q as TAG_FIELDSET, wt as TAG_NAV } from "./const-Bf_XN9U9.mjs";
2
+ import { a as filterPlugin, i as frontmatterPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./plugins-DJnqR2fA.mjs";
3
3
 
4
4
  //#region src/preset/minimal.ts
5
5
  /**
@@ -37,4 +37,4 @@ function withMinimalPreset(options = {}) {
37
37
  }
38
38
 
39
39
  //#endregion
40
- export { withMinimalPreset };
40
+ export { withMinimalPreset as t };
@@ -9,4 +9,4 @@ function createPlugin(plugin) {
9
9
  }
10
10
 
11
11
  //#endregion
12
- export { createPlugin };
12
+ export { createPlugin as t };
@@ -1,4 +1,4 @@
1
- import { Plugin } from "./types-DqiI86yW.mjs";
1
+ import { d as Plugin } from "./types-CT4ZxeOH.mjs";
2
2
 
3
3
  //#region src/pluggable/plugin.d.ts
4
4
 
@@ -9,4 +9,4 @@ import { Plugin } from "./types-DqiI86yW.mjs";
9
9
  */
10
10
  declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
11
11
  //#endregion
12
- export { createPlugin as createPlugin$1 };
12
+ export { createPlugin as t };