mdream 0.13.3 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -311,6 +311,117 @@ htmlToMarkdown(html, { plugins: [plugin] })
311
311
 
312
312
  The extraction plugin provides memory-efficient element extraction with full text content and attributes, perfect for SEO analysis, content discovery, and data mining.
313
313
 
314
+ ## Markdown Splitting
315
+
316
+ Split HTML into Markdown chunks during conversion, sized for LLM context windows, vector databases, or document processing.
317
+
318
+ ### Basic Chunking
319
+
320
+ ```ts
321
+ import { TAG_H2 } from 'mdream'
322
+ import { htmlToMarkdownSplitChunks } from 'mdream/splitter'
323
+
324
+ const html = `
325
+ <h1>Documentation</h1>
326
+ <h2>Installation</h2>
327
+ <p>Install via npm...</p>
328
+ <h2>Usage</h2>
329
+ <p>Use it like this...</p>
330
+ `
331
+
332
+ const chunks = htmlToMarkdownSplitChunks(html, {
333
+ headersToSplitOn: [TAG_H2], // Split on h2 headers
334
+ chunkSize: 1000, // Max chars per chunk
335
+ chunkOverlap: 200, // Overlap for context
336
+ stripHeaders: true // Remove headers from content
337
+ })
338
+
339
+ // Each chunk includes content and metadata
340
+ chunks.forEach((chunk) => {
341
+ console.log(chunk.content)
342
+ console.log(chunk.metadata.headers) // { h1: "Documentation", h2: "Installation" }
343
+ console.log(chunk.metadata.code) // Language if chunk contains code
344
+ console.log(chunk.metadata.loc) // Line number range: { lines: { from, to } }
345
+ })
346
+ ```
347
+
348
+ ### Streaming Chunks (Memory Efficient)
349
+
350
+ For large documents, use the generator version to process chunks one at a time:
351
+
352
+ ```ts
353
+ import { htmlToMarkdownSplitChunksStream } from 'mdream/splitter'
354
+
355
+ // Process chunks incrementally - lower memory usage
356
+ for (const chunk of htmlToMarkdownSplitChunksStream(html, options)) {
357
+ await processChunk(chunk) // Handle each chunk as it's generated
358
+
359
+ // Can break early if you found what you need
360
+ if (foundTarget)
361
+ break
362
+ }
363
+ ```
364
+
365
+ **Benefits of streaming:**
366
+ - Lower memory usage - chunks aren't stored in an array
367
+ - Early termination - stop processing when you find what you need
368
+ - Better for large documents
369
+
370
+ ### Splitting Options
371
+
372
+ ```ts
373
+ interface SplitterOptions {
374
+ // Structural splitting
375
+ headersToSplitOn?: number[] // TAG_H1, TAG_H2, etc. Default: TAG_H2 through TAG_H6
376
+
377
+ // Size-based splitting
378
+ chunkSize?: number // Max chunk size. Default: 1000
379
+ chunkOverlap?: number // Overlap between chunks. Default: 200
380
+ lengthFunction?: (text: string) => number // Custom length (e.g., token count)
381
+
382
+ // Output formatting
383
+ stripHeaders?: boolean // Remove headers from content. Default: true
384
+ returnEachLine?: boolean // Split into individual lines. Default: false
385
+
386
+ // Standard options
387
+ origin?: string // Base URL for links/images
388
+ plugins?: Plugin[] // Apply plugins during conversion
389
+ }
390
+ ```
391
+
392
+ ### Chunk Metadata
393
+
394
+ Each chunk includes rich metadata for context:
395
+
396
+ ```ts
397
+ interface MarkdownChunk {
398
+ content: string
399
+ metadata: {
400
+ headers?: Record<string, string> // Header hierarchy: { h1: "Title", h2: "Section" }
401
+ code?: string // Code block language if present
402
+ loc?: { // Line number range
403
+ lines: { from: number, to: number }
404
+ }
405
+ }
406
+ }
407
+ ```
408
+
409
+ ### Use with Presets
410
+
411
+ Combine splitting with presets for optimized output:
412
+
413
+ ```ts
414
+ import { TAG_H2 } from 'mdream'
415
+ import { withMinimalPreset } from 'mdream/preset/minimal'
416
+ import { htmlToMarkdownSplitChunks } from 'mdream/splitter'
417
+
418
+ const chunks = htmlToMarkdownSplitChunks(html, withMinimalPreset({
419
+ headersToSplitOn: [TAG_H2],
420
+ chunkSize: 500,
421
+ origin: 'https://example.com'
422
+ }))
423
+ ```
424
+
314
425
  ## Credits
315
426
 
316
427
  - [ultrahtml](https://github.com/natemoo-re/ultrahtml): HTML parsing inspiration
@@ -29,10 +29,7 @@ function collectNodeContent(node, content, state) {
29
29
  */
30
30
  function assembleBufferedContent(state) {
31
31
  const fragments = [];
32
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) {
33
- const include = state.regionToggles.get(regionId);
34
- if (include) fragments.push(...content);
35
- }
32
+ for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
36
33
  state.regionToggles.clear();
37
34
  state.regionContentBuffers.clear();
38
35
  return fragments.join("").trimStart();
@@ -285,4 +282,4 @@ const LIST_ITEM_SPACING = [1, 0];
285
282
  const TABLE_ROW_SPACING = [0, 1];
286
283
 
287
284
  //#endregion
288
- export { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ARTICLE, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CAPTION, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HR, TAG_HTML, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAIN, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OBJECT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SECTION, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent, createBufferRegion };
285
+ export { TAG_H2 as $, TAG_TBODY as $t, TAG_BUTTON as A, TAG_P as At, TAG_DFN as B, TAG_SCRIPT as Bt, TAG_AUDIO as C, TAG_METER as Ct, TAG_BLOCKQUOTE as D, TAG_OBJECT as Dt, TAG_BDO as E, TAG_NOSCRIPT as Et, TAG_CODE as F, TAG_Q as Ft, TAG_EM as G, TAG_SPAN as Gt, TAG_DIV as H, TAG_SELECT as Ht, TAG_COL as I, TAG_RP as It, TAG_FIGCAPTION as J, TAG_SUB as Jt, TAG_EMBED as K, TAG_STRONG as Kt, TAG_DD as L, TAG_RT as Lt, TAG_CAPTION as M, TAG_PLAINTEXT as Mt, TAG_CENTER as N, TAG_PRE as Nt, TAG_BODY as O, TAG_OL as Ot, TAG_CITE as P, TAG_PROGRESS as Pt, TAG_H1 as Q, TAG_TABLE as Qt, TAG_DEL as R, TAG_RUBY as Rt, TAG_ASIDE as S, TAG_META as St, TAG_BASE as T, TAG_NOFRAMES as Tt, TAG_DL as U, TAG_SMALL as Ut, TAG_DIALOG as V, TAG_SECTION as Vt, TAG_DT as W, TAG_SOURCE as Wt, TAG_FOOTER as X, TAG_SUP as Xt, TAG_FIGURE as Y, TAG_SUMMARY as Yt, TAG_FORM as Z, TAG_SVG as Zt, TAG_A as _, TagIdMap as _n, TAG_LI as _t, LIST_ITEM_SPACING as a, TAG_THEAD as an, TAG_HEADER as at, TAG_AREA as b, createBufferRegion as bn, TAG_MAP as bt, MARKDOWN_HORIZONTAL_RULE as c, TAG_TR as cn, TAG_I as ct, MARKDOWN_STRONG as d, TAG_UL as dn, TAG_INPUT as dt, TAG_TD as en, TAG_H3 as et, MAX_TAG_ID as f, TAG_VAR as fn, TAG_INS as ft, TABLE_ROW_SPACING as g, TEXT_NODE as gn, TAG_LEGEND as gt, NodeEventExit as h, TAG_XMP as hn, TAG_LABEL as ht, HTML_ENTITIES as i, TAG_TH as in, TAG_HEAD as it, TAG_CANVAS as j, TAG_PARAM as jt, TAG_BR as k, TAG_OPTION as kt, MARKDOWN_INLINE_CODE as l, TAG_TRACK as ln, TAG_IFRAME as lt, NodeEventEnter as m, TAG_WBR as mn, TAG_KEYGEN as mt, DEFAULT_BLOCK_SPACING as n, TAG_TEXTAREA as nn, TAG_H5 as nt, MARKDOWN_CODE_BLOCK as o, TAG_TIME as on, TAG_HR as ot, NO_SPACING as p, TAG_VIDEO as pn, TAG_KBD as pt, TAG_FIELDSET as q, TAG_STYLE as qt, ELEMENT_NODE as r, TAG_TFOOT as rn, TAG_H6 as rt, MARKDOWN_EMPHASIS as s, TAG_TITLE as sn, TAG_HTML as st, BLOCKQUOTE_SPACING as t, TAG_TEMPLATE as tn, TAG_H4 as tt, MARKDOWN_STRIKETHROUGH as u, TAG_U as un, TAG_IMG as ut, TAG_ABBR 
as v, assembleBufferedContent as vn, TAG_LINK as vt, TAG_B as w, TAG_NAV as wt, TAG_ARTICLE as x, TAG_MARK as xt, TAG_ADDRESS as y, collectNodeContent as yn, TAG_MAIN as yt, TAG_DETAILS as z, TAG_SAMP as zt };
@@ -1,4 +1,4 @@
1
- import { createPlugin } from "./plugin-DrovQriD.mjs";
1
+ import { t as createPlugin } from "./plugin-CjWWQTuL.mjs";
2
2
 
3
3
  //#region src/libs/query-selector.ts
4
4
  /**
@@ -28,8 +28,7 @@ function createClassSelector(selector) {
28
28
  return {
29
29
  matches: (element) => {
30
30
  if (!element.attributes?.class) return false;
31
- const classes = element.attributes.class.trim().split(" ").filter(Boolean);
32
- return classes.includes(className);
31
+ return element.attributes.class.trim().split(" ").filter(Boolean).includes(className);
33
32
  },
34
33
  toString: () => `.${className}`
35
34
  };
@@ -109,7 +108,7 @@ function extractionPlugin(selectors) {
109
108
  matcher: parseSelector(selector),
110
109
  callback
111
110
  }));
112
- const trackedElements = new Map();
111
+ const trackedElements = /* @__PURE__ */ new Map();
113
112
  return createPlugin({
114
113
  onNodeEnter(element) {
115
114
  matcherCallbacks.forEach(({ matcher, callback }) => {
@@ -126,7 +125,6 @@ function extractionPlugin(selectors) {
126
125
  if (tracked) tracked.textContent += textNode.value;
127
126
  currentParent = currentParent.parent;
128
127
  }
129
- return void 0;
130
128
  },
131
129
  onNodeExit(element, state) {
132
130
  const tracked = trackedElements.get(element);
@@ -143,4 +141,4 @@ function extractionPlugin(selectors) {
143
141
  }
144
142
 
145
143
  //#endregion
146
- export { extractionPlugin, parseSelector };
144
+ export { parseSelector as n, extractionPlugin as t };
@@ -1,5 +1,5 @@
1
- import { htmlToMarkdown } from "./src-C3QpB75q.mjs";
2
- import { extractionPlugin } from "./extraction-BPaDGYvv.mjs";
1
+ import { t as htmlToMarkdown } from "./src-BJpipdul.mjs";
2
+ import { t as extractionPlugin } from "./extraction-BA9MDtq3.mjs";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { basename, dirname, relative, sep } from "pathe";
5
5
  import { glob } from "tinyglobby";
@@ -13,28 +13,27 @@ function extractMetadata(html, url) {
13
13
  let description = "";
14
14
  let keywords = "";
15
15
  let author = "";
16
- const extractionPluginInstance = extractionPlugin({
17
- "title": (element) => {
18
- if (!title && element.textContent) title = element.textContent.trim();
19
- },
20
- "meta[name=\"description\"]": (element) => {
21
- if (!description && element.attributes?.content) description = element.attributes.content.trim();
22
- },
23
- "meta[property=\"og:description\"]": (element) => {
24
- if (!description && element.attributes?.content) description = element.attributes.content.trim();
25
- },
26
- "meta[name=\"keywords\"]": (element) => {
27
- if (!keywords && element.attributes?.content) keywords = element.attributes.content.trim();
28
- },
29
- "meta[name=\"author\"]": (element) => {
30
- if (!author && element.attributes?.content) author = element.attributes.content.trim();
31
- },
32
- "meta[property=\"og:title\"]": (element) => {
33
- if (!title && element.attributes?.content) title = element.attributes.content.trim();
34
- }
35
- });
36
16
  htmlToMarkdown(html, {
37
- plugins: [extractionPluginInstance],
17
+ plugins: [extractionPlugin({
18
+ "title": (element) => {
19
+ if (!title && element.textContent) title = element.textContent.trim();
20
+ },
21
+ "meta[name=\"description\"]": (element) => {
22
+ if (!description && element.attributes?.content) description = element.attributes.content.trim();
23
+ },
24
+ "meta[property=\"og:description\"]": (element) => {
25
+ if (!description && element.attributes?.content) description = element.attributes.content.trim();
26
+ },
27
+ "meta[name=\"keywords\"]": (element) => {
28
+ if (!keywords && element.attributes?.content) keywords = element.attributes.content.trim();
29
+ },
30
+ "meta[name=\"author\"]": (element) => {
31
+ if (!author && element.attributes?.content) author = element.attributes.content.trim();
32
+ },
33
+ "meta[property=\"og:title\"]": (element) => {
34
+ if (!title && element.attributes?.content) title = element.attributes.content.trim();
35
+ }
36
+ })],
38
37
  origin: url
39
38
  });
40
39
  return {
@@ -113,8 +112,7 @@ function generateLlmsTxtContent(files, options) {
113
112
  * Parse frontmatter from markdown content
114
113
  */
115
114
  function parseFrontmatter(content) {
116
- const frontmatterRegex = /^---\n([\s\S]*?)\n---\n([\s\S]*)$/;
117
- const match = content.match(frontmatterRegex);
115
+ const match = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
118
116
  if (!match) return {
119
117
  frontmatter: null,
120
118
  body: content
@@ -127,8 +125,7 @@ function parseFrontmatter(content) {
127
125
  const colonIndex = line.indexOf(":");
128
126
  if (colonIndex > 0) {
129
127
  const key = line.substring(0, colonIndex).trim();
130
- const value = line.substring(colonIndex + 1).trim();
131
- frontmatter[key] = value;
128
+ frontmatter[key] = line.substring(colonIndex + 1).trim();
132
129
  }
133
130
  }
134
131
  return {
@@ -172,11 +169,10 @@ function generateLlmsFullTxtContent(files, options) {
172
169
  if (file.metadata.keywords) metadata.keywords = file.metadata.keywords;
173
170
  if (file.metadata.author) metadata.author = file.metadata.author;
174
171
  }
175
- const mergedFrontmatter = frontmatter ? {
172
+ const frontmatterString = serializeFrontmatter(frontmatter ? {
176
173
  ...frontmatter,
177
174
  ...metadata
178
- } : metadata;
179
- const frontmatterString = serializeFrontmatter(mergedFrontmatter);
175
+ } : metadata);
180
176
  let contentBody = frontmatter ? body : file.content;
181
177
  const titleLine = contentBody.trim().split("\n")[0];
182
178
  if (titleLine === file.title || titleLine === `# ${file.title}`) contentBody = contentBody.trim().split("\n").slice(1).join("\n").trimStart();
@@ -191,8 +187,7 @@ function generateLlmsFullTxtContent(files, options) {
191
187
  function generateMarkdownFilesContent(files) {
192
188
  const markdownFiles = [];
193
189
  for (const file of files) {
194
- const urlPath = file.url === "/" ? "index" : file.url.replace(/^\//, "").replace(/\/$/, "");
195
- const mdPath = `md/${urlPath}.md`;
190
+ const mdPath = `md/${file.url === "/" ? "index" : file.url.replace(/^\//, "").replace(/\/$/, "")}.md`;
196
191
  markdownFiles.push({
197
192
  path: mdPath,
198
193
  content: file.content
@@ -222,4 +217,4 @@ async function generateLlmsTxtArtifacts(options) {
222
217
  }
223
218
 
224
219
  //#endregion
225
- export { generateLlmsTxtArtifacts };
220
+ export { generateLlmsTxtArtifacts as t };
@@ -1,4 +1,4 @@
1
- import { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HR, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent } from "./const-BOAJ1T5c.mjs";
1
+ import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, B as TAG_DFN, Bt as TAG_SCRIPT, C as TAG_AUDIO, Ct as TAG_METER, D as TAG_BLOCKQUOTE, E as TAG_BDO, Et as TAG_NOSCRIPT, F as TAG_CODE, Ft as TAG_Q, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, I as TAG_COL, It as TAG_RP, Jt as TAG_SUB, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, Lt as TAG_RT, Mt as TAG_PLAINTEXT, N as TAG_CENTER, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, P as TAG_CITE, Pt as TAG_PROGRESS, Q as TAG_H1, Qt as TAG_TABLE, R as TAG_DEL, Rt as TAG_RUBY, S as TAG_ASIDE, St as TAG_META, T as TAG_BASE, Tt as TAG_NOFRAMES, U as TAG_DL, Ut as TAG_SMALL, V as TAG_DIALOG, W as TAG_DT, Wt as TAG_SOURCE, X as TAG_FOOTER, Xt as TAG_SUP, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _n as TagIdMap, _t as TAG_LI, a as LIST_ITEM_SPACING, an as TAG_THEAD, b as TAG_AREA, bt as TAG_MAP, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_TR, ct as TAG_I, d as MARKDOWN_STRONG, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, f as MAX_TAG_ID, fn as TAG_VAR, ft as TAG_INS, g as TABLE_ROW_SPACING, gn as TEXT_NODE, gt as TAG_LEGEND, h as NodeEventExit, hn as TAG_XMP, ht as TAG_LABEL, i as HTML_ENTITIES, in as TAG_TH, it as TAG_HEAD, j as TAG_CANVAS, jt as TAG_PARAM, k as TAG_BR, kt as TAG_OPTION, l as MARKDOWN_INLINE_CODE, ln as TAG_TRACK, lt as TAG_IFRAME, m as NodeEventEnter, mn as TAG_WBR, mt as TAG_KEYGEN, n as DEFAULT_BLOCK_SPACING, nn as TAG_TEXTAREA, nt as TAG_H5, o as MARKDOWN_CODE_BLOCK, on as TAG_TIME, ot as TAG_HR, p as NO_SPACING, pn as TAG_VIDEO, pt as TAG_KBD, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, s as MARKDOWN_EMPHASIS, sn as TAG_TITLE, t as BLOCKQUOTE_SPACING, tn as TAG_TEMPLATE, tt as TAG_H4, u as MARKDOWN_STRIKETHROUGH, un as TAG_U, ut as TAG_IMG, v as TAG_ABBR, vn as assembleBufferedContent, vt as TAG_LINK, w as TAG_B, wt as TAG_NAV, xt as TAG_MARK, y as TAG_ADDRESS, yn as collectNodeContent, z as 
TAG_DETAILS, zt as TAG_SAMP } from "./const-Bf_XN9U9.mjs";
2
2
 
3
3
  //#region src/tags.ts
4
4
  function resolveUrl(url, origin) {
@@ -6,15 +6,9 @@ function resolveUrl(url, origin) {
6
6
  if (url.startsWith("//")) return `https:${url}`;
7
7
  if (url.startsWith("#")) return url;
8
8
  if (origin) {
9
- if (url.startsWith("/") && origin) {
10
- const cleanOrigin = origin.endsWith("/") ? origin.slice(0, -1) : origin;
11
- return `${cleanOrigin}${url}`;
12
- }
9
+ if (url.startsWith("/") && origin) return `${origin.endsWith("/") ? origin.slice(0, -1) : origin}${url}`;
13
10
  if (url.startsWith("./")) return `${origin}/${url.slice(2)}`;
14
- if (!url.startsWith("http")) {
15
- const cleanUrl = url.startsWith("/") ? url.slice(1) : url;
16
- return `${origin}/${cleanUrl}`;
17
- }
11
+ if (!url.startsWith("http")) return `${origin}/${url.startsWith("/") ? url.slice(1) : url}`;
18
12
  }
19
13
  return url;
20
14
  }
@@ -157,10 +151,7 @@ const tagHandlers = {
157
151
  },
158
152
  [TAG_CODE]: {
159
153
  enter: ({ node }) => {
160
- if ((node.depthMap[TAG_PRE] || 0) > 0) {
161
- const language = getLanguageFromClass(node.attributes?.class);
162
- return `${MARKDOWN_CODE_BLOCK}${language}\n`;
163
- }
154
+ if ((node.depthMap[TAG_PRE] || 0) > 0) return `${MARKDOWN_CODE_BLOCK}${getLanguageFromClass(node.attributes?.class)}\n`;
164
155
  return MARKDOWN_INLINE_CODE;
165
156
  },
166
157
  exit: ({ node }) => {
@@ -179,9 +170,7 @@ const tagHandlers = {
179
170
  if (isInsideTableCell(node)) return "<li>";
180
171
  const depth = (node.depthMap[TAG_UL] || 0) + (node.depthMap[TAG_OL] || 0) - 1;
181
172
  const isOrdered = node.parent?.tagId === TAG_OL;
182
- const indent = " ".repeat(Math.max(0, depth));
183
- const marker = isOrdered ? `${node.index + 1}. ` : "- ";
184
- return `${indent}${marker}`;
173
+ return `${" ".repeat(Math.max(0, depth))}${isOrdered ? `${node.index + 1}. ` : "- "}`;
185
174
  },
186
175
  exit: ({ node }) => isInsideTableCell(node) ? "</li>" : void 0,
187
176
  spacing: LIST_ITEM_SPACING
@@ -194,8 +183,7 @@ const tagHandlers = {
194
183
  if (!node.attributes?.href) return "";
195
184
  const href = resolveUrl(node.attributes?.href || "", state.options?.origin);
196
185
  let title = node.attributes?.title;
197
- const lastContent = state.lastContentCache;
198
- if (lastContent === title) title = "";
186
+ if (state.lastContentCache === title) title = "";
199
187
  return title ? `](${href} "${title}")` : `](${href})`;
200
188
  },
201
189
  collapsesInnerWhiteSpace: true,
@@ -204,9 +192,7 @@ const tagHandlers = {
204
192
  },
205
193
  [TAG_IMG]: {
206
194
  enter: ({ node, state }) => {
207
- const alt = node.attributes?.alt || "";
208
- const src = resolveUrl(node.attributes?.src || "", state.options?.origin);
209
- return `![${alt}](${src})`;
195
+ return `![${node.attributes?.alt || ""}](${resolveUrl(node.attributes?.src || "", state.options?.origin)})`;
210
196
  },
211
197
  collapsesInnerWhiteSpace: true,
212
198
  isSelfClosing: true,
@@ -241,15 +227,14 @@ const tagHandlers = {
241
227
  state.tableRenderedTable = true;
242
228
  const alignments = state.tableColumnAlignments;
243
229
  while (alignments.length < state.tableCurrentRowCells) alignments.push("");
244
- const alignmentMarkers = alignments.map((align) => {
230
+ return ` |\n| ${alignments.map((align) => {
245
231
  switch (align) {
246
232
  case "left": return ":---";
247
233
  case "center": return ":---:";
248
234
  case "right": return "---:";
249
235
  default: return "---";
250
236
  }
251
- });
252
- return ` |\n| ${alignmentMarkers.join(" | ")} |`;
237
+ }).join(" | ")} |`;
253
238
  }
254
239
  return " |";
255
240
  },
@@ -637,17 +622,15 @@ function isWhitespace(charCode) {
637
622
  */
638
623
  function parseHtml(html, options = {}) {
639
624
  const events = [];
640
- const state = {
641
- depthMap: new Uint8Array(MAX_TAG_ID),
642
- depth: 0,
643
- plugins: options.plugins || []
644
- };
645
- const remainingHtml = parseHtmlInternal(html, state, (event) => {
646
- events.push(event);
647
- });
648
625
  return {
649
626
  events,
650
- remainingHtml
627
+ remainingHtml: parseHtmlInternal(html, {
628
+ depthMap: new Uint8Array(MAX_TAG_ID),
629
+ depth: 0,
630
+ plugins: options.plugins || []
631
+ }, (event) => {
632
+ events.push(event);
633
+ })
651
634
  };
652
635
  }
653
636
  /**
@@ -840,8 +823,7 @@ function processClosingTag(htmlChunk, position, state, handleEvent) {
840
823
  const chunkLength = htmlChunk.length;
841
824
  let foundClose = false;
842
825
  while (i < chunkLength) {
843
- const charCode = htmlChunk.charCodeAt(i);
844
- if (charCode === GT_CHAR) {
826
+ if (htmlChunk.charCodeAt(i) === GT_CHAR) {
845
827
  foundClose = true;
846
828
  break;
847
829
  }
@@ -852,8 +834,7 @@ function processClosingTag(htmlChunk, position, state, handleEvent) {
852
834
  newPosition: position,
853
835
  remainingText: htmlChunk.substring(position)
854
836
  };
855
- const tagName = htmlChunk.substring(tagNameStart, i).toLowerCase();
856
- const tagId = TagIdMap[tagName] ?? -1;
837
+ const tagId = TagIdMap[htmlChunk.substring(tagNameStart, i).toLowerCase()] ?? -1;
857
838
  if (state.currentNode?.tagHandler?.isNonNesting && tagId !== state.currentNode.tagId) return {
858
839
  complete: false,
859
840
  newPosition: position,
@@ -885,16 +866,15 @@ function closeNode(node, state, handleEvent) {
885
866
  const prefix = node.attributes?.title || node.attributes?.["aria-label"] || "";
886
867
  if (prefix) {
887
868
  node.childTextNodeIndex = 1;
888
- const textNode = {
889
- type: TEXT_NODE,
890
- value: prefix,
891
- parent: node,
892
- index: 0,
893
- depth: node.depth + 1
894
- };
895
869
  handleEvent({
896
870
  type: NodeEventEnter,
897
- node: textNode
871
+ node: {
872
+ type: TEXT_NODE,
873
+ value: prefix,
874
+ parent: node,
875
+ index: 0,
876
+ depth: node.depth + 1
877
+ }
898
878
  });
899
879
  for (const parent of traverseUpToFirstBlockNode(node)) parent.childTextNodeIndex = (parent.childTextNodeIndex || 0) + 1;
900
880
  }
@@ -1262,8 +1242,8 @@ function calculateNewLineConfig(node) {
1262
1242
  function createMarkdownProcessor(options = {}) {
1263
1243
  const state = {
1264
1244
  options,
1265
- regionToggles: new Map(),
1266
- regionContentBuffers: new Map(),
1245
+ regionToggles: /* @__PURE__ */ new Map(),
1246
+ regionContentBuffers: /* @__PURE__ */ new Map(),
1267
1247
  depthMap: new Uint8Array(MAX_TAG_ID)
1268
1248
  };
1269
1249
  state.regionToggles.set(0, true);
@@ -1315,8 +1295,7 @@ function createMarkdownProcessor(options = {}) {
1315
1295
  const res = handler[eventFn](context);
1316
1296
  if (res) output.push(res);
1317
1297
  }
1318
- const newLineConfig = calculateNewLineConfig(node);
1319
- const configuredNewLines = newLineConfig[eventType] || 0;
1298
+ const configuredNewLines = calculateNewLineConfig(node)[eventType] || 0;
1320
1299
  const newLines = Math.max(0, configuredNewLines - lastNewLines);
1321
1300
  if (newLines > 0) {
1322
1301
  if (!buff.length) {
@@ -1332,13 +1311,10 @@ function createMarkdownProcessor(options = {}) {
1332
1311
  const isInlineElement = node.tagHandler?.isInline;
1333
1312
  const collapsesWhiteSpace = node.tagHandler?.collapsesInnerWhiteSpace;
1334
1313
  const hasSpacing = node.tagHandler?.spacing && Array.isArray(node.tagHandler.spacing);
1335
- const isBlockElement = !isInlineElement && !collapsesWhiteSpace && configuredNewLines > 0;
1336
- const shouldTrim = (!isInlineElement || eventType === NodeEventExit) && !isBlockElement && !(collapsesWhiteSpace && eventType === NodeEventEnter) && !(hasSpacing && eventType === NodeEventEnter);
1337
- if (shouldTrim) {
1314
+ if ((!isInlineElement || eventType === NodeEventExit) && !(!isInlineElement && !collapsesWhiteSpace && configuredNewLines > 0) && !(collapsesWhiteSpace && eventType === NodeEventEnter) && !(hasSpacing && eventType === NodeEventEnter)) {
1338
1315
  const originalLength = lastFragment.length;
1339
1316
  const trimmed = lastFragment.trimEnd();
1340
- const trimmedChars = originalLength - trimmed.length;
1341
- if (trimmedChars > 0) {
1317
+ if (originalLength - trimmed.length > 0) {
1342
1318
  if (buff?.length && buff[buff.length - 1] === lastFragment) buff[buff.length - 1] = trimmed;
1343
1319
  }
1344
1320
  }
@@ -1352,12 +1328,11 @@ function createMarkdownProcessor(options = {}) {
1352
1328
  * Process HTML string and generate events
1353
1329
  */
1354
1330
  function processHtml(html) {
1355
- const parseState = {
1331
+ parseHtmlStream(html, {
1356
1332
  depthMap: state.depthMap,
1357
1333
  depth: 0,
1358
1334
  plugins: state.options?.plugins || []
1359
- };
1360
- parseHtmlStream(html, parseState, (event) => {
1335
+ }, (event) => {
1361
1336
  processPluginsForEvent(event, state.options?.plugins, state, processEvent);
1362
1337
  });
1363
1338
  }
@@ -1365,18 +1340,14 @@ function createMarkdownProcessor(options = {}) {
1365
1340
  * Get the final markdown output
1366
1341
  */
1367
1342
  function getMarkdown() {
1368
- const assembledContent = assembleBufferedContent(state);
1369
- return assembledContent.trimEnd();
1343
+ return assembleBufferedContent(state).trimEnd();
1370
1344
  }
1371
1345
  /**
1372
1346
  * Get new markdown content since the last call (for streaming)
1373
1347
  */
1374
1348
  function getMarkdownChunk() {
1375
1349
  const fragments = [];
1376
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) {
1377
- const include = state.regionToggles.get(regionId);
1378
- if (include) fragments.push(...content);
1379
- }
1350
+ for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
1380
1351
  const currentContent = fragments.join("").trimStart();
1381
1352
  const newContent = currentContent.slice(lastYieldedLength);
1382
1353
  lastYieldedLength = currentContent.length;
@@ -1393,4 +1364,4 @@ function createMarkdownProcessor(options = {}) {
1393
1364
  const MarkdownProcessor = createMarkdownProcessor;
1394
1365
 
1395
1366
  //#endregion
1396
- export { MarkdownProcessor, createMarkdownProcessor, parseHtml, parseHtmlStream, processPluginsForEvent };
1367
+ export { parseHtmlStream as a, parseHtml as i, createMarkdownProcessor as n, processPluginsForEvent as r, MarkdownProcessor as t };
@@ -1,5 +1,5 @@
1
- import { TAG_ASIDE, TAG_BUTTON, TAG_EMBED, TAG_FIELDSET, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_IFRAME, TAG_INPUT, TAG_NAV, TAG_OBJECT, TAG_SELECT, TAG_TEXTAREA } from "./const-BOAJ1T5c.mjs";
2
- import { filterPlugin, frontmatterPlugin, isolateMainPlugin, tailwindPlugin } from "./plugins-C5_irVJs.mjs";
1
+ import { A as TAG_BUTTON, Dt as TAG_OBJECT, Ht as TAG_SELECT, K as TAG_EMBED, S as TAG_ASIDE, X as TAG_FOOTER, Y as TAG_FIGURE, Z as TAG_FORM, dt as TAG_INPUT, lt as TAG_IFRAME, nn as TAG_TEXTAREA, q as TAG_FIELDSET, wt as TAG_NAV } from "./const-Bf_XN9U9.mjs";
2
+ import { a as filterPlugin, i as frontmatterPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./plugins-DJnqR2fA.mjs";
3
3
 
4
4
  //#region src/preset/minimal.ts
5
5
  /**
@@ -37,4 +37,4 @@ function withMinimalPreset(options = {}) {
37
37
  }
38
38
 
39
39
  //#endregion
40
- export { withMinimalPreset };
40
+ export { withMinimalPreset as t };
@@ -9,4 +9,4 @@ function createPlugin(plugin) {
9
9
  }
10
10
 
11
11
  //#endregion
12
- export { createPlugin };
12
+ export { createPlugin as t };
@@ -1,4 +1,4 @@
1
- import { Plugin } from "./types-DqiI86yW.mjs";
1
+ import { d as Plugin } from "./types-CT4ZxeOH.mjs";
2
2
 
3
3
  //#region src/pluggable/plugin.d.ts
4
4
 
@@ -9,4 +9,4 @@ import { Plugin } from "./types-DqiI86yW.mjs";
9
9
  */
10
10
  declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
11
11
  //#endregion
12
- export { createPlugin as createPlugin$1 };
12
+ export { createPlugin as t };
@@ -1,6 +1,6 @@
1
- import { ELEMENT_NODE, TAG_A, TAG_ADDRESS, TAG_ARTICLE, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CAPTION, TAG_CODE, TAG_DD, TAG_DETAILS, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HR, TAG_HTML, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_LI, TAG_MAIN, TAG_META, TAG_NAV, TAG_OBJECT, TAG_OL, TAG_P, TAG_PRE, TAG_SCRIPT, TAG_SECTION, TAG_SELECT, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUMMARY, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TITLE, TAG_TR, TAG_UL, TAG_VIDEO, TEXT_NODE, collectNodeContent, createBufferRegion } from "./const-BOAJ1T5c.mjs";
2
- import { createPlugin } from "./plugin-DrovQriD.mjs";
3
- import { parseSelector } from "./extraction-BPaDGYvv.mjs";
1
+ import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, Bt as TAG_SCRIPT, C as TAG_AUDIO, D as TAG_BLOCKQUOTE, Dt as TAG_OBJECT, F as TAG_CODE, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, J as TAG_FIGCAPTION, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, M as TAG_CAPTION, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, Q as TAG_H1, Qt as TAG_TABLE, S as TAG_ASIDE, St as TAG_META, U as TAG_DL, Vt as TAG_SECTION, W as TAG_DT, X as TAG_FOOTER, Y as TAG_FIGURE, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _t as TAG_LI, an as TAG_THEAD, at as TAG_HEADER, bn as createBufferRegion, cn as TAG_TR, ct as TAG_I, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, gn as TEXT_NODE, in as TAG_TH, it as TAG_HEAD, k as TAG_BR, lt as TAG_IFRAME, nn as TAG_TEXTAREA, nt as TAG_H5, ot as TAG_HR, pn as TAG_VIDEO, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, sn as TAG_TITLE, st as TAG_HTML, tt as TAG_H4, ut as TAG_IMG, w as TAG_B, wt as TAG_NAV, x as TAG_ARTICLE, y as TAG_ADDRESS, yn as collectNodeContent, yt as TAG_MAIN, z as TAG_DETAILS } from "./const-Bf_XN9U9.mjs";
2
+ import { t as createPlugin } from "./plugin-CjWWQTuL.mjs";
3
+ import { n as parseSelector } from "./extraction-BA9MDtq3.mjs";
4
4
 
5
5
  //#region src/plugins/filter.ts
6
6
  /**
@@ -28,11 +28,9 @@ function filterPlugin(options = {}) {
28
28
  return createPlugin({ beforeNodeProcess(event) {
29
29
  const { node } = event;
30
30
  if (node.type === TEXT_NODE) {
31
- const textNode = node;
32
- let currentParent$1 = textNode.parent;
31
+ let currentParent$1 = node.parent;
33
32
  while (currentParent$1 && excludeSelectors.length) {
34
- const parentShouldExclude = excludeSelectors.some((selector) => selector.matches(currentParent$1));
35
- if (parentShouldExclude) return { skip: true };
33
+ if (excludeSelectors.some((selector) => selector.matches(currentParent$1))) return { skip: true };
36
34
  currentParent$1 = currentParent$1.parent;
37
35
  }
38
36
  return;
@@ -41,22 +39,19 @@ function filterPlugin(options = {}) {
41
39
  const element = node;
42
40
  if (excludeSelectors.length) {
43
41
  if (element.attributes.style?.includes("absolute") || element.attributes.style?.includes("fixed")) return { skip: true };
44
- const shouldExclude = excludeSelectors.some((selector) => selector.matches(element));
45
- if (shouldExclude) return { skip: true };
42
+ if (excludeSelectors.some((selector) => selector.matches(element))) return { skip: true };
46
43
  }
47
44
  let currentParent = element.parent;
48
45
  while (currentParent) {
49
46
  if (excludeSelectors.length) {
50
- const parentShouldExclude = excludeSelectors.some((selector) => selector.matches(currentParent));
51
- if (parentShouldExclude) return { skip: true };
47
+ if (excludeSelectors.some((selector) => selector.matches(currentParent))) return { skip: true };
52
48
  }
53
49
  currentParent = currentParent.parent;
54
50
  }
55
51
  if (includeSelectors.length) {
56
52
  let currentElement = element;
57
53
  while (currentElement) {
58
- const shouldInclude = includeSelectors.some((selector) => selector.matches(currentElement));
59
- if (shouldInclude) return;
54
+ if (includeSelectors.some((selector) => selector.matches(currentElement))) return;
60
55
  if (!processChildren) break;
61
56
  currentElement = currentElement.parent;
62
57
  }
@@ -102,25 +97,20 @@ function frontmatterPlugin(options = {}) {
102
97
  }
103
98
  if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_TITLE) return;
104
99
  if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_META) {
105
- const elementNode = node;
106
- const { name, property, content } = elementNode.attributes || {};
100
+ const { name, property, content } = node.attributes || {};
107
101
  const metaName = property || name;
108
102
  if (metaName && content && metaFields.has(metaName)) frontmatter.meta[metaName.includes(":") ? `"${metaName}"` : metaName] = formatValue(metaName, content);
109
- return void 0;
103
+ return;
110
104
  }
111
105
  },
112
106
  onNodeExit(node, state) {
113
107
  if (node.type === ELEMENT_NODE && node.tagId === TAG_HEAD) {
114
108
  inHead = false;
115
- if (Object.keys(frontmatter).length > 0) {
116
- const frontmatterContent = generateFrontmatter();
117
- collectNodeContent({
118
- type: 1,
119
- regionId: 0
120
- }, frontmatterContent, state);
121
- }
109
+ if (Object.keys(frontmatter).length > 0) collectNodeContent({
110
+ type: 1,
111
+ regionId: 0
112
+ }, generateFrontmatter(), state);
122
113
  }
123
- return void 0;
124
114
  },
125
115
  processTextNode(node) {
126
116
  if (!inHead) return;
@@ -237,8 +227,7 @@ function isolateMainPlugin() {
237
227
  }
238
228
  }
239
229
  if (firstHeaderElement && !afterFooter && element.tagId === TAG_FOOTER) {
240
- const depthDifference = element.depth - firstHeaderElement.depth;
241
- if (depthDifference <= 5) {
230
+ if (element.depth - firstHeaderElement.depth <= 5) {
242
231
  afterFooter = true;
243
232
  return { skip: true };
244
233
  }
@@ -400,8 +389,7 @@ function readabilityPlugin() {
400
389
  node.context.tagCount = 1;
401
390
  node.context.linkTextLength = 0;
402
391
  node.context.textLength = 0;
403
- const hasStrongNegativePattern = node.name && /nav|header|footer|aside|form|fieldset|button/i.test(node.name) || node.attributes?.class && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.class) || node.attributes?.id && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.id) || node.attributes?.style && /display:\s*none|visibility:\s*hidden/i.test(node.attributes.style) || node.attributes && Object.keys(node.attributes).some((attr) => attr.startsWith("aria-") && node.attributes[attr] === "true" && /hidden|invisible/i.test(attr));
404
- if (hasStrongNegativePattern) createBufferRegion(node, state, false);
392
+ if (node.name && /nav|header|footer|aside|form|fieldset|button/i.test(node.name) || node.attributes?.class && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.class) || node.attributes?.id && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.id) || node.attributes?.style && /display:\s*none|visibility:\s*hidden/i.test(node.attributes.style) || node.attributes && Object.keys(node.attributes).some((attr) => attr.startsWith("aria-") && node.attributes[attr] === "true" && /hidden|invisible/i.test(attr))) createBufferRegion(node, state, false);
405
393
  else if (node.parent && node.parent.context) node.context.score = (node.context.score || 0) + (node.parent.context.score || 0);
406
394
  },
407
395
  processTextNode(node) {
@@ -418,7 +406,6 @@ function readabilityPlugin() {
418
406
  if (isInsideLink) parent.context.linkTextLength = (parent.context.linkTextLength || 0) + len;
419
407
  parent = parent.parent;
420
408
  }
421
- return void 0;
422
409
  },
423
410
  onNodeExit(node, state) {
424
411
  if (!node.context) return;
@@ -505,14 +492,13 @@ const TAILWIND_TO_MARKDOWN_MAP = {
505
492
  * Extract base class name from a responsive breakpoint variant
506
493
  */
507
494
  function extractBaseClass(className) {
508
- const breakpoints = [
495
+ for (const bp of [
509
496
  "sm:",
510
497
  "md:",
511
498
  "lg:",
512
499
  "xl:",
513
500
  "2xl:"
514
- ];
515
- for (const bp of breakpoints) if (className.startsWith(bp)) return {
501
+ ]) if (className.startsWith(bp)) return {
516
502
  baseClass: className.substring(bp.length),
517
503
  breakpoint: bp
518
504
  };
@@ -604,8 +590,7 @@ function processTailwindClasses(classes) {
604
590
  let prefix = "";
605
591
  let suffix = "";
606
592
  let hidden = false;
607
- const normalizedClasses = normalizeClasses(classes);
608
- const grouped = groupByFormattingType(normalizedClasses);
593
+ const grouped = groupByFormattingType(normalizeClasses(classes));
609
594
  if (grouped.weight.length > 0) {
610
595
  const { baseClass } = extractBaseClass(grouped.weight[0]);
611
596
  const mapping = TAILWIND_TO_MARKDOWN_MAP[baseClass];
@@ -660,8 +645,7 @@ function tailwindPlugin() {
660
645
  processAttributes(node) {
661
646
  const classAttr = node.attributes?.class;
662
647
  if (!classAttr) return;
663
- const classes = classAttr.trim().split(" ").filter(Boolean);
664
- const { prefix, suffix, hidden } = processTailwindClasses(classes);
648
+ const { prefix, suffix, hidden } = processTailwindClasses(classAttr.trim().split(" ").filter(Boolean));
665
649
  node.context = node.context || {};
666
650
  node.context.tailwind = {
667
651
  prefix,
@@ -671,7 +655,7 @@ function tailwindPlugin() {
671
655
  },
672
656
  processTextNode(node) {
673
657
  const parentNode = node.parent;
674
- if (!parentNode || parentNode.type !== ELEMENT_NODE) return void 0;
658
+ if (!parentNode || parentNode.type !== ELEMENT_NODE) return;
675
659
  const tailwindData = parentNode.context?.tailwind;
676
660
  if (tailwindData?.hidden) return {
677
661
  content: "",
@@ -691,13 +675,11 @@ function tailwindPlugin() {
691
675
  },
692
676
  beforeNodeProcess({ node }) {
693
677
  if (node.type === ELEMENT_NODE) {
694
- const elementNode = node;
695
- const tailwindData = elementNode.context?.tailwind;
696
- if (tailwindData?.hidden) return { skip: true };
678
+ if ((node.context?.tailwind)?.hidden) return { skip: true };
697
679
  }
698
680
  }
699
681
  });
700
682
  }
701
683
 
702
684
  //#endregion
703
- export { filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
685
+ export { filterPlugin as a, frontmatterPlugin as i, readabilityPlugin as n, isolateMainPlugin as r, tailwindPlugin as t };
@@ -1,4 +1,4 @@
1
- import { createMarkdownProcessor, parseHtmlStream, processPluginsForEvent } from "./markdown-processor-f7XT0--8.mjs";
1
+ import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./markdown-processor-D26Uo5td.mjs";
2
2
 
3
3
  //#region src/stream.ts
4
4
  /**
@@ -22,8 +22,7 @@ async function* streamHtmlToMarkdown(htmlStream, options = {}) {
22
22
  while (true) {
23
23
  const { done, value } = await reader.read();
24
24
  if (done) break;
25
- const htmlContent = `${remainingHtml}${typeof value === "string" ? value : decoder.decode(value, { stream: true })}`;
26
- remainingHtml = parseHtmlStream(htmlContent, parseState, (event) => {
25
+ remainingHtml = parseHtmlStream(`${remainingHtml}${typeof value === "string" ? value : decoder.decode(value, { stream: true })}`, parseState, (event) => {
27
26
  processPluginsForEvent(event, options.plugins, processor.state, processor.processEvent);
28
27
  });
29
28
  const chunk = processor.getMarkdownChunk();
@@ -49,4 +48,4 @@ function htmlToMarkdown(html, options = {}) {
49
48
  }
50
49
 
51
50
  //#endregion
52
- export { htmlToMarkdown, streamHtmlToMarkdown };
51
+ export { streamHtmlToMarkdown as n, htmlToMarkdown as t };
@@ -317,4 +317,4 @@ interface SplitterOptions extends HTMLToMarkdownOptions {
317
317
  keepSeparator?: boolean;
318
318
  }
319
319
  //#endregion
320
- export { BufferRegion, ELEMENT_NODE as ELEMENT_NODE$1, ElementNode, type ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MarkdownChunk, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, SplitterOptions, TEXT_NODE as TEXT_NODE$1, TagHandler, TailwindContext, TextNode, extractionPlugin as extractionPlugin$1 };
320
+ export { TagHandler as _, HandlerContext as a, ExtractedElement as b, MdreamRuntimeState as c, Plugin as d, PluginContext as f, TEXT_NODE as g, SplitterOptions as h, HTMLToMarkdownOptions as i, Node as l, ReadabilityContext as m, ELEMENT_NODE as n, MarkdownChunk as o, PluginCreationOptions as p, ElementNode as r, MdreamProcessingState as s, BufferRegion as t, NodeEvent as u, TailwindContext as v, extractionPlugin as x, TextNode as y };
package/dist/cli.mjs CHANGED
@@ -1,11 +1,11 @@
1
- import "./_chunks/const-BOAJ1T5c.mjs";
2
- import "./_chunks/markdown-processor-f7XT0--8.mjs";
3
- import "./_chunks/plugin-DrovQriD.mjs";
4
- import { streamHtmlToMarkdown } from "./_chunks/src-C3QpB75q.mjs";
5
- import "./_chunks/extraction-BPaDGYvv.mjs";
6
- import { generateLlmsTxtArtifacts } from "./_chunks/llms-txt-DC12yO2l.mjs";
7
- import "./_chunks/plugins-C5_irVJs.mjs";
8
- import { withMinimalPreset } from "./_chunks/minimal-co1tIZYm.mjs";
1
+ import "./_chunks/const-Bf_XN9U9.mjs";
2
+ import "./_chunks/markdown-processor-D26Uo5td.mjs";
3
+ import "./_chunks/plugin-CjWWQTuL.mjs";
4
+ import { n as streamHtmlToMarkdown } from "./_chunks/src-BJpipdul.mjs";
5
+ import "./_chunks/extraction-BA9MDtq3.mjs";
6
+ import { t as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-D7Hduhij.mjs";
7
+ import "./_chunks/plugins-DJnqR2fA.mjs";
8
+ import { t as withMinimalPreset } from "./_chunks/minimal-BiDhcwif.mjs";
9
9
  import { readFileSync } from "node:fs";
10
10
  import { mkdir, writeFile } from "node:fs/promises";
11
11
  import { Readable } from "node:stream";
@@ -38,12 +38,8 @@ async function generateLlms(patterns, options) {
38
38
  generateMarkdown: artifacts.includes("markdown")
39
39
  });
40
40
  await mkdir(outputDir, { recursive: true });
41
- const llmsPath = join(outputDir, "llms.txt");
42
- await writeFile(llmsPath, result.llmsTxt, "utf-8");
43
- if (artifacts.includes("llms-full.txt") && result.llmsFullTxt) {
44
- const fullPath = join(outputDir, "llms-full.txt");
45
- await writeFile(fullPath, result.llmsFullTxt, "utf-8");
46
- }
41
+ await writeFile(join(outputDir, "llms.txt"), result.llmsTxt, "utf-8");
42
+ if (artifacts.includes("llms-full.txt") && result.llmsFullTxt) await writeFile(join(outputDir, "llms-full.txt"), result.llmsFullTxt, "utf-8");
47
43
  if (artifacts.includes("markdown") && result.markdownFiles) for (const mdFile of result.markdownFiles) {
48
44
  const fullPath = join(outputDir, mdFile.path);
49
45
  await mkdir(dirname(fullPath), { recursive: true });
@@ -55,10 +51,8 @@ async function generateLlms(patterns, options) {
55
51
  process.exit(1);
56
52
  }
57
53
  }
58
- const __dirname = dirname(fileURLToPath(import.meta.url));
59
- const packageJsonPath = join(__dirname, "..", "package.json");
60
- const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
61
- const version = packageJson.version;
54
+ const packageJsonPath = join(dirname(fileURLToPath(import.meta.url)), "..", "package.json");
55
+ const version = JSON.parse(readFileSync(packageJsonPath, "utf-8")).version;
62
56
  const cli = cac();
63
57
  cli.command("[options]", "Convert HTML from stdin to Markdown on stdout").option("--origin <url>", "Origin URL for resolving relative image paths").option("--preset <preset>", "Conversion presets: minimal").action(async (_, opts) => {
64
58
  await streamingConvert(opts);
package/dist/iife.js CHANGED
@@ -1,22 +1,12 @@
1
- (function() {
2
- 'use strict';
1
+ function e(e,t,n){if(!t)return;let r=e.regionId||0,i=n.regionContentBuffers.get(r);i&&(i.push(t),n.lastContentCache=t)}function t(e){let t=[];for(let[n,r]of Array.from(e.regionContentBuffers.entries()))e.regionToggles.get(n)&&t.push(...r);return e.regionToggles.clear(),e.regionContentBuffers.clear(),t.join(``).trimStart()}const n={"&amp;":`&`,"&lt;":`<`,"&gt;":`>`,"&quot;":`"`,"&#39;":`'`,"&apos;":`'`,"&nbsp;":` `},r={html:0,head:1,details:2,summary:3,title:4,meta:5,br:6,h1:7,h2:8,h3:9,h4:10,h5:11,h6:12,hr:13,strong:14,b:15,em:16,i:17,del:18,sub:19,sup:20,ins:21,blockquote:22,code:23,ul:24,li:25,a:26,img:27,table:28,thead:29,tr:30,th:31,td:32,ol:33,pre:34,p:35,div:36,span:37,tbody:38,tfoot:39,form:40,nav:41,label:42,button:43,body:44,center:45,kbd:46,footer:47,path:48,svg:49,article:50,section:51,script:52,style:53,link:54,area:55,base:56,col:57,embed:58,input:59,keygen:60,param:61,source:62,track:63,wbr:64,select:65,textarea:66,option:67,fieldset:68,legend:69,audio:70,video:71,canvas:72,iframe:73,map:74,dialog:75,meter:76,progress:77,template:78,abbr:79,mark:80,q:81,samp:82,small:83,noscript:84,noframes:85,xmp:86,plaintext:87,aside:88,u:89,cite:90,dfn:91,var:92,time:93,bdo:94,ruby:95,rt:96,rp:97,dd:98,dt:99,dl:101,address:100,figure:102,object:103,main:104,header:105,figcaption:106,caption:107},i=[0,0],a=[2,2],o=[1,1],s=[1,0],c=[0,1];function l(e,t){if(!e)return e;if(e.startsWith(`//`))return`https:${e}`;if(e.startsWith(`#`))return e;if(t){if(e.startsWith(`/`)&&t)return`${t.endsWith(`/`)?t.slice(0,-1):t}${e}`;if(e.startsWith(`./`))return`${t}/${e.slice(2)}`;if(!e.startsWith(`http`))return`${t}/${e.startsWith(`/`)?e.slice(1):e}`}return e}function u(e){return e.depthMap[32]>0}function d(e){if(!e)return``;let t=e.split(` `).map(e=>e.split(`language-`)[1]).filter(Boolean);return t.length>0?t[0].trim():``}function f(e){return{enter:({node:t})=>t.depthMap[26]?`<h${e}>`:`${`#`.repeat(e)} 
`,exit:({node:t})=>{if(t.depthMap[26])return`</h${e}>`},collapsesInnerWhiteSpace:!0}}const p={enter:({node:e})=>e.depthMap[15]>1?``:`**`,exit:({node:e})=>e.depthMap[15]>1?``:`**`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},m={enter:({node:e})=>e.depthMap[17]>1?``:`_`,exit:({node:e})=>e.depthMap[17]>1?``:`_`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},h={1:{spacing:i,collapsesInnerWhiteSpace:!0},2:{enter:()=>`<details>`,exit:()=>`</details>
3
2
 
4
- function e(e,t,n){if(!t)return;let r=e.regionId||0,i=n.regionContentBuffers.get(r);i&&(i.push(t),n.lastContentCache=t)}function t(e){let t=[];for(let[n,r]of Array.from(e.regionContentBuffers.entries())){let i=e.regionToggles.get(n);i&&t.push(...r)}return e.regionToggles.clear(),e.regionContentBuffers.clear(),t.join(``).trimStart()}const n=0,r=1,i=2,a=3,o=4,s=5,c=6,l=7,u=8,d=9,f=10,p=11,m=12,h=13,g=14,_=15,v=16,y=17,ee=18,te=19,ne=20,re=21,b=22,x=23,S=24,C=25,w=26,ie=27,T=28,ae=29,oe=30,se=31,E=32,D=33,O=34,k=35,A=36,j=37,ce=38,le=39,ue=40,de=41,fe=42,pe=43,me=44,he=45,ge=46,_e=47,ve=48,M=49,ye=50,be=51,N=52,P=53,F=54,xe=55,Se=56,Ce=57,we=58,Te=59,Ee=60,De=61,Oe=62,ke=63,Ae=64,je=65,Me=66,Ne=67,Pe=68,Fe=69,Ie=70,Le=71,Re=72,ze=73,Be=74,Ve=75,He=76,Ue=77,We=78,Ge=79,Ke=80,qe=81,Je=82,Ye=83,Xe=84,Ze=85,Qe=86,$e=87,et=88,tt=89,nt=90,rt=91,it=92,at=93,ot=94,st=95,ct=96,lt=97,ut=98,dt=99,ft=100,pt=101,mt=102,ht=103,gt=104,_t=105,vt=106,yt=107,bt=108,xt={"&amp;":`&`,"&lt;":`<`,"&gt;":`>`,"&quot;":`"`,"&#39;":`'`,"&apos;":`'`,"&nbsp;":` 
`},I=1,L=2,R=0,St=1,Ct={html:n,head:r,details:i,summary:a,title:o,meta:s,br:c,h1:l,h2:u,h3:d,h4:f,h5:p,h6:m,hr:h,strong:g,b:_,em:v,i:y,del:ee,sub:te,sup:ne,ins:re,blockquote:b,code:x,ul:S,li:C,a:w,img:ie,table:T,thead:ae,tr:oe,th:se,td:E,ol:D,pre:O,p:k,div:A,span:j,tbody:ce,tfoot:le,form:ue,nav:de,label:fe,button:pe,body:me,center:he,kbd:ge,footer:_e,path:ve,svg:M,article:ye,section:be,script:N,style:P,link:F,area:xe,base:Se,col:Ce,embed:we,input:Te,keygen:Ee,param:De,source:Oe,track:ke,wbr:Ae,select:je,textarea:Me,option:Ne,fieldset:Pe,legend:Fe,audio:Ie,video:Le,canvas:Re,iframe:ze,map:Be,dialog:Ve,meter:He,progress:Ue,template:We,abbr:Ge,mark:Ke,q:qe,samp:Je,small:Ye,noscript:Xe,noframes:Ze,xmp:Qe,plaintext:$e,aside:et,u:tt,cite:nt,dfn:rt,var:it,time:at,bdo:ot,ruby:st,rt:ct,rp:lt,dd:ut,dt,dl:pt,address:ft,figure:mt,object:ht,main:gt,header:_t,figcaption:vt,caption:yt},wt=`**`,Tt=`_`,Et=`~~`,Dt="```",Ot="`",kt=`---`,z=[0,0],At=[2,2],jt=[1,1],Mt=[1,0],B=[0,1];function Nt(e,t){if(!e)return e;if(e.startsWith(`//`))return`https:${e}`;if(e.startsWith(`#`))return e;if(t){if(e.startsWith(`/`)&&t){let n=t.endsWith(`/`)?t.slice(0,-1):t;return`${n}${e}`}if(e.startsWith(`./`))return`${t}/${e.slice(2)}`;if(!e.startsWith(`http`)){let n=e.startsWith(`/`)?e.slice(1):e;return`${t}/${n}`}}return e}function V(e){return e.depthMap[E]>0}function Pt(e){if(!e)return``;let t=e.split(` `).map(e=>e.split(`language-`)[1]).filter(Boolean);return t.length>0?t[0].trim():``}function H(e){return{enter:({node:t})=>t.depthMap[w]?`<h${e}>`:`${`#`.repeat(e)} `,exit:({node:t})=>{if(t.depthMap[w])return`</h${e}>`},collapsesInnerWhiteSpace:!0}}const 
Ft={enter:({node:e})=>e.depthMap[_]>1?``:wt,exit:({node:e})=>e.depthMap[_]>1?``:wt,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},It={enter:({node:e})=>e.depthMap[y]>1?``:Tt,exit:({node:e})=>e.depthMap[y]>1?``:Tt,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},Lt={[r]:{spacing:z,collapsesInnerWhiteSpace:!0},[i]:{enter:()=>`<details>`,exit:()=>`</details>
3
+ `},3:{enter:()=>`<summary>`,exit:()=>`</summary>
5
4
 
6
- `},[a]:{enter:()=>`<summary>`,exit:()=>`</summary>
7
-
8
- `},[o]:{collapsesInnerWhiteSpace:!0,isNonNesting:!0,spacing:z},[N]:{excludesTextNodes:!0,isNonNesting:!0},[P]:{isNonNesting:!0,excludesTextNodes:!0},[s]:{collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:z},[c]:{enter:({node:e})=>V(e)?`<br>`:void 0,isSelfClosing:!0,spacing:z,collapsesInnerWhiteSpace:!0,isInline:!0},[l]:H(1),[u]:H(2),[d]:H(3),[f]:H(4),[p]:H(5),[m]:H(6),[h]:{enter:()=>kt,isSelfClosing:!0},[g]:Ft,[_]:Ft,[v]:It,[y]:It,[ee]:{enter:()=>Et,exit:()=>Et,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[te]:{enter:()=>`<sub>`,exit:()=>`</sub>`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[ne]:{enter:()=>`<sup>`,exit:()=>`</sup>`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[re]:{enter:()=>`<ins>`,exit:()=>`</ins>`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[b]:{enter:({node:e})=>{let t=e.depthMap[b]||1,n=`> `.repeat(t);return e.depthMap[C]>0&&(n=`\n${` `.repeat(e.depthMap[C])}${n}`),n},spacing:jt},[x]:{enter:({node:e})=>{if((e.depthMap[O]||0)>0){let t=Pt(e.attributes?.class);return`${Dt}${t}\n`}return Ot},exit:({node:e})=>e.depthMap[O]>0?`\n${Dt}`:Ot,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[S]:{enter:({node:e})=>V(e)?`<ul>`:void 0,exit:({node:e})=>V(e)?`</ul>`:void 0},[C]:{enter:({node:e})=>{if(V(e))return`<li>`;let t=(e.depthMap[S]||0)+(e.depthMap[D]||0)-1,n=e.parent?.tagId===D,r=` `.repeat(Math.max(0,t)),i=n?`${e.index+1}. 
`:`- `;return`${r}${i}`},exit:({node:e})=>V(e)?`</li>`:void 0,spacing:Mt},[w]:{enter:({node:e})=>{if(e.attributes?.href)return`[`},exit:({node:e,state:t})=>{if(!e.attributes?.href)return``;let n=Nt(e.attributes?.href||``,t.options?.origin),r=e.attributes?.title,i=t.lastContentCache;return i===r&&(r=``),r?`](${n} "${r}")`:`](${n})`},collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[ie]:{enter:({node:e,state:t})=>{let n=e.attributes?.alt||``,r=Nt(e.attributes?.src||``,t.options?.origin);return`![${n}](${r})`},collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:z,isInline:!0},[T]:{enter:({node:e,state:t})=>{if(V(e))return`<table>`;e.depthMap[T]<=1&&(t.tableRenderedTable=!1),t.tableColumnAlignments=[]},exit:({node:e})=>V(e)?`</table>`:void 0},[ae]:{enter:({node:e})=>{if(V(e))return`<thead>`},exit:({node:e})=>V(e)?`</thead>`:void 0,spacing:B,excludesTextNodes:!0},[oe]:{enter:({node:e,state:t})=>V(e)?`<tr>`:(t.tableCurrentRowCells=0,`| `),exit:({node:e,state:t})=>{if(V(e)||e.depthMap[T]>1)return`</tr>`;if(!t.tableRenderedTable){t.tableRenderedTable=!0;let e=t.tableColumnAlignments;for(;e.length<t.tableCurrentRowCells;)e.push(``);let n=e.map(e=>{switch(e){case`left`:return`:---`;case`center`:return`:---:`;case`right`:return`---:`;default:return`---`}});return` |\n| ${n.join(` | `)} |`}return` |`},excludesTextNodes:!0,spacing:B},[se]:{enter:({node:e,state:t})=>{if(e.depthMap[T]>1)return`<th>`;let n=e.attributes?.align?.toLowerCase();return n?t.tableColumnAlignments.push(n):t.tableColumnAlignments.length<=t.tableCurrentRowCells&&t.tableColumnAlignments.push(``),e.index===0?``:` | `},exit:({node:e,state:t})=>{if(e.depthMap[T]>1)return`</th>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:z},[E]:{enter:({node:e})=>e.depthMap[T]>1?`<td>`:e.index===0?``:` | 
`,exit:({node:e,state:t})=>{if(e.depthMap[T]>1)return`</td>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:z},[k]:{},[A]:{},[j]:{collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[de]:{},[fe]:{collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[pe]:{collapsesInnerWhiteSpace:!0,isInline:!0},[me]:{spacing:z},[he]:{enter:({node:e})=>{if(e.depthMap[T]>1)return`<center>`},exit:({node:e})=>{if(e.depthMap[T]>1)return`</center>`},spacing:z},[ce]:{spacing:z,excludesTextNodes:!0},[le]:{spacing:B,excludesTextNodes:!0},[ge]:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[_e]:{spacing:z},[ue]:{spacing:z},[F]:{isSelfClosing:!0,spacing:z,collapsesInnerWhiteSpace:!0,isInline:!0},[xe]:{isSelfClosing:!0,spacing:z,isInline:!0},[Se]:{isSelfClosing:!0,spacing:z,isInline:!0},[Ce]:{isSelfClosing:!0,spacing:z},[we]:{isSelfClosing:!0,spacing:z},[Te]:{isSelfClosing:!0,spacing:z,isInline:!0},[Ee]:{isSelfClosing:!0,spacing:z,isInline:!0},[De]:{isSelfClosing:!0,spacing:z},[Oe]:{isSelfClosing:!0,spacing:z},[ke]:{isSelfClosing:!0,spacing:z},[Ae]:{isSelfClosing:!0,spacing:z,isInline:!0},[M]:{spacing:z},[je]:{spacing:z},[Me]:{isNonNesting:!0,spacing:z},[Ne]:{isNonNesting:!0,spacing:z},[Pe]:{spacing:z},[Fe]:{spacing:z},[Ie]:{spacing:z},[Le]:{spacing:z},[Re]:{spacing:z},[ze]:{isNonNesting:!0,spacing:z},[Be]:{spacing:z},[Ve]:{spacing:z},[He]:{spacing:z},[Ue]:{spacing:z},[We]:{spacing:z},[Ge]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[Ke]:{enter:()=>`<mark>`,exit:()=>`</mark>`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[qe]:{enter:()=>`"`,exit:()=>`"`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[Je]:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[Ye]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[Xe]:{excludesTextNodes:!0,spacing:z},[Ze]:{isNonNesting:!0,spacing:z},[Qe]:{isNonNesting:!0,spacing:z},[$e]:{isNonNesting:!0,spacing:z},[et]:{spacing
:z},[tt]:{enter:()=>`<u>`,exit:()=>`</u>`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[nt]:{enter:()=>`*`,exit:()=>`*`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[rt]:{enter:()=>`**`,exit:()=>`**`,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[it]:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[at]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[ot]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[st]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[ct]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[lt]:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:z,isInline:!0},[ft]:{enter:()=>`<address>`,exit:()=>`</address>`,spacing:z,collapsesInnerWhiteSpace:!0},[pt]:{spacing:z,enter:()=>`<dl>`,exit:()=>`</dl>`},[dt]:{enter:()=>`<dt>`,exit:()=>`</dt>`,collapsesInnerWhiteSpace:!0,spacing:[0,1]},[ut]:{enter:()=>`<dd>`,exit:()=>`</dd>`,spacing:[0,1]}};function Rt(e){let t=``,n=0;for(;n<e.length;){if(e[n]===`&`){let r=!1;for(let[i,a]of Object.entries(xt))if(e.startsWith(i,n)){t+=a,n+=i.length,r=!0;break}if(r)continue;if(n+2<e.length&&e[n+1]===`#`){let r=n;n+=2;let i=e[n]===`x`||e[n]===`X`;i&&n++;let a=n;for(;n<e.length&&e[n]!==`;`;)n++;if(n<e.length&&e[n]===`;`){let r=e.substring(a,n),o=i?16:10;try{let e=Number.parseInt(r,o);if(!Number.isNaN(e)){t+=String.fromCodePoint(e),n++;continue}}catch{}}n=r}}t+=e[n],n++}return t}function zt(e){let t=e,n=[t];for(;t.tagHandler?.isInline&&t.parent;)t=t.parent,n.push(t);return n}const Bt=60,U=62,W=47,G=61,K=34,q=39,Vt=33,Ht=38,J=92,Y=45,X=32,Ut=9,Wt=10,Gt=13,Kt=96,qt=124,Jt=91,Yt=93,Xt=Object.freeze({});function Zt(e){return new Uint8Array(e)}function Z(e){return e===X||e===Ut||e===Wt||e===Gt}function Qt(e,t,n){return $t(e,t,n)}function $t(e,t,n){let r=``;t.depthMap??=new 
Uint8Array(bt),t.depth??=0,t.lastCharWasWhitespace??=!0,t.justClosedTag??=!1,t.isFirstTextInElement??=!1,t.lastCharWasBackslash??=!1;let i=0,a=e.length;for(;i<a;){let o=e.charCodeAt(i);if(o!==Bt){if(o===Ht&&(t.hasEncodedHtmlEntity=!0),Z(o)){let n=t.depthMap[O]>0;if(t.justClosedTag&&(t.justClosedTag=!1,t.lastCharWasWhitespace=!1),!n&&t.lastCharWasWhitespace){i++;continue}n?r+=e[i]:(o===X||!t.lastCharWasWhitespace)&&(r+=` `),t.lastCharWasWhitespace=!0,t.textBufferContainsWhitespace=!0,t.lastCharWasBackslash=!1}else t.textBufferContainsNonWhitespace=!0,t.lastCharWasWhitespace=!1,t.justClosedTag=!1,o===qt&&t.depthMap[T]?r+=`\\|`:o===Kt&&(t.depthMap[x]||t.depthMap[O])?r+="\\`":o===Jt&&t.depthMap[w]?r+=`\\[`:o===Yt&&t.depthMap[w]?r+=`\\]`:o===U&&t.depthMap[b]?r+=`\\>`:r+=e[i],t.currentNode?.tagHandler?.isNonNesting&&(t.lastCharWasBackslash||(o===q&&!t.inDoubleQuote&&!t.inBacktick?t.inSingleQuote=!t.inSingleQuote:o===K&&!t.inSingleQuote&&!t.inBacktick?t.inDoubleQuote=!t.inDoubleQuote:o===Kt&&!t.inSingleQuote&&!t.inDoubleQuote&&(t.inBacktick=!t.inBacktick))),t.lastCharWasBackslash=o===J;i++;continue}if(i+1>=a){r+=e[i];break}let s=e.charCodeAt(i+1);if(s===Vt){r.length>0&&(Q(r,t,n),r=``);let a=tn(e,i);if(a.complete)i=a.newPosition;else{r+=a.remainingText;break}}else if(s===W){let a=t.inSingleQuote||t.inDoubleQuote||t.inBacktick;if(t.currentNode?.tagHandler?.isNonNesting&&a){r+=e[i],i++;continue}r.length>0&&(Q(r,t,n),r=``);let o=en(e,i,t,n);if(o.complete)i=o.newPosition;else{r+=o.remainingText;break}}else{let o=i+1,s=o,c=-1;for(;o<a;){let t=e.charCodeAt(o);if(Z(t)||t===W||t===U){c=o;break}o++}if(c===-1){r+=e.substring(i);break}let l=e.substring(s,c).toLowerCase();if(!l){i=c;break}let u=Ct[l]??-1;if(o=c,t.currentNode?.tagHandler?.isNonNesting&&u!==t.currentNode?.tagId){r+=e[i++];continue}r.length>0&&(Q(r,t,n),r=``);let d=nn(l,u,e,o,t,n);if(d.skip)r+=e[i++];else if(d.complete)i=d.newPosition,d.selfClosing||(t.isFirstTextInElement=!0);else{r+=d.remainingText;break}}}return 
r}function Q(e,t,n){let r=t.textBufferContainsNonWhitespace,i=t.textBufferContainsWhitespace;if(t.textBufferContainsNonWhitespace=!1,t.textBufferContainsWhitespace=!1,!t.currentNode)return;let a=t.currentNode?.tagHandler?.excludesTextNodes,o=t.depthMap[O]>0;if(!o&&!r&&!t.currentNode.childTextNodeIndex)return;let s=e;if(s.length===0)return;let c=zt(t.currentNode),l=c[c.length-1];if(i&&!l?.childTextNodeIndex){let e=0;for(;e<s.length&&(o?s.charCodeAt(e)===Wt||s.charCodeAt(e)===Gt:Z(s.charCodeAt(e)));)e++;e>0&&(s=s.substring(e))}t.hasEncodedHtmlEntity&&(s=Rt(String(s)),t.hasEncodedHtmlEntity=!1);let u={type:L,value:s,parent:t.currentNode,regionId:t.currentNode?.regionId,index:t.currentNode.currentWalkIndex++,depth:t.depth,containsWhitespace:i,excludedFromMarkdown:a};for(let e of c)e.childTextNodeIndex=(e.childTextNodeIndex||0)+1;n({type:R,node:u}),t.lastTextNode=u}function en(e,t,n,r){let i=t+2,a=i,o=e.length,s=!1;for(;i<o;){let t=e.charCodeAt(i);if(t===U){s=!0;break}i++}if(!s)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let c=e.substring(a,i).toLowerCase(),l=Ct[c]??-1;if(n.currentNode?.tagHandler?.isNonNesting&&l!==n.currentNode.tagId)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let u=n.currentNode;if(u){let e=u.tagId!==l;for(;u&&e;)$(u,n,r),u=u.parent,e=u?.tagId!==l}return u&&$(u,n,r),n.justClosedTag=!0,{complete:!0,newPosition:i+1,remainingText:``}}function $(e,t,n){if(e){if(e.tagId===w&&!e.childTextNodeIndex){let t=e.attributes?.title||e.attributes?.[`aria-label`]||``;if(t){e.childTextNodeIndex=1;let r={type:L,value:t,parent:e,index:0,depth:e.depth+1};n({type:R,node:r});for(let t of zt(e))t.childTextNodeIndex=(t.childTextNodeIndex||0)+1}}e.tagId&&(t.depthMap[e.tagId]=Math.max(0,t.depthMap[e.tagId]-1)),e.tagHandler?.isNonNesting&&(t.inSingleQuote=!1,t.inDoubleQuote=!1,t.inBacktick=!1,t.lastCharWasBackslash=!1),t.depth--,n({type:St,node:e}),t.currentNode=t.currentNode.parent,t.hasEncodedHtmlEntity=!1,t.justClosedTag=!0}}function 
tn(e,t){let n=t,r=e.length;if(n+3<r&&e.charCodeAt(n+2)===Y&&e.charCodeAt(n+3)===Y){for(n+=4;n<r-2;){if(e.charCodeAt(n)===Y&&e.charCodeAt(n+1)===Y&&e.charCodeAt(n+2)===U)return n+=3,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:t,remainingText:e.substring(t)}}else{for(n+=2;n<r;){if(e.charCodeAt(n)===U)return n++,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:n,remainingText:e.substring(t,n)}}}function nn(e,t,n,r,i,a){i.currentNode?.tagHandler?.isNonNesting&&$(i.currentNode,i,a);let o=Lt[t],s=rn(n,r,o);if(!s.complete)return{complete:!1,newPosition:r,remainingText:`<${e}${s.attrBuffer}`,selfClosing:!1};let c=i.depthMap[t];i.depthMap[t]=c+1,i.depth++,r=s.newPosition,i.currentNode&&(i.currentNode.currentWalkIndex=i.currentNode.currentWalkIndex||0);let l=i.currentNode?i.currentNode.currentWalkIndex++:0,u={type:I,name:e,attributes:s.attributes,parent:i.currentNode,depthMap:Zt(i.depthMap),depth:i.depth,index:l,regionId:i.currentNode?.regionId,tagId:t,tagHandler:o};i.lastTextNode=u,a({type:R,node:u});let d=u;return d.currentWalkIndex=0,i.currentNode=d,i.hasEncodedHtmlEntity=!1,o?.isNonNesting&&!s.selfClosing&&(i.inSingleQuote=!1,i.inDoubleQuote=!1,i.inBacktick=!1,i.lastCharWasBackslash=!1),s.selfClosing?($(u,i,a),i.justClosedTag=!0):i.justClosedTag=!1,{complete:!0,newPosition:r,remainingText:``,selfClosing:s.selfClosing}}function rn(e,t,n){let r=t,i=e.length,a=n?.isSelfClosing||!1,o=r,s=!1,c=0,l=0;for(;r<i;){let t=e.charCodeAt(r);if(s){t===c&&l!==J&&(s=!1),r++;continue}else if(t===K||t===q)s=!0,c=t;else if(t===W&&r+1<i&&e.charCodeAt(r+1)===U){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+2,attributes:an(t),selfClosing:!0,attrBuffer:t}}else if(t===U){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+1,attributes:an(t),selfClosing:a,attrBuffer:t}}r++,l=t}return{complete:!1,newPosition:r,attributes:Xt,selfClosing:!1,attrBuffer:e.substring(o,r)}}function an(e){if(!e)return Xt;let 
t={},n=e.length,r=0,i=0,a=1,o=2,s=3,c=4,l=5,u=i,d=0,f=0,p=0,m=0,h=``;for(;r<n;){let g=e.charCodeAt(r),_=Z(g);switch(u){case i:_||(u=a,d=r,f=0);break;case a:(g===G||_)&&(f=r,h=e.substring(d,f).toLowerCase(),u=g===G?s:o);break;case o:g===G?u=s:_||(t[h]=``,u=a,d=r,f=0);break;case s:g===K||g===q?(m=g,u=c,p=r+1):_||(u=l,p=r);break;case c:g===J&&r+1<n?r++:g===m&&(t[h]=e.substring(p,r),u=i);break;case l:(_||g===U)&&(t[h]=e.substring(p,r),u=i);break}r++}if(u===c||u===l)h&&(t[h]=e.substring(p,r));else if(u===a||u===o||u===s){f||=r;let n=e.substring(d,f).toLowerCase();n&&(t[n]=``)}return t}function on(e,t,n,r){if(t?.length){for(let r of t){let t=r.beforeNodeProcess?.(e,n);if(typeof t==`object`&&t.skip)return!0}if(e.node.type===I){let r=e.node;if(e.type===R)for(let e of t)e.processAttributes&&e.processAttributes(r,n);let i=e.type===R?`onNodeEnter`:`onNodeExit`,a=[];for(let e of t)if(e[i]){let t=e[i](r,n);t&&a.push(t)}a.length>0&&(r.pluginOutput=(r.pluginOutput||[]).concat(a))}else if(e.node.type===L&&e.type===R){let r=e.node;for(let e of t)if(e.processTextNode){let t=e.processTextNode(r,n);if(t){if(t.skip)return!0;r.value=t.content}}}}return r(e),!1}function sn(e,t,n){if(e===` `||e===`
5
+ `},4:{collapsesInnerWhiteSpace:!0,isNonNesting:!0,spacing:i},52:{excludesTextNodes:!0,isNonNesting:!0},53:{isNonNesting:!0,excludesTextNodes:!0},5:{collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:i},6:{enter:({node:e})=>u(e)?`<br>`:void 0,isSelfClosing:!0,spacing:i,collapsesInnerWhiteSpace:!0,isInline:!0},7:f(1),8:f(2),9:f(3),10:f(4),11:f(5),12:f(6),13:{enter:()=>`---`,isSelfClosing:!0},14:p,15:p,16:m,17:m,18:{enter:()=>`~~`,exit:()=>`~~`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},19:{enter:()=>`<sub>`,exit:()=>`</sub>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},20:{enter:()=>`<sup>`,exit:()=>`</sup>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},21:{enter:()=>`<ins>`,exit:()=>`</ins>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},22:{enter:({node:e})=>{let t=e.depthMap[22]||1,n=`> `.repeat(t);return e.depthMap[25]>0&&(n=`\n${` `.repeat(e.depthMap[25])}${n}`),n},spacing:o},23:{enter:({node:e})=>(e.depthMap[34]||0)>0?`\`\`\`${d(e.attributes?.class)}\n`:"`",exit:({node:e})=>e.depthMap[34]>0?"\n```":"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},24:{enter:({node:e})=>u(e)?`<ul>`:void 0,exit:({node:e})=>u(e)?`</ul>`:void 0},25:{enter:({node:e})=>{if(u(e))return`<li>`;let t=(e.depthMap[24]||0)+(e.depthMap[33]||0)-1,n=e.parent?.tagId===33;return`${` `.repeat(Math.max(0,t))}${n?`${e.index+1}. 
`:`- `}`},exit:({node:e})=>u(e)?`</li>`:void 0,spacing:s},26:{enter:({node:e})=>{if(e.attributes?.href)return`[`},exit:({node:e,state:t})=>{if(!e.attributes?.href)return``;let n=l(e.attributes?.href||``,t.options?.origin),r=e.attributes?.title;return t.lastContentCache===r&&(r=``),r?`](${n} "${r}")`:`](${n})`},collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},27:{enter:({node:e,state:t})=>`![${e.attributes?.alt||``}](${l(e.attributes?.src||``,t.options?.origin)})`,collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:i,isInline:!0},28:{enter:({node:e,state:t})=>{if(u(e))return`<table>`;e.depthMap[28]<=1&&(t.tableRenderedTable=!1),t.tableColumnAlignments=[]},exit:({node:e})=>u(e)?`</table>`:void 0},29:{enter:({node:e})=>{if(u(e))return`<thead>`},exit:({node:e})=>u(e)?`</thead>`:void 0,spacing:c,excludesTextNodes:!0},30:{enter:({node:e,state:t})=>u(e)?`<tr>`:(t.tableCurrentRowCells=0,`| `),exit:({node:e,state:t})=>{if(u(e)||e.depthMap[28]>1)return`</tr>`;if(!t.tableRenderedTable){t.tableRenderedTable=!0;let e=t.tableColumnAlignments;for(;e.length<t.tableCurrentRowCells;)e.push(``);return` |\n| ${e.map(e=>{switch(e){case`left`:return`:---`;case`center`:return`:---:`;case`right`:return`---:`;default:return`---`}}).join(` | `)} |`}return` |`},excludesTextNodes:!0,spacing:c},31:{enter:({node:e,state:t})=>{if(e.depthMap[28]>1)return`<th>`;let n=e.attributes?.align?.toLowerCase();return n?t.tableColumnAlignments.push(n):t.tableColumnAlignments.length<=t.tableCurrentRowCells&&t.tableColumnAlignments.push(``),e.index===0?``:` | `},exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</th>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:i},32:{enter:({node:e})=>e.depthMap[28]>1?`<td>`:e.index===0?``:` | 
`,exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</td>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:i},35:{},36:{},37:{collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},41:{},42:{collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},43:{collapsesInnerWhiteSpace:!0,isInline:!0},44:{spacing:i},45:{enter:({node:e})=>{if(e.depthMap[28]>1)return`<center>`},exit:({node:e})=>{if(e.depthMap[28]>1)return`</center>`},spacing:i},38:{spacing:i,excludesTextNodes:!0},39:{spacing:c,excludesTextNodes:!0},46:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},47:{spacing:i},40:{spacing:i},54:{isSelfClosing:!0,spacing:i,collapsesInnerWhiteSpace:!0,isInline:!0},55:{isSelfClosing:!0,spacing:i,isInline:!0},56:{isSelfClosing:!0,spacing:i,isInline:!0},57:{isSelfClosing:!0,spacing:i},58:{isSelfClosing:!0,spacing:i},59:{isSelfClosing:!0,spacing:i,isInline:!0},60:{isSelfClosing:!0,spacing:i,isInline:!0},61:{isSelfClosing:!0,spacing:i},62:{isSelfClosing:!0,spacing:i},63:{isSelfClosing:!0,spacing:i},64:{isSelfClosing:!0,spacing:i,isInline:!0},49:{spacing:i},65:{spacing:i},66:{isNonNesting:!0,spacing:i},67:{isNonNesting:!0,spacing:i},68:{spacing:i},69:{spacing:i},70:{spacing:i},71:{spacing:i},72:{spacing:i},73:{isNonNesting:!0,spacing:i},74:{spacing:i},75:{spacing:i},76:{spacing:i},77:{spacing:i},78:{spacing:i},79:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},80:{enter:()=>`<mark>`,exit:()=>`</mark>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},81:{enter:()=>`"`,exit:()=>`"`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},82:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},83:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},84:{excludesTextNodes:!0,spacing:i},85:{isNonNesting:!0,spacing:i},86:{isNonNesting:!0,spacing:i},87:{isNonNesting:!0,spacing:i},88:{spacing:i},89:{enter:()=>`<u>`,exit:()=>`</u>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0}
,90:{enter:()=>`*`,exit:()=>`*`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},91:{enter:()=>`**`,exit:()=>`**`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},92:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},93:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},94:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},95:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},96:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},97:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},100:{enter:()=>`<address>`,exit:()=>`</address>`,spacing:i,collapsesInnerWhiteSpace:!0},101:{spacing:i,enter:()=>`<dl>`,exit:()=>`</dl>`},99:{enter:()=>`<dt>`,exit:()=>`</dt>`,collapsesInnerWhiteSpace:!0,spacing:[0,1]},98:{enter:()=>`<dd>`,exit:()=>`</dd>`,spacing:[0,1]}};function g(e){let t=``,r=0;for(;r<e.length;){if(e[r]===`&`){let i=!1;for(let[a,o]of Object.entries(n))if(e.startsWith(a,r)){t+=o,r+=a.length,i=!0;break}if(i)continue;if(r+2<e.length&&e[r+1]===`#`){let n=r;r+=2;let i=e[r]===`x`||e[r]===`X`;i&&r++;let a=r;for(;r<e.length&&e[r]!==`;`;)r++;if(r<e.length&&e[r]===`;`){let n=e.substring(a,r),o=i?16:10;try{let e=Number.parseInt(n,o);if(!Number.isNaN(e)){t+=String.fromCodePoint(e),r++;continue}}catch{}}r=n}}t+=e[r],r++}return t}function _(e){let t=e,n=[t];for(;t.tagHandler?.isInline&&t.parent;)t=t.parent,n.push(t);return n}const v=Object.freeze({});function y(e){return new Uint8Array(e)}function b(e){return e===32||e===9||e===10||e===13}function x(e,t,n){return S(e,t,n)}function S(e,t,n){let i=``;t.depthMap??=new Uint8Array(108),t.depth??=0,t.lastCharWasWhitespace??=!0,t.justClosedTag??=!1,t.isFirstTextInElement??=!1,t.lastCharWasBackslash??=!1;let a=0,o=e.length;for(;a<o;){let s=e.charCodeAt(a);if(s!==60){if(s===38&&(t.hasEncodedHtmlEntity=!0),b(s)){let 
n=t.depthMap[34]>0;if(t.justClosedTag&&(t.justClosedTag=!1,t.lastCharWasWhitespace=!1),!n&&t.lastCharWasWhitespace){a++;continue}n?i+=e[a]:(s===32||!t.lastCharWasWhitespace)&&(i+=` `),t.lastCharWasWhitespace=!0,t.textBufferContainsWhitespace=!0,t.lastCharWasBackslash=!1}else t.textBufferContainsNonWhitespace=!0,t.lastCharWasWhitespace=!1,t.justClosedTag=!1,s===124&&t.depthMap[28]?i+=`\\|`:s===96&&(t.depthMap[23]||t.depthMap[34])?i+="\\`":s===91&&t.depthMap[26]?i+=`\\[`:s===93&&t.depthMap[26]?i+=`\\]`:s===62&&t.depthMap[22]?i+=`\\>`:i+=e[a],t.currentNode?.tagHandler?.isNonNesting&&(t.lastCharWasBackslash||(s===39&&!t.inDoubleQuote&&!t.inBacktick?t.inSingleQuote=!t.inSingleQuote:s===34&&!t.inSingleQuote&&!t.inBacktick?t.inDoubleQuote=!t.inDoubleQuote:s===96&&!t.inSingleQuote&&!t.inDoubleQuote&&(t.inBacktick=!t.inBacktick))),t.lastCharWasBackslash=s===92;a++;continue}if(a+1>=o){i+=e[a];break}let c=e.charCodeAt(a+1);if(c===33){i.length>0&&(C(i,t,n),i=``);let r=E(e,a);if(r.complete)a=r.newPosition;else{i+=r.remainingText;break}}else if(c===47){let r=t.inSingleQuote||t.inDoubleQuote||t.inBacktick;if(t.currentNode?.tagHandler?.isNonNesting&&r){i+=e[a],a++;continue}i.length>0&&(C(i,t,n),i=``);let o=w(e,a,t,n);if(o.complete)a=o.newPosition;else{i+=o.remainingText;break}}else{let s=a+1,c=s,l=-1;for(;s<o;){let t=e.charCodeAt(s);if(b(t)||t===47||t===62){l=s;break}s++}if(l===-1){i+=e.substring(a);break}let u=e.substring(c,l).toLowerCase();if(!u){a=l;break}let d=r[u]??-1;if(s=l,t.currentNode?.tagHandler?.isNonNesting&&d!==t.currentNode?.tagId){i+=e[a++];continue}i.length>0&&(C(i,t,n),i=``);let f=D(u,d,e,s,t,n);if(f.skip)i+=e[a++];else if(f.complete)a=f.newPosition,f.selfClosing||(t.isFirstTextInElement=!0);else{i+=f.remainingText;break}}}return i}function C(e,t,n){let r=t.textBufferContainsNonWhitespace,i=t.textBufferContainsWhitespace;if(t.textBufferContainsNonWhitespace=!1,t.textBufferContainsWhitespace=!1,!t.currentNode)return;let 
a=t.currentNode?.tagHandler?.excludesTextNodes,o=t.depthMap[34]>0;if(!o&&!r&&!t.currentNode.childTextNodeIndex)return;let s=e;if(s.length===0)return;let c=_(t.currentNode),l=c[c.length-1];if(i&&!l?.childTextNodeIndex){let e=0;for(;e<s.length&&(o?s.charCodeAt(e)===10||s.charCodeAt(e)===13:b(s.charCodeAt(e)));)e++;e>0&&(s=s.substring(e))}t.hasEncodedHtmlEntity&&=(s=g(String(s)),!1);let u={type:2,value:s,parent:t.currentNode,regionId:t.currentNode?.regionId,index:t.currentNode.currentWalkIndex++,depth:t.depth,containsWhitespace:i,excludedFromMarkdown:a};for(let e of c)e.childTextNodeIndex=(e.childTextNodeIndex||0)+1;n({type:0,node:u}),t.lastTextNode=u}function w(e,t,n,i){let a=t+2,o=a,s=e.length,c=!1;for(;a<s;){if(e.charCodeAt(a)===62){c=!0;break}a++}if(!c)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let l=r[e.substring(o,a).toLowerCase()]??-1;if(n.currentNode?.tagHandler?.isNonNesting&&l!==n.currentNode.tagId)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let u=n.currentNode;if(u){let e=u.tagId!==l;for(;u&&e;)T(u,n,i),u=u.parent,e=u?.tagId!==l}return u&&T(u,n,i),n.justClosedTag=!0,{complete:!0,newPosition:a+1,remainingText:``}}function T(e,t,n){if(e){if(e.tagId===26&&!e.childTextNodeIndex){let t=e.attributes?.title||e.attributes?.[`aria-label`]||``;if(t){e.childTextNodeIndex=1,n({type:0,node:{type:2,value:t,parent:e,index:0,depth:e.depth+1}});for(let t of _(e))t.childTextNodeIndex=(t.childTextNodeIndex||0)+1}}e.tagId&&(t.depthMap[e.tagId]=Math.max(0,t.depthMap[e.tagId]-1)),e.tagHandler?.isNonNesting&&(t.inSingleQuote=!1,t.inDoubleQuote=!1,t.inBacktick=!1,t.lastCharWasBackslash=!1),t.depth--,n({type:1,node:e}),t.currentNode=t.currentNode.parent,t.hasEncodedHtmlEntity=!1,t.justClosedTag=!0}}function E(e,t){let n=t,r=e.length;if(n+3<r&&e.charCodeAt(n+2)===45&&e.charCodeAt(n+3)===45){for(n+=4;n<r-2;){if(e.charCodeAt(n)===45&&e.charCodeAt(n+1)===45&&e.charCodeAt(n+2)===62)return 
n+=3,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:t,remainingText:e.substring(t)}}else{for(n+=2;n<r;){if(e.charCodeAt(n)===62)return n++,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:n,remainingText:e.substring(t,n)}}}function D(e,t,n,r,i,a){i.currentNode?.tagHandler?.isNonNesting&&T(i.currentNode,i,a);let o=h[t],s=O(n,r,o);if(!s.complete)return{complete:!1,newPosition:r,remainingText:`<${e}${s.attrBuffer}`,selfClosing:!1};let c=i.depthMap[t];i.depthMap[t]=c+1,i.depth++,r=s.newPosition,i.currentNode&&(i.currentNode.currentWalkIndex=i.currentNode.currentWalkIndex||0);let l=i.currentNode?i.currentNode.currentWalkIndex++:0,u={type:1,name:e,attributes:s.attributes,parent:i.currentNode,depthMap:y(i.depthMap),depth:i.depth,index:l,regionId:i.currentNode?.regionId,tagId:t,tagHandler:o};i.lastTextNode=u,a({type:0,node:u});let d=u;return d.currentWalkIndex=0,i.currentNode=d,i.hasEncodedHtmlEntity=!1,o?.isNonNesting&&!s.selfClosing&&(i.inSingleQuote=!1,i.inDoubleQuote=!1,i.inBacktick=!1,i.lastCharWasBackslash=!1),s.selfClosing?(T(u,i,a),i.justClosedTag=!0):i.justClosedTag=!1,{complete:!0,newPosition:r,remainingText:``,selfClosing:s.selfClosing}}function O(e,t,n){let r=t,i=e.length,a=n?.isSelfClosing||!1,o=r,s=!1,c=0,l=0;for(;r<i;){let t=e.charCodeAt(r);if(s){t===c&&l!==92&&(s=!1),r++;continue}else if(t===34||t===39)s=!0,c=t;else if(t===47&&r+1<i&&e.charCodeAt(r+1)===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+2,attributes:k(t),selfClosing:!0,attrBuffer:t}}else if(t===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+1,attributes:k(t),selfClosing:a,attrBuffer:t}}r++,l=t}return{complete:!1,newPosition:r,attributes:v,selfClosing:!1,attrBuffer:e.substring(o,r)}}function k(e){if(!e)return v;let t={},n=e.length,r=0,i=0,a=0,o=0,s=0,c=0,l=``;for(;r<n;){let u=e.charCodeAt(r),d=b(u);switch(i){case 0:d||(i=1,a=r,o=0);break;case 
1:(u===61||d)&&(o=r,l=e.substring(a,o).toLowerCase(),i=u===61?3:2);break;case 2:u===61?i=3:d||(t[l]=``,i=1,a=r,o=0);break;case 3:u===34||u===39?(c=u,i=4,s=r+1):d||(i=5,s=r);break;case 4:u===92&&r+1<n?r++:u===c&&(t[l]=e.substring(s,r),i=0);break;case 5:(d||u===62)&&(t[l]=e.substring(s,r),i=0);break}r++}if(i===4||i===5)l&&(t[l]=e.substring(s,r));else if(i===1||i===2||i===3){o||=r;let n=e.substring(a,o).toLowerCase();n&&(t[n]=``)}return t}function A(e,t,n,r){if(t?.length){for(let r of t){let t=r.beforeNodeProcess?.(e,n);if(typeof t==`object`&&t.skip)return!0}if(e.node.type===1){let r=e.node;if(e.type===0)for(let e of t)e.processAttributes&&e.processAttributes(r,n);let i=e.type===0?`onNodeEnter`:`onNodeExit`,a=[];for(let e of t)if(e[i]){let t=e[i](r,n);t&&a.push(t)}a.length>0&&(r.pluginOutput=(r.pluginOutput||[]).concat(a))}else if(e.node.type===2&&e.type===0){let r=e.node;for(let e of t)if(e.processTextNode){let t=e.processTextNode(r,n);if(t){if(t.skip)return!0;r.value=t.content}}}}return r(e),!1}function j(e,t,n){if(e===` `||e===`
9
6
  `||e===` `||t===` `||t===`
10
- `||t===` `)return!1;let r=new Set([`[`,`(`,`>`,`*`,`_`,"`"]),i=new Set([`]`,`)`,`<`,`.`,`,`,`!`,`?`,`:`,`;`,`*`,`_`,"`"]);return e===`|`&&t===`<`&&n&&n.depthMap[T]>0?!0:!(r.has(e)||i.has(t))}function cn(e,t,n){return!!e&&e!==`
11
- `&&e!==` `&&e!==`[`&&e!==`>`&&!t?.tagHandler?.isInline&&n.value[0]!==` `}function ln(e){let t=e.tagId,n=e.depthMap;if(t!==C&&n[C]>0||t!==b&&n[b]>0)return z;let r=t!==void 0&&(t>=l&&t<=m||t===k||t===A),i=e.parent;for(;i;){if(i.tagHandler?.collapsesInnerWhiteSpace){if(r&&i.tagId===j){i=i.parent;continue}return z}i=i.parent}return e.tagHandler?.spacing?e.tagHandler?.spacing:At}function un(n={}){let r={options:n,regionToggles:new Map,regionContentBuffers:new Map,depthMap:new Uint8Array(bt)};r.regionToggles.set(0,!0),r.regionContentBuffers.set(0,[]);let i=0;function a(t){let{type:n,node:i}=t,a=r.lastNode;r.lastNode=t.node,r.depth=i.depth;let o=r.regionContentBuffers.get(i.regionId||0)||[],s=o[o.length-1],c=s?.charAt(s.length-1)||``,l;if(l=s?.length>1?s.charAt(s.length-2):o[o.length-2]?.charAt(o[o.length-2].length-1),i.type===L&&n===R){let t=i;if(t.value){if(t.excludedFromMarkdown||t.value===` `&&c===`
12
- `)return;cn(c,a,t)&&(t.value=` ${t.value}`),e(t,t.value,r)}r.lastTextNode=t;return}if(i.type!==I)return;let u={node:i,state:r},d=[],f=i;f.pluginOutput?.length&&(d.push(...f.pluginOutput),f.pluginOutput=[]);let p=r.lastContentCache,m=0;c===`
7
+ `||t===` `)return!1;let r=new Set([`[`,`(`,`>`,`*`,`_`,"`"]),i=new Set([`]`,`)`,`<`,`.`,`,`,`!`,`?`,`:`,`;`,`*`,`_`,"`"]);return e===`|`&&t===`<`&&n&&n.depthMap[28]>0?!0:!(r.has(e)||i.has(t))}function M(e,t,n){return!!e&&e!==`
8
+ `&&e!==` `&&e!==`[`&&e!==`>`&&!t?.tagHandler?.isInline&&n.value[0]!==` `}function N(e){let t=e.tagId,n=e.depthMap;if(t!==25&&n[25]>0||t!==22&&n[22]>0)return i;let r=t!==void 0&&(t>=7&&t<=12||t===35||t===36),o=e.parent;for(;o;){if(o.tagHandler?.collapsesInnerWhiteSpace){if(r&&o.tagId===37){o=o.parent;continue}return i}o=o.parent}return e.tagHandler?.spacing?e.tagHandler?.spacing:a}function P(n={}){let r={options:n,regionToggles:new Map,regionContentBuffers:new Map,depthMap:new Uint8Array(108)};r.regionToggles.set(0,!0),r.regionContentBuffers.set(0,[]);let i=0;function a(t){let{type:n,node:i}=t,a=r.lastNode;r.lastNode=t.node,r.depth=i.depth;let o=r.regionContentBuffers.get(i.regionId||0)||[],s=o[o.length-1],c=s?.charAt(s.length-1)||``,l;if(l=s?.length>1?s.charAt(s.length-2):o[o.length-2]?.charAt(o[o.length-2].length-1),i.type===2&&n===0){let t=i;if(t.value){if(t.excludedFromMarkdown||t.value===` `&&c===`
9
+ `)return;M(c,a,t)&&(t.value=` ${t.value}`),e(t,t.value,r)}r.lastTextNode=t;return}if(i.type!==1)return;let u={node:i,state:r},d=[],f=i;f.pluginOutput?.length&&(d.push(...f.pluginOutput),f.pluginOutput=[]);let p=r.lastContentCache,m=0;c===`
13
10
  `&&m++,l===`
14
- `&&m++;let h=n===R?`enter`:`exit`,g=i.tagHandler;if(!d.length&&g?.[h]){let e=g[h](u);e&&d.push(e)}let _=ln(i),v=_[n]||0,y=Math.max(0,v-m);if(y>0){if(!o.length){for(let t of d)e(i,t,r);return}let t=`
15
- `.repeat(y);c===` `&&o?.length&&(o[o.length-1]=o[o.length-1].substring(0,o[o.length-1].length-1)),n===R?d.unshift(t):d.push(t)}else if(p&&r.lastTextNode?.containsWhitespace&&i.parent&&`value`in r.lastTextNode&&typeof r.lastTextNode.value==`string`&&(!i.parent.depthMap[O]||i.parent.tagId===O)){let e=i.tagHandler?.isInline,t=i.tagHandler?.collapsesInnerWhiteSpace,a=i.tagHandler?.spacing&&Array.isArray(i.tagHandler.spacing),s=!e&&!t&&v>0,c=(!e||n===St)&&!s&&!(t&&n===R)&&!(a&&n===R);if(c){let e=p.length,t=p.trimEnd(),n=e-t.length;n>0&&o?.length&&o[o.length-1]===p&&(o[o.length-1]=t)}r.lastTextNode=void 0}d[0]?.[0]&&n===R&&c&&sn(c,d[0][0],r)&&e(i,` `,r);for(let t of d)e(i,t,r)}function o(e){let t={depthMap:r.depthMap,depth:0,plugins:r.options?.plugins||[]};Qt(e,t,e=>{on(e,r.options?.plugins,r,a)})}function s(){let e=t(r);return e.trimEnd()}function c(){let e=[];for(let[t,n]of Array.from(r.regionContentBuffers.entries())){let i=r.regionToggles.get(t);i&&e.push(...n)}let t=e.join(``).trimStart(),n=t.slice(i);return i=t.length,n}return{processEvent:a,processHtml:o,getMarkdown:s,getMarkdownChunk:c,state:r}}function dn(e,t={}){let n=un(t);return n.processHtml(e),n.getMarkdown()}const fn={htmlToMarkdown:dn};typeof window<`u`&&(window.mdream=fn);var pn=fn;
16
-
17
- // Expose mdream globally
18
- if (typeof window !== 'undefined') {
19
- window.mdream = fn;
20
- }
21
-
22
- })();
11
+ `&&m++;let h=n===0?`enter`:`exit`,g=i.tagHandler;if(!d.length&&g?.[h]){let e=g[h](u);e&&d.push(e)}let _=N(i)[n]||0,v=Math.max(0,_-m);if(v>0){if(!o.length){for(let t of d)e(i,t,r);return}let t=`
12
+ `.repeat(v);c===` `&&o?.length&&(o[o.length-1]=o[o.length-1].substring(0,o[o.length-1].length-1)),n===0?d.unshift(t):d.push(t)}else if(p&&r.lastTextNode?.containsWhitespace&&i.parent&&`value`in r.lastTextNode&&typeof r.lastTextNode.value==`string`&&(!i.parent.depthMap[34]||i.parent.tagId===34)){let e=i.tagHandler?.isInline,t=i.tagHandler?.collapsesInnerWhiteSpace,a=i.tagHandler?.spacing&&Array.isArray(i.tagHandler.spacing);if((!e||n===1)&&!(!e&&!t&&_>0)&&!(t&&n===0)&&!(a&&n===0)){let e=p.length,t=p.trimEnd();e-t.length>0&&o?.length&&o[o.length-1]===p&&(o[o.length-1]=t)}r.lastTextNode=void 0}d[0]?.[0]&&n===0&&c&&j(c,d[0][0],r)&&e(i,` `,r);for(let t of d)e(i,t,r)}function o(e){x(e,{depthMap:r.depthMap,depth:0,plugins:r.options?.plugins||[]},e=>{A(e,r.options?.plugins,r,a)})}function s(){return t(r).trimEnd()}function c(){let e=[];for(let[t,n]of Array.from(r.regionContentBuffers.entries()))r.regionToggles.get(t)&&e.push(...n);let t=e.join(``).trimStart(),n=t.slice(i);return i=t.length,n}return{processEvent:a,processHtml:o,getMarkdown:s,getMarkdownChunk:c,state:r}}function F(e,t={}){let n=P(t);return n.processHtml(e),n.getMarkdown()}const I={htmlToMarkdown:F};typeof window<`u`&&(window.mdream=I);var L=I;
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { BufferRegion, ELEMENT_NODE$1 as ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MarkdownChunk, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, SplitterOptions, TEXT_NODE$1 as TEXT_NODE, TagHandler, TailwindContext, TextNode } from "./_chunks/types-DqiI86yW.mjs";
2
- import { createPlugin$1 as createPlugin } from "./_chunks/plugin-CgnpSqtP.mjs";
1
+ import { _ as TagHandler, a as HandlerContext, b as ExtractedElement, c as MdreamRuntimeState, d as Plugin, f as PluginContext, g as TEXT_NODE, h as SplitterOptions, i as HTMLToMarkdownOptions, l as Node, m as ReadabilityContext, n as ELEMENT_NODE, o as MarkdownChunk, p as PluginCreationOptions, r as ElementNode, s as MdreamProcessingState, t as BufferRegion, u as NodeEvent, v as TailwindContext, y as TextNode } from "./_chunks/types-CT4ZxeOH.mjs";
2
+ import { t as createPlugin } from "./_chunks/plugin-D5soyEXm.mjs";
3
3
  import { ReadableStream } from "node:stream/web";
4
4
 
5
5
  //#region src/const.d.ts
@@ -165,9 +165,6 @@ interface ParseResult {
165
165
  * Completely decoupled from markdown generation
166
166
  */
167
167
  declare function parseHtml(html: string, options?: ParseOptions): ParseResult;
168
- /**
169
- * Streaming HTML parser - calls onEvent for each DOM event
170
- */
171
168
  //#endregion
172
169
  //#region src/stream.d.ts
173
170
  /**
package/dist/index.mjs CHANGED
@@ -1,6 +1,6 @@
1
- import { TagIdMap } from "./_chunks/const-BOAJ1T5c.mjs";
2
- import { MarkdownProcessor, parseHtml } from "./_chunks/markdown-processor-f7XT0--8.mjs";
3
- import { createPlugin } from "./_chunks/plugin-DrovQriD.mjs";
4
- import { htmlToMarkdown, streamHtmlToMarkdown } from "./_chunks/src-C3QpB75q.mjs";
1
+ import { _n as TagIdMap } from "./_chunks/const-Bf_XN9U9.mjs";
2
+ import { i as parseHtml, t as MarkdownProcessor } from "./_chunks/markdown-processor-D26Uo5td.mjs";
3
+ import { t as createPlugin } from "./_chunks/plugin-CjWWQTuL.mjs";
4
+ import { n as streamHtmlToMarkdown, t as htmlToMarkdown } from "./_chunks/src-BJpipdul.mjs";
5
5
 
6
6
  export { MarkdownProcessor, TagIdMap, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
package/dist/llms-txt.mjs CHANGED
@@ -1,8 +1,8 @@
1
- import "./_chunks/const-BOAJ1T5c.mjs";
2
- import "./_chunks/markdown-processor-f7XT0--8.mjs";
3
- import "./_chunks/plugin-DrovQriD.mjs";
4
- import "./_chunks/src-C3QpB75q.mjs";
5
- import "./_chunks/extraction-BPaDGYvv.mjs";
6
- import { generateLlmsTxtArtifacts } from "./_chunks/llms-txt-DC12yO2l.mjs";
1
+ import "./_chunks/const-Bf_XN9U9.mjs";
2
+ import "./_chunks/markdown-processor-D26Uo5td.mjs";
3
+ import "./_chunks/plugin-CjWWQTuL.mjs";
4
+ import "./_chunks/src-BJpipdul.mjs";
5
+ import "./_chunks/extraction-BA9MDtq3.mjs";
6
+ import { t as generateLlmsTxtArtifacts } from "./_chunks/llms-txt-D7Hduhij.mjs";
7
7
 
8
8
  export { generateLlmsTxtArtifacts };
@@ -1,5 +1,5 @@
1
- import { Plugin, extractionPlugin$1 as extractionPlugin } from "./_chunks/types-DqiI86yW.mjs";
2
- import { createPlugin$1 as createPlugin } from "./_chunks/plugin-CgnpSqtP.mjs";
1
+ import { d as Plugin, x as extractionPlugin } from "./_chunks/types-CT4ZxeOH.mjs";
2
+ import { t as createPlugin } from "./_chunks/plugin-D5soyEXm.mjs";
3
3
 
4
4
  //#region src/plugins/filter.d.ts
5
5
 
package/dist/plugins.mjs CHANGED
@@ -1,6 +1,6 @@
1
- import "./_chunks/const-BOAJ1T5c.mjs";
2
- import { createPlugin } from "./_chunks/plugin-DrovQriD.mjs";
3
- import { extractionPlugin } from "./_chunks/extraction-BPaDGYvv.mjs";
4
- import { filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin } from "./_chunks/plugins-C5_irVJs.mjs";
1
+ import "./_chunks/const-Bf_XN9U9.mjs";
2
+ import { t as createPlugin } from "./_chunks/plugin-CjWWQTuL.mjs";
3
+ import { t as extractionPlugin } from "./_chunks/extraction-BA9MDtq3.mjs";
4
+ import { a as filterPlugin, i as frontmatterPlugin, n as readabilityPlugin, r as isolateMainPlugin, t as tailwindPlugin } from "./_chunks/plugins-DJnqR2fA.mjs";
5
5
 
6
6
  export { createPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
@@ -1,4 +1,4 @@
1
- import { HTMLToMarkdownOptions } from "../_chunks/types-DqiI86yW.mjs";
1
+ import { i as HTMLToMarkdownOptions } from "../_chunks/types-CT4ZxeOH.mjs";
2
2
 
3
3
  //#region src/preset/minimal.d.ts
4
4
 
@@ -1,7 +1,7 @@
1
- import "../_chunks/const-BOAJ1T5c.mjs";
2
- import "../_chunks/plugin-DrovQriD.mjs";
3
- import "../_chunks/extraction-BPaDGYvv.mjs";
4
- import "../_chunks/plugins-C5_irVJs.mjs";
5
- import { withMinimalPreset } from "../_chunks/minimal-co1tIZYm.mjs";
1
+ import "../_chunks/const-Bf_XN9U9.mjs";
2
+ import "../_chunks/plugin-CjWWQTuL.mjs";
3
+ import "../_chunks/extraction-BA9MDtq3.mjs";
4
+ import "../_chunks/plugins-DJnqR2fA.mjs";
5
+ import { t as withMinimalPreset } from "../_chunks/minimal-BiDhcwif.mjs";
6
6
 
7
7
  export { withMinimalPreset };
@@ -1,11 +1,16 @@
1
- import { MarkdownChunk, SplitterOptions } from "./_chunks/types-DqiI86yW.mjs";
1
+ import { h as SplitterOptions, o as MarkdownChunk } from "./_chunks/types-CT4ZxeOH.mjs";
2
2
 
3
3
  //#region src/splitter.d.ts
4
4
 
5
+ /**
6
+ * Convert HTML to Markdown and split into chunks in single pass
7
+ * Yields chunks during HTML event processing for better memory efficiency
8
+ */
9
+ declare function htmlToMarkdownSplitChunksStream(html: string, options?: SplitterOptions): Generator<MarkdownChunk, void, undefined>;
5
10
  /**
6
11
  * Convert HTML to Markdown and split into chunks in single pass
7
12
  * Chunks are created during HTML event processing
8
13
  */
9
14
  declare function htmlToMarkdownSplitChunks(html: string, options?: SplitterOptions): MarkdownChunk[];
10
15
  //#endregion
11
- export { type MarkdownChunk, type SplitterOptions, htmlToMarkdownSplitChunks };
16
+ export { type MarkdownChunk, type SplitterOptions, htmlToMarkdownSplitChunks, htmlToMarkdownSplitChunksStream };
package/dist/splitter.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { ELEMENT_NODE, NodeEventEnter, NodeEventExit, TAG_CODE, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HR, TAG_PRE, TEXT_NODE } from "./_chunks/const-BOAJ1T5c.mjs";
2
- import { createMarkdownProcessor, parseHtmlStream, processPluginsForEvent } from "./_chunks/markdown-processor-f7XT0--8.mjs";
1
+ import { $ as TAG_H2, F as TAG_CODE, Nt as TAG_PRE, Q as TAG_H1, et as TAG_H3, gn as TEXT_NODE, h as NodeEventExit, m as NodeEventEnter, nt as TAG_H5, ot as TAG_HR, r as ELEMENT_NODE, rt as TAG_H6, tt as TAG_H4 } from "./_chunks/const-Bf_XN9U9.mjs";
2
+ import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./_chunks/markdown-processor-D26Uo5td.mjs";
3
3
 
4
4
  //#region src/splitter.ts
5
5
  const DEFAULT_HEADERS_TO_SPLIT_ON = [
@@ -36,26 +36,22 @@ function shouldSplitOnHeader(tagId, options) {
36
36
  */
37
37
  function getCurrentMarkdown(state) {
38
38
  const fragments = [];
39
- for (const [regionId, content] of state.regionContentBuffers.entries()) {
40
- const include = state.regionToggles.get(regionId);
41
- if (include) fragments.push(...content);
42
- }
39
+ for (const [regionId, content] of state.regionContentBuffers.entries()) if (state.regionToggles.get(regionId)) fragments.push(...content);
43
40
  return fragments.join("").trimStart();
44
41
  }
45
42
  /**
46
43
  * Convert HTML to Markdown and split into chunks in single pass
47
- * Chunks are created during HTML event processing
44
+ * Yields chunks during HTML event processing for better memory efficiency
48
45
  */
49
- function htmlToMarkdownSplitChunks(html, options = {}) {
46
+ function* htmlToMarkdownSplitChunksStream(html, options = {}) {
50
47
  const opts = createOptions(options);
51
48
  if (opts.chunkOverlap >= opts.chunkSize) throw new Error("chunkOverlap must be less than chunkSize");
52
49
  const processor = createMarkdownProcessor({
53
50
  origin: opts.origin,
54
51
  plugins: opts.plugins
55
52
  });
56
- const chunks = [];
57
- const headerHierarchy = new Map();
58
- const seenSplitHeaders = new Set();
53
+ const headerHierarchy = /* @__PURE__ */ new Map();
54
+ const seenSplitHeaders = /* @__PURE__ */ new Set();
59
55
  let currentChunkCodeLanguage = "";
60
56
  let collectingHeaderText = false;
61
57
  let currentHeaderTagId = null;
@@ -63,19 +59,27 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
63
59
  let lineNumber = 1;
64
60
  let lastChunkEndPosition = 0;
65
61
  let lastSplitPosition = 0;
66
- function flushChunk(endPosition, applyOverlap = false) {
62
+ function* flushChunk(endPosition, applyOverlap = false) {
67
63
  const currentMd = getCurrentMarkdown(processor.state);
68
64
  const chunkEnd = endPosition ?? currentMd.length;
69
- const chunkContent = currentMd.slice(lastChunkEndPosition, chunkEnd);
70
- if (!chunkContent.trim()) {
65
+ const originalChunkContent = currentMd.slice(lastChunkEndPosition, chunkEnd);
66
+ if (!originalChunkContent.trim()) {
71
67
  lastChunkEndPosition = chunkEnd;
72
68
  return;
73
69
  }
70
+ let chunkContent = originalChunkContent;
71
+ if (opts.stripHeaders) {
72
+ chunkContent = chunkContent.split("\n").filter((line) => !line.match(/^#{1,6}\s+/)).join("\n").trim();
73
+ if (!chunkContent) {
74
+ lastChunkEndPosition = chunkEnd;
75
+ return;
76
+ }
77
+ }
74
78
  const chunk = {
75
79
  content: chunkContent.trimEnd(),
76
80
  metadata: { loc: { lines: {
77
81
  from: lineNumber,
78
- to: lineNumber + (chunkContent.match(/\n/g) || []).length
82
+ to: lineNumber + (originalChunkContent.match(/\n/g) || []).length
79
83
  } } }
80
84
  };
81
85
  if (headerHierarchy.size > 0) {
@@ -86,22 +90,25 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
86
90
  }
87
91
  }
88
92
  if (currentChunkCodeLanguage) chunk.metadata.code = currentChunkCodeLanguage;
89
- chunks.push(chunk);
93
+ yield chunk;
90
94
  currentChunkCodeLanguage = "";
91
95
  lastSplitPosition = chunkEnd;
92
96
  if (applyOverlap && opts.chunkOverlap > 0) {
93
- const maxOverlap = Math.max(0, chunkContent.length - 1);
94
- const actualOverlap = Math.min(opts.chunkOverlap, maxOverlap);
95
- lastChunkEndPosition = chunkEnd - actualOverlap;
97
+ const maxOverlap = Math.max(0, originalChunkContent.length - 1);
98
+ lastChunkEndPosition = chunkEnd - Math.min(opts.chunkOverlap, maxOverlap);
96
99
  } else lastChunkEndPosition = chunkEnd;
97
- lineNumber += (chunkContent.match(/\n/g) || []).length;
100
+ lineNumber += (originalChunkContent.match(/\n/g) || []).length;
98
101
  }
99
102
  const parseState = {
100
103
  depthMap: processor.state.depthMap,
101
104
  depth: 0,
102
105
  plugins: opts.plugins
103
106
  };
107
+ const eventBuffer = [];
104
108
  parseHtmlStream(html, parseState, (event) => {
109
+ eventBuffer.push(event);
110
+ });
111
+ for (const event of eventBuffer) {
105
112
  const { type: eventType, node } = event;
106
113
  if (node.type === ELEMENT_NODE) {
107
114
  const element = node;
@@ -113,7 +120,7 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
113
120
  currentHeaderText = "";
114
121
  if (shouldSplitOnHeader(tagId, opts)) {
115
122
  if (seenSplitHeaders.has(tagId)) {
116
- flushChunk();
123
+ yield* flushChunk();
117
124
  for (let i = tagId; i <= TAG_H6; i++) headerHierarchy.delete(i);
118
125
  }
119
126
  seenSplitHeaders.add(tagId);
@@ -130,17 +137,13 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
130
137
  if (lang && !currentChunkCodeLanguage) currentChunkCodeLanguage = lang;
131
138
  }
132
139
  }
133
- if (tagId === TAG_HR && eventType === NodeEventEnter) flushChunk();
134
- }
135
- if (collectingHeaderText && node.type === TEXT_NODE) {
136
- const textNode = node;
137
- currentHeaderText += textNode.value;
140
+ if (tagId === TAG_HR && eventType === NodeEventEnter) yield* flushChunk();
138
141
  }
142
+ if (collectingHeaderText && node.type === TEXT_NODE) currentHeaderText += node.value;
139
143
  processPluginsForEvent(event, opts.plugins, processor.state, processor.processEvent);
140
144
  if (!opts.returnEachLine) {
141
145
  const currentMd = getCurrentMarkdown(processor.state);
142
- const currentChunkSize = opts.lengthFunction(currentMd.slice(lastChunkEndPosition));
143
- if (currentChunkSize > opts.chunkSize) {
146
+ if (opts.lengthFunction(currentMd.slice(lastChunkEndPosition)) > opts.chunkSize) {
144
147
  const idealSplitPos = lastChunkEndPosition + opts.chunkSize;
145
148
  const separators = [
146
149
  "\n\n",
@@ -168,11 +171,20 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
168
171
  }
169
172
  }
170
173
  if (splitPosition === -1 || splitPosition <= lastChunkEndPosition) splitPosition = currentMd.length;
171
- flushChunk(splitPosition, true);
174
+ yield* flushChunk(splitPosition, true);
172
175
  }
173
176
  }
174
- });
175
- flushChunk();
177
+ }
178
+ yield* flushChunk();
179
+ }
180
+ /**
181
+ * Convert HTML to Markdown and split into chunks in single pass
182
+ * Chunks are created during HTML event processing
183
+ */
184
+ function htmlToMarkdownSplitChunks(html, options = {}) {
185
+ const opts = createOptions(options);
186
+ const chunks = [];
187
+ for (const chunk of htmlToMarkdownSplitChunksStream(html, options)) chunks.push(chunk);
176
188
  if (opts.returnEachLine && chunks.length > 0) {
177
189
  const lineChunks = [];
178
190
  for (const chunk of chunks) {
@@ -194,9 +206,8 @@ function htmlToMarkdownSplitChunks(html, options = {}) {
194
206
  }
195
207
  return lineChunks;
196
208
  }
197
- if (opts.stripHeaders) for (const chunk of chunks) chunk.content = chunk.content.split("\n").filter((line) => !line.match(/^#{1,6}\s+/)).join("\n").trim();
198
- return chunks.filter((chunk) => chunk.content.length > 0);
209
+ return chunks;
199
210
  }
200
211
 
201
212
  //#endregion
202
- export { htmlToMarkdownSplitChunks };
213
+ export { htmlToMarkdownSplitChunks, htmlToMarkdownSplitChunksStream };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "0.13.3",
4
+ "version": "0.14.0",
5
5
  "description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",