mdream 0.15.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -201,7 +201,6 @@ Mdream includes several built-in plugins that can be used individually or combin
201
201
  - **[`frontmatterPlugin`](./src/plugins/frontmatter.ts)**: Generate YAML frontmatter from HTML head elements (title, meta tags)
202
202
  - **[`isolateMainPlugin`](./src/plugins/isolate-main.ts)**: Isolate main content using `<main>` elements or header-to-footer boundaries
203
203
  - **[`tailwindPlugin`](./src/plugins/tailwind.ts)**: Convert Tailwind CSS classes to Markdown formatting (bold, italic, etc.)
204
- - **[`readabilityPlugin`](./src/plugins/readability.ts)**: Content scoring and extraction (experimental)
205
204
 
206
205
  ```ts
207
206
  import { filterPlugin, frontmatterPlugin, isolateMainPlugin } from 'mdream/plugins'
@@ -215,6 +214,26 @@ const markdown = htmlToMarkdown(html, {
215
214
  })
216
215
  ```
217
216
 
217
+ ### Content Extraction with Readability
218
+
219
+ For advanced content extraction (article detection, boilerplate removal), use [@mozilla/readability](https://github.com/mozilla/readability) before mdream:
220
+
221
+ ```ts
222
+ import { Readability } from '@mozilla/readability'
223
+ import { JSDOM } from 'jsdom'
224
+ import { htmlToMarkdown } from 'mdream'
225
+
226
+ const dom = new JSDOM(html, { url: 'https://example.com' })
227
+ const article = new Readability(dom.window.document).parse()
228
+
229
+ if (article) {
230
+ const markdown = htmlToMarkdown(article.content)
231
+ // article.title, article.excerpt, article.byline also available
232
+ }
233
+ ```
234
+
235
+ This pipeline gives you battle-tested content extraction + fast markdown conversion.
236
+
218
237
  ### Plugin Hooks
219
238
 
220
239
  - `beforeNodeProcess`: Called before any node processing, can skip nodes
@@ -1,42 +1,3 @@
1
- //#region src/buffer-region.ts
2
- /**
3
- * Creates a new buffer region
4
- * Returns null if node already has a region assigned
5
- */
6
- function createBufferRegion(node, state, include) {
7
- if (node.regionId) return null;
8
- const id = state.regionToggles.size + 1;
9
- node.regionId = id;
10
- state.regionToggles.set(id, include);
11
- state.regionContentBuffers.set(id, []);
12
- return id;
13
- }
14
- /**
15
- * Collects content for a node into appropriate buffer (optimized)
16
- */
17
- function collectNodeContent(node, content, state) {
18
- if (!content) return;
19
- const regionId = node.regionId || 0;
20
- const targetBuffer = state.regionContentBuffers.get(regionId);
21
- if (targetBuffer) {
22
- targetBuffer.push(content);
23
- state.lastContentCache = content;
24
- }
25
- }
26
- /**
27
- * Assembles final content from buffer regions and clears them after use
28
- * Ensures frontmatter (regionId -1) appears first, followed by other included regions
29
- */
30
- function assembleBufferedContent(state) {
31
- const fragments = [];
32
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
33
- state.regionToggles.clear();
34
- state.regionContentBuffers.clear();
35
- return fragments.join("").trimStart();
36
- }
37
-
38
- //#endregion
39
- //#region src/const.ts
40
1
  const TAG_HTML = 0;
41
2
  const TAG_HEAD = 1;
42
3
  const TAG_DETAILS = 2;
@@ -280,6 +241,4 @@ const DEFAULT_BLOCK_SPACING = [2, 2];
280
241
  const BLOCKQUOTE_SPACING = [1, 1];
281
242
  const LIST_ITEM_SPACING = [1, 0];
282
243
  const TABLE_ROW_SPACING = [0, 1];
283
-
284
- //#endregion
285
- export { TAG_H2 as $, TAG_TBODY as $t, TAG_BUTTON as A, TAG_P as At, TAG_DFN as B, TAG_SCRIPT as Bt, TAG_AUDIO as C, TAG_METER as Ct, TAG_BLOCKQUOTE as D, TAG_OBJECT as Dt, TAG_BDO as E, TAG_NOSCRIPT as Et, TAG_CODE as F, TAG_Q as Ft, TAG_EM as G, TAG_SPAN as Gt, TAG_DIV as H, TAG_SELECT as Ht, TAG_COL as I, TAG_RP as It, TAG_FIGCAPTION as J, TAG_SUB as Jt, TAG_EMBED as K, TAG_STRONG as Kt, TAG_DD as L, TAG_RT as Lt, TAG_CAPTION as M, TAG_PLAINTEXT as Mt, TAG_CENTER as N, TAG_PRE as Nt, TAG_BODY as O, TAG_OL as Ot, TAG_CITE as P, TAG_PROGRESS as Pt, TAG_H1 as Q, TAG_TABLE as Qt, TAG_DEL as R, TAG_RUBY as Rt, TAG_ASIDE as S, TAG_META as St, TAG_BASE as T, TAG_NOFRAMES as Tt, TAG_DL as U, TAG_SMALL as Ut, TAG_DIALOG as V, TAG_SECTION as Vt, TAG_DT as W, TAG_SOURCE as Wt, TAG_FOOTER as X, TAG_SUP as Xt, TAG_FIGURE as Y, TAG_SUMMARY as Yt, TAG_FORM as Z, TAG_SVG as Zt, TAG_A as _, TagIdMap as _n, TAG_LI as _t, LIST_ITEM_SPACING as a, TAG_THEAD as an, TAG_HEADER as at, TAG_AREA as b, createBufferRegion as bn, TAG_MAP as bt, MARKDOWN_HORIZONTAL_RULE as c, TAG_TR as cn, TAG_I as ct, MARKDOWN_STRONG as d, TAG_UL as dn, TAG_INPUT as dt, TAG_TD as en, TAG_H3 as et, MAX_TAG_ID as f, TAG_VAR as fn, TAG_INS as ft, TABLE_ROW_SPACING as g, TEXT_NODE as gn, TAG_LEGEND as gt, NodeEventExit as h, TAG_XMP as hn, TAG_LABEL as ht, HTML_ENTITIES as i, TAG_TH as in, TAG_HEAD as it, TAG_CANVAS as j, TAG_PARAM as jt, TAG_BR as k, TAG_OPTION as kt, MARKDOWN_INLINE_CODE as l, TAG_TRACK as ln, TAG_IFRAME as lt, NodeEventEnter as m, TAG_WBR as mn, TAG_KEYGEN as mt, DEFAULT_BLOCK_SPACING as n, TAG_TEXTAREA as nn, TAG_H5 as nt, MARKDOWN_CODE_BLOCK as o, TAG_TIME as on, TAG_HR as ot, NO_SPACING as p, TAG_VIDEO as pn, TAG_KBD as pt, TAG_FIELDSET as q, TAG_STYLE as qt, ELEMENT_NODE as r, TAG_TFOOT as rn, TAG_H6 as rt, MARKDOWN_EMPHASIS as s, TAG_TITLE as sn, TAG_HTML as st, BLOCKQUOTE_SPACING as t, TAG_TEMPLATE as tn, TAG_H4 as tt, MARKDOWN_STRIKETHROUGH as u, TAG_U as un, TAG_IMG as ut, TAG_ABBR 
as v, assembleBufferedContent as vn, TAG_LINK as vt, TAG_B as w, TAG_NAV as wt, TAG_ARTICLE as x, TAG_MARK as xt, TAG_ADDRESS as y, collectNodeContent as yn, TAG_MAIN as yt, TAG_DETAILS as z, TAG_SAMP as zt };
244
+ export { TAG_H5 as $, TAG_TH as $t, TAG_CANVAS as A, TAG_PROGRESS as At, TAG_DIV as B, TAG_SPAN as Bt, TAG_B as C, TAG_OBJECT as Ct, TAG_BODY as D, TAG_PARAM as Dt, TAG_BLOCKQUOTE as E, TAG_P as Et, TAG_DD as F, TAG_SAMP as Ft, TAG_FIELDSET as G, TAG_SUP as Gt, TAG_DT as H, TAG_STYLE as Ht, TAG_DEL as I, TAG_SCRIPT as It, TAG_FORM as J, TAG_TBODY as Jt, TAG_FIGURE as K, TAG_SVG as Kt, TAG_DETAILS as L, TAG_SELECT as Lt, TAG_CITE as M, TAG_RP as Mt, TAG_CODE as N, TAG_RT as Nt, TAG_BR as O, TAG_PLAINTEXT as Ot, TAG_COL as P, TAG_RUBY as Pt, TAG_H4 as Q, TAG_TFOOT as Qt, TAG_DFN as R, TAG_SMALL as Rt, TAG_AUDIO as S, TAG_NOSCRIPT as St, TAG_BDO as T, TAG_OPTION as Tt, TAG_EM as U, TAG_SUB as Ut, TAG_DL as V, TAG_STRONG as Vt, TAG_EMBED as W, TAG_SUMMARY as Wt, TAG_H2 as X, TAG_TEMPLATE as Xt, TAG_H1 as Y, TAG_TD as Yt, TAG_H3 as Z, TAG_TEXTAREA as Zt, TAG_A as _, TAG_MARK as _t, LIST_ITEM_SPACING as a, TAG_U as an, TAG_IFRAME as at, TAG_AREA as b, TAG_NAV as bt, MARKDOWN_HORIZONTAL_RULE as c, TAG_VIDEO as cn, TAG_INS as ct, MARKDOWN_STRONG as d, TEXT_NODE as dn, TAG_LABEL as dt, TAG_THEAD as en, TAG_H6 as et, MAX_TAG_ID as f, TagIdMap as fn, TAG_LEGEND as ft, TABLE_ROW_SPACING as g, TAG_MAP as gt, NodeEventExit as h, TAG_MAIN as ht, HTML_ENTITIES as i, TAG_TRACK as in, TAG_I as it, TAG_CENTER as j, TAG_Q as jt, TAG_BUTTON as k, TAG_PRE as kt, MARKDOWN_INLINE_CODE as l, TAG_WBR as ln, TAG_KBD as lt, NodeEventEnter as m, TAG_LINK as mt, DEFAULT_BLOCK_SPACING as n, TAG_TITLE as nn, TAG_HEADER as nt, MARKDOWN_CODE_BLOCK as o, TAG_UL as on, TAG_IMG as ot, NO_SPACING as p, TAG_LI as pt, TAG_FOOTER as q, TAG_TABLE as qt, ELEMENT_NODE as r, TAG_TR as rn, TAG_HR as rt, MARKDOWN_EMPHASIS as s, TAG_VAR as sn, TAG_INPUT as st, BLOCKQUOTE_SPACING as t, TAG_TIME as tn, TAG_HEAD as tt, MARKDOWN_STRIKETHROUGH as u, TAG_XMP as un, TAG_KEYGEN as ut, TAG_ABBR as v, TAG_META as vt, TAG_BASE as w, TAG_OL as wt, TAG_ASIDE as x, TAG_NOFRAMES as xt, TAG_ADDRESS as y, TAG_METER as yt, 
TAG_DIALOG as z, TAG_SOURCE as zt };
@@ -1,18 +1,10 @@
1
- import { t as createPlugin } from "./plugin-CjWWQTuL.mjs";
2
-
3
- //#region src/libs/query-selector.ts
4
- /**
5
- * Creates a tag selector matcher (e.g., 'div', 'p', 'h1')
6
- */
1
+ import { t as createPlugin } from "./plugin.mjs";
7
2
  function createTagSelector(tagName) {
8
3
  return {
9
4
  matches: (element) => element.name === tagName,
10
5
  toString: () => tagName
11
6
  };
12
7
  }
13
- /**
14
- * Creates an ID selector matcher (e.g., '#main', '#content')
15
- */
16
8
  function createIdSelector(selector) {
17
9
  const id = selector.slice(1);
18
10
  return {
@@ -20,9 +12,6 @@ function createIdSelector(selector) {
20
12
  toString: () => `#${id}`
21
13
  };
22
14
  }
23
- /**
24
- * Creates a class selector matcher (e.g., '.container', '.header')
25
- */
26
15
  function createClassSelector(selector) {
27
16
  const className = selector.slice(1);
28
17
  return {
@@ -33,9 +22,6 @@ function createClassSelector(selector) {
33
22
  toString: () => `.${className}`
34
23
  };
35
24
  }
36
- /**
37
- * Creates an attribute selector matcher (e.g., '[data-id]', '[href="https://example.com"]')
38
- */
39
25
  function createAttributeSelector(selector) {
40
26
  const match = selector.match(/\[([^\]=~|^$*]+)(?:([=~|^$*]+)["']?([^"'\]]+)["']?)?\]/);
41
27
  const attrName = match ? match[1] : selector.slice(1, -1);
@@ -62,18 +48,12 @@ function createAttributeSelector(selector) {
62
48
  }
63
49
  };
64
50
  }
65
- /**
66
- * Creates a compound selector that combines multiple selectors (e.g., 'div.container', 'h1#title')
67
- */
68
51
  function createCompoundSelector(selectors) {
69
52
  return {
70
53
  matches: (element) => selectors.every((selector) => selector.matches(element)),
71
54
  toString: () => selectors.map((s) => s.toString()).join("")
72
55
  };
73
56
  }
74
- /**
75
- * Parses a CSS selector into a matcher
76
- */
77
57
  function parseSelector(selector) {
78
58
  selector = selector.trim();
79
59
  if (!selector) throw new Error("Empty selector");
@@ -100,9 +80,6 @@ function parseSelector(selector) {
100
80
  if (selectorParts.length === 1) return selectorParts[0];
101
81
  return createCompoundSelector(selectorParts);
102
82
  }
103
-
104
- //#endregion
105
- //#region src/plugins/extraction.ts
106
83
  function extractionPlugin(selectors) {
107
84
  const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
108
85
  matcher: parseSelector(selector),
@@ -139,6 +116,4 @@ function extractionPlugin(selectors) {
139
116
  }
140
117
  });
141
118
  }
142
-
143
- //#endregion
144
- export { parseSelector as n, extractionPlugin as t };
119
+ export { parseSelector as n, extractionPlugin as t };
@@ -1,6 +1,4 @@
1
- import { $ as TAG_H2, $t as TAG_TBODY, A as TAG_BUTTON, At as TAG_P, B as TAG_DFN, Bt as TAG_SCRIPT, C as TAG_AUDIO, Ct as TAG_METER, D as TAG_BLOCKQUOTE, E as TAG_BDO, Et as TAG_NOSCRIPT, F as TAG_CODE, Ft as TAG_Q, G as TAG_EM, Gt as TAG_SPAN, H as TAG_DIV, Ht as TAG_SELECT, I as TAG_COL, It as TAG_RP, Jt as TAG_SUB, K as TAG_EMBED, Kt as TAG_STRONG, L as TAG_DD, Lt as TAG_RT, Mt as TAG_PLAINTEXT, N as TAG_CENTER, Nt as TAG_PRE, O as TAG_BODY, Ot as TAG_OL, P as TAG_CITE, Pt as TAG_PROGRESS, Q as TAG_H1, Qt as TAG_TABLE, R as TAG_DEL, Rt as TAG_RUBY, S as TAG_ASIDE, St as TAG_META, T as TAG_BASE, Tt as TAG_NOFRAMES, U as TAG_DL, Ut as TAG_SMALL, V as TAG_DIALOG, W as TAG_DT, Wt as TAG_SOURCE, X as TAG_FOOTER, Xt as TAG_SUP, Yt as TAG_SUMMARY, Z as TAG_FORM, Zt as TAG_SVG, _ as TAG_A, _n as TagIdMap, _t as TAG_LI, a as LIST_ITEM_SPACING, an as TAG_THEAD, b as TAG_AREA, bt as TAG_MAP, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_TR, ct as TAG_I, d as MARKDOWN_STRONG, dn as TAG_UL, dt as TAG_INPUT, en as TAG_TD, et as TAG_H3, f as MAX_TAG_ID, fn as TAG_VAR, ft as TAG_INS, g as TABLE_ROW_SPACING, gn as TEXT_NODE, gt as TAG_LEGEND, h as NodeEventExit, hn as TAG_XMP, ht as TAG_LABEL, i as HTML_ENTITIES, in as TAG_TH, it as TAG_HEAD, j as TAG_CANVAS, jt as TAG_PARAM, k as TAG_BR, kt as TAG_OPTION, l as MARKDOWN_INLINE_CODE, ln as TAG_TRACK, lt as TAG_IFRAME, m as NodeEventEnter, mn as TAG_WBR, mt as TAG_KEYGEN, n as DEFAULT_BLOCK_SPACING, nn as TAG_TEXTAREA, nt as TAG_H5, o as MARKDOWN_CODE_BLOCK, on as TAG_TIME, ot as TAG_HR, p as NO_SPACING, pn as TAG_VIDEO, pt as TAG_KBD, q as TAG_FIELDSET, qt as TAG_STYLE, r as ELEMENT_NODE, rn as TAG_TFOOT, rt as TAG_H6, s as MARKDOWN_EMPHASIS, sn as TAG_TITLE, t as BLOCKQUOTE_SPACING, tn as TAG_TEMPLATE, tt as TAG_H4, u as MARKDOWN_STRIKETHROUGH, un as TAG_U, ut as TAG_IMG, v as TAG_ABBR, vn as assembleBufferedContent, vt as TAG_LINK, w as TAG_B, wt as TAG_NAV, xt as TAG_MARK, y as TAG_ADDRESS, yn as collectNodeContent, z as 
TAG_DETAILS, zt as TAG_SAMP } from "./const-Bf_XN9U9.mjs";
2
-
3
- //#region src/tags.ts
1
+ import { $ as TAG_H5, $t as TAG_TH, A as TAG_CANVAS, At as TAG_PROGRESS, B as TAG_DIV, Bt as TAG_SPAN, C as TAG_B, D as TAG_BODY, Dt as TAG_PARAM, E as TAG_BLOCKQUOTE, Et as TAG_P, F as TAG_DD, Ft as TAG_SAMP, G as TAG_FIELDSET, Gt as TAG_SUP, H as TAG_DT, Ht as TAG_STYLE, I as TAG_DEL, It as TAG_SCRIPT, J as TAG_FORM, Jt as TAG_TBODY, Kt as TAG_SVG, L as TAG_DETAILS, Lt as TAG_SELECT, M as TAG_CITE, Mt as TAG_RP, N as TAG_CODE, Nt as TAG_RT, O as TAG_BR, Ot as TAG_PLAINTEXT, P as TAG_COL, Pt as TAG_RUBY, Q as TAG_H4, Qt as TAG_TFOOT, R as TAG_DFN, Rt as TAG_SMALL, S as TAG_AUDIO, St as TAG_NOSCRIPT, T as TAG_BDO, Tt as TAG_OPTION, U as TAG_EM, Ut as TAG_SUB, V as TAG_DL, Vt as TAG_STRONG, W as TAG_EMBED, Wt as TAG_SUMMARY, X as TAG_H2, Xt as TAG_TEMPLATE, Y as TAG_H1, Yt as TAG_TD, Z as TAG_H3, Zt as TAG_TEXTAREA, _ as TAG_A, _t as TAG_MARK, a as LIST_ITEM_SPACING, an as TAG_U, at as TAG_IFRAME, b as TAG_AREA, bt as TAG_NAV, c as MARKDOWN_HORIZONTAL_RULE, cn as TAG_VIDEO, ct as TAG_INS, d as MARKDOWN_STRONG, dn as TEXT_NODE, dt as TAG_LABEL, en as TAG_THEAD, et as TAG_H6, f as MAX_TAG_ID, fn as TagIdMap, ft as TAG_LEGEND, g as TABLE_ROW_SPACING, gt as TAG_MAP, h as NodeEventExit, i as HTML_ENTITIES, in as TAG_TRACK, it as TAG_I, j as TAG_CENTER, jt as TAG_Q, k as TAG_BUTTON, kt as TAG_PRE, l as MARKDOWN_INLINE_CODE, ln as TAG_WBR, lt as TAG_KBD, m as NodeEventEnter, mt as TAG_LINK, n as DEFAULT_BLOCK_SPACING, nn as TAG_TITLE, o as MARKDOWN_CODE_BLOCK, on as TAG_UL, ot as TAG_IMG, p as NO_SPACING, pt as TAG_LI, q as TAG_FOOTER, qt as TAG_TABLE, r as ELEMENT_NODE, rn as TAG_TR, rt as TAG_HR, s as MARKDOWN_EMPHASIS, sn as TAG_VAR, st as TAG_INPUT, t as BLOCKQUOTE_SPACING, tn as TAG_TIME, tt as TAG_HEAD, u as MARKDOWN_STRIKETHROUGH, un as TAG_XMP, ut as TAG_KEYGEN, v as TAG_ABBR, vt as TAG_META, w as TAG_BASE, wt as TAG_OL, x as TAG_ASIDE, xt as TAG_NOFRAMES, y as TAG_ADDRESS, yt as TAG_METER, z as TAG_DIALOG, zt as TAG_SOURCE } from "./const.mjs";
4
2
  function resolveUrl(url, origin) {
5
3
  if (!url) return url;
6
4
  if (url.startsWith("//")) return `https:${url}`;
@@ -529,12 +527,6 @@ const tagHandlers = {
529
527
  spacing: [0, 1]
530
528
  }
531
529
  };
532
-
533
- //#endregion
534
- //#region src/utils.ts
535
- /**
536
- * Decode HTML entities - optimized version with single pass
537
- */
538
530
  function decodeHTMLEntities(text) {
539
531
  let result = "";
540
532
  let i = 0;
@@ -585,9 +577,6 @@ function traverseUpToFirstBlockNode(node) {
585
577
  }
586
578
  return parentsToIncrement;
587
579
  }
588
-
589
- //#endregion
590
- //#region src/parse.ts
591
580
  const LT_CHAR = 60;
592
581
  const GT_CHAR = 62;
593
582
  const SLASH_CHAR = 47;
@@ -610,16 +599,9 @@ const EMPTY_ATTRIBUTES = Object.freeze({});
610
599
  function copyDepthMap(depthMap) {
611
600
  return new Uint8Array(depthMap);
612
601
  }
613
- /**
614
- * Fast whitespace check using direct character code comparison
615
- */
616
602
  function isWhitespace(charCode) {
617
603
  return charCode === SPACE_CHAR || charCode === TAB_CHAR || charCode === NEWLINE_CHAR || charCode === CARRIAGE_RETURN_CHAR;
618
604
  }
619
- /**
620
- * Pure HTML parser that emits DOM events
621
- * Completely decoupled from markdown generation
622
- */
623
605
  function parseHtml(html, options = {}) {
624
606
  const events = [];
625
607
  return {
@@ -633,15 +615,9 @@ function parseHtml(html, options = {}) {
633
615
  })
634
616
  };
635
617
  }
636
- /**
637
- * Streaming HTML parser - calls onEvent for each DOM event
638
- */
639
618
  function parseHtmlStream(html, state, onEvent) {
640
619
  return parseHtmlInternal(html, state, onEvent);
641
620
  }
642
- /**
643
- * Internal parsing function - extracted from original parseHTML
644
- */
645
621
  function parseHtmlInternal(htmlChunk, state, handleEvent) {
646
622
  let textBuffer = "";
647
623
  state.depthMap ??= new Uint8Array(MAX_TAG_ID);
@@ -772,9 +748,6 @@ function parseHtmlInternal(htmlChunk, state, handleEvent) {
772
748
  }
773
749
  return textBuffer;
774
750
  }
775
- /**
776
- * Process accumulated text buffer and create text node event
777
- */
778
751
  function processTextBuffer(textBuffer, state, handleEvent) {
779
752
  const containsNonWhitespace = state.textBufferContainsNonWhitespace;
780
753
  const containsWhitespace = state.textBufferContainsWhitespace;
@@ -801,7 +774,6 @@ function processTextBuffer(textBuffer, state, handleEvent) {
801
774
  type: TEXT_NODE,
802
775
  value: text,
803
776
  parent: state.currentNode,
804
- regionId: state.currentNode?.regionId,
805
777
  index: state.currentNode.currentWalkIndex++,
806
778
  depth: state.depth,
807
779
  containsWhitespace,
@@ -814,9 +786,6 @@ function processTextBuffer(textBuffer, state, handleEvent) {
814
786
  });
815
787
  state.lastTextNode = textNode;
816
788
  }
817
- /**
818
- * Process HTML closing tag
819
- */
820
789
  function processClosingTag(htmlChunk, position, state, handleEvent) {
821
790
  let i = position + 2;
822
791
  const tagNameStart = i;
@@ -857,9 +826,6 @@ function processClosingTag(htmlChunk, position, state, handleEvent) {
857
826
  remainingText: ""
858
827
  };
859
828
  }
860
- /**
861
- * Close a node and emit exit event
862
- */
863
829
  function closeNode(node, state, handleEvent) {
864
830
  if (!node) return;
865
831
  if (node.tagId === TAG_A && !node.childTextNodeIndex) {
@@ -895,9 +861,6 @@ function closeNode(node, state, handleEvent) {
895
861
  state.hasEncodedHtmlEntity = false;
896
862
  state.justClosedTag = true;
897
863
  }
898
- /**
899
- * Process HTML comment or doctype
900
- */
901
864
  function processCommentOrDoctype(htmlChunk, position) {
902
865
  let i = position;
903
866
  const chunkLength = htmlChunk.length;
@@ -939,9 +902,6 @@ function processCommentOrDoctype(htmlChunk, position) {
939
902
  };
940
903
  }
941
904
  }
942
- /**
943
- * Process HTML opening tag
944
- */
945
905
  function processOpeningTag(tagName, tagId, htmlChunk, i, state, handleEvent) {
946
906
  if (state.currentNode?.tagHandler?.isNonNesting) closeNode(state.currentNode, state, handleEvent);
947
907
  const tagHandler = tagHandlers[tagId];
@@ -966,7 +926,6 @@ function processOpeningTag(tagName, tagId, htmlChunk, i, state, handleEvent) {
966
926
  depthMap: copyDepthMap(state.depthMap),
967
927
  depth: state.depth,
968
928
  index: currentWalkIndex,
969
- regionId: state.currentNode?.regionId,
970
929
  tagId,
971
930
  tagHandler
972
931
  };
@@ -996,9 +955,6 @@ function processOpeningTag(tagName, tagId, htmlChunk, i, state, handleEvent) {
996
955
  selfClosing: result.selfClosing
997
956
  };
998
957
  }
999
- /**
1000
- * Extract and process HTML tag attributes
1001
- */
1002
958
  function processTagAttributes(htmlChunk, position, tagHandler) {
1003
959
  let i = position;
1004
960
  const chunkLength = htmlChunk.length;
@@ -1046,9 +1002,6 @@ function processTagAttributes(htmlChunk, position, tagHandler) {
1046
1002
  attrBuffer: htmlChunk.substring(attrStartPos, i)
1047
1003
  };
1048
1004
  }
1049
- /**
1050
- * Parse HTML attributes string into key-value object
1051
- */
1052
1005
  function parseAttributes(attrStr) {
1053
1006
  if (!attrStr) return EMPTY_ATTRIBUTES;
1054
1007
  const result = {};
@@ -1128,19 +1081,6 @@ function parseAttributes(attrStr) {
1128
1081
  }
1129
1082
  return result;
1130
1083
  }
1131
-
1132
- //#endregion
1133
- //#region src/plugin-processor.ts
1134
- /**
1135
- * Processes plugins for a given node event
1136
- * Shared logic between markdown-processor.ts and stream.ts
1137
- *
1138
- * @param event - The node event to process
1139
- * @param plugins - Array of plugins to apply
1140
- * @param state - The current runtime state
1141
- * @param processEvent - Callback to process the event after plugin processing
1142
- * @returns true if the event should be skipped, false to continue processing
1143
- */
1144
1084
  function processPluginsForEvent(event, plugins, state, processEvent) {
1145
1085
  if (plugins?.length) {
1146
1086
  for (const plugin of plugins) {
@@ -1173,12 +1113,6 @@ function processPluginsForEvent(event, plugins, state, processEvent) {
1173
1113
  processEvent(event);
1174
1114
  return false;
1175
1115
  }
1176
-
1177
- //#endregion
1178
- //#region src/markdown-processor.ts
1179
- /**
1180
- * Determines if spacing is needed between two characters
1181
- */
1182
1116
  function needsSpacing(lastChar, firstChar, state) {
1183
1117
  if (lastChar === " " || lastChar === "\n" || lastChar === "\t") return false;
1184
1118
  if (firstChar === " " || firstChar === "\n" || firstChar === "\t") return false;
@@ -1208,15 +1142,9 @@ function needsSpacing(lastChar, firstChar, state) {
1208
1142
  if (noSpaceAfter.has(lastChar) || noSpaceBefore.has(firstChar)) return false;
1209
1143
  return true;
1210
1144
  }
1211
- /**
1212
- * Determines if spacing should be added before text content
1213
- */
1214
1145
  function shouldAddSpacingBeforeText(lastChar, lastNode, textNode) {
1215
1146
  return !!lastChar && lastChar !== "\n" && lastChar !== " " && lastChar !== "[" && lastChar !== ">" && !lastNode?.tagHandler?.isInline && textNode.value[0] !== " ";
1216
1147
  }
1217
- /**
1218
- * Calculate newline configuration based on tag handler spacing config
1219
- */
1220
1148
  function calculateNewLineConfig(node) {
1221
1149
  const tagId = node.tagId;
1222
1150
  const depthMap = node.depthMap;
@@ -1236,28 +1164,19 @@ function calculateNewLineConfig(node) {
1236
1164
  if (node.tagHandler?.spacing) return node.tagHandler?.spacing;
1237
1165
  return DEFAULT_BLOCK_SPACING;
1238
1166
  }
1239
- /**
1240
- * Creates a markdown processor that consumes DOM events and generates markdown
1241
- */
1242
1167
  function createMarkdownProcessor(options = {}) {
1243
1168
  const state = {
1244
1169
  options,
1245
- regionToggles: /* @__PURE__ */ new Map(),
1246
- regionContentBuffers: /* @__PURE__ */ new Map(),
1170
+ buffer: [],
1247
1171
  depthMap: new Uint8Array(MAX_TAG_ID)
1248
1172
  };
1249
- state.regionToggles.set(0, true);
1250
- state.regionContentBuffers.set(0, []);
1251
1173
  let lastYieldedLength = 0;
1252
- /**
1253
- * Process a DOM event and generate markdown
1254
- */
1255
1174
  function processEvent(event) {
1256
1175
  const { type: eventType, node } = event;
1257
1176
  const lastNode = state.lastNode;
1258
1177
  state.lastNode = event.node;
1259
1178
  state.depth = node.depth;
1260
- const buff = state.regionContentBuffers.get(node.regionId || 0) || [];
1179
+ const buff = state.buffer;
1261
1180
  const lastBuffEntry = buff[buff.length - 1];
1262
1181
  const lastChar = lastBuffEntry?.charAt(lastBuffEntry.length - 1) || "";
1263
1182
  let secondLastChar;
@@ -1269,7 +1188,8 @@ function createMarkdownProcessor(options = {}) {
1269
1188
  if (textNode.excludedFromMarkdown) return;
1270
1189
  if (textNode.value === " " && lastChar === "\n") return;
1271
1190
  if (shouldAddSpacingBeforeText(lastChar, lastNode, textNode)) textNode.value = ` ${textNode.value}`;
1272
- collectNodeContent(textNode, textNode.value, state);
1191
+ state.buffer.push(textNode.value);
1192
+ state.lastContentCache = textNode.value;
1273
1193
  }
1274
1194
  state.lastTextNode = textNode;
1275
1195
  return;
@@ -1299,7 +1219,10 @@ function createMarkdownProcessor(options = {}) {
1299
1219
  const newLines = Math.max(0, configuredNewLines - lastNewLines);
1300
1220
  if (newLines > 0) {
1301
1221
  if (!buff.length) {
1302
- for (const fragment of output) collectNodeContent(node, fragment, state);
1222
+ for (const fragment of output) if (fragment) {
1223
+ state.buffer.push(fragment);
1224
+ state.lastContentCache = fragment;
1225
+ }
1303
1226
  return;
1304
1227
  }
1305
1228
  const newlinesStr = "\n".repeat(newLines);
@@ -1321,12 +1244,15 @@ function createMarkdownProcessor(options = {}) {
1321
1244
  state.lastTextNode = void 0;
1322
1245
  }
1323
1246
  }
1324
- if (output[0]?.[0] && eventType === NodeEventEnter && lastChar && needsSpacing(lastChar, output[0][0], state)) collectNodeContent(node, " ", state);
1325
- for (const fragment of output) collectNodeContent(node, fragment, state);
1247
+ if (output[0]?.[0] && eventType === NodeEventEnter && lastChar && needsSpacing(lastChar, output[0][0], state)) {
1248
+ state.buffer.push(" ");
1249
+ state.lastContentCache = " ";
1250
+ }
1251
+ for (const fragment of output) if (fragment) {
1252
+ state.buffer.push(fragment);
1253
+ state.lastContentCache = fragment;
1254
+ }
1326
1255
  }
1327
- /**
1328
- * Process HTML string and generate events
1329
- */
1330
1256
  function processHtml(html) {
1331
1257
  parseHtmlStream(html, {
1332
1258
  depthMap: state.depthMap,
@@ -1336,19 +1262,13 @@ function createMarkdownProcessor(options = {}) {
1336
1262
  processPluginsForEvent(event, state.options?.plugins, state, processEvent);
1337
1263
  });
1338
1264
  }
1339
- /**
1340
- * Get the final markdown output
1341
- */
1342
1265
  function getMarkdown() {
1343
- return assembleBufferedContent(state).trimEnd();
1266
+ const result = state.buffer.join("").trimStart();
1267
+ state.buffer.length = 0;
1268
+ return result.trimEnd();
1344
1269
  }
1345
- /**
1346
- * Get new markdown content since the last call (for streaming)
1347
- */
1348
1270
  function getMarkdownChunk() {
1349
- const fragments = [];
1350
- for (const [regionId, content] of Array.from(state.regionContentBuffers.entries())) if (state.regionToggles.get(regionId)) fragments.push(...content);
1351
- const currentContent = fragments.join("").trimStart();
1271
+ const currentContent = state.buffer.join("").trimStart();
1352
1272
  const newContent = currentContent.slice(lastYieldedLength);
1353
1273
  lastYieldedLength = currentContent.length;
1354
1274
  return newContent;
@@ -1362,6 +1282,4 @@ function createMarkdownProcessor(options = {}) {
1362
1282
  };
1363
1283
  }
1364
1284
  const MarkdownProcessor = createMarkdownProcessor;
1365
-
1366
- //#endregion
1367
- export { parseHtmlStream as a, parseHtml as i, createMarkdownProcessor as n, processPluginsForEvent as r, MarkdownProcessor as t };
1285
+ export { parseHtmlStream as a, parseHtml as i, createMarkdownProcessor as n, processPluginsForEvent as r, MarkdownProcessor as t };
@@ -1,7 +1,6 @@
1
- import { d as Plugin } from "./types-CT4ZxeOH.mjs";
1
+ import { u as Plugin } from "./types.mjs";
2
2
 
3
3
  //#region src/pluggable/plugin.d.ts
4
-
5
4
  /**
6
5
  * Create a plugin that implements the Plugin interface with improved type inference
7
6
  *
@@ -0,0 +1,4 @@
1
+ function createPlugin(plugin) {
2
+ return plugin;
3
+ }
4
+ export { createPlugin as t };
@@ -1,12 +1,4 @@
1
- import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./markdown-processor-D26Uo5td.mjs";
2
-
3
- //#region src/stream.ts
4
- /**
5
- * Creates a markdown stream from an HTML stream
6
- * @param htmlStream - ReadableStream of HTML content (as Uint8Array or string)
7
- * @param options - Configuration options for conversion
8
- * @returns An async generator yielding markdown chunks
9
- */
1
+ import { a as parseHtmlStream, n as createMarkdownProcessor, r as processPluginsForEvent } from "./markdown-processor.mjs";
10
2
  async function* streamHtmlToMarkdown(htmlStream, options = {}) {
11
3
  if (!htmlStream) throw new Error("Invalid HTML stream provided");
12
4
  const decoder = new TextDecoder();
@@ -38,14 +30,4 @@ async function* streamHtmlToMarkdown(htmlStream, options = {}) {
38
30
  reader.releaseLock();
39
31
  }
40
32
  }
41
-
42
- //#endregion
43
- //#region src/index.ts
44
- function htmlToMarkdown(html, options = {}) {
45
- const processor = createMarkdownProcessor(options);
46
- processor.processHtml(html);
47
- return processor.getMarkdown();
48
- }
49
-
50
- //#endregion
51
- export { streamHtmlToMarkdown as n, htmlToMarkdown as t };
33
+ export { streamHtmlToMarkdown as t };