mdream 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -25
- package/dist/cli.mjs +4 -4
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +3 -3
- package/dist/plugins.d.mts +7 -2
- package/dist/plugins.d.ts +7 -2
- package/dist/plugins.mjs +46 -4
- package/dist/preset/minimal.d.mts +1 -1
- package/dist/preset/minimal.d.ts +1 -1
- package/dist/preset/minimal.mjs +3 -3
- package/dist/shared/{mdream.cpEmpxyh.mjs → mdream.C6Z2rfeq.mjs} +1 -1
- package/dist/shared/{mdream.a2AvjJLp.d.ts → mdream.C9ruFMrk.d.mts} +2 -0
- package/dist/shared/{mdream.a2AvjJLp.d.mts → mdream.C9ruFMrk.d.ts} +2 -0
- package/dist/shared/{mdream.DUeWbUFG.mjs → mdream.CRBi8vE8.mjs} +8 -3
- package/dist/shared/{mdream.-hdaPj9a.mjs → mdream.Ch6B8TEB.mjs} +1 -1
- package/dist/shared/{mdream.DEM9pag4.mjs → mdream.D5zBVbP9.mjs} +3 -3
- package/dist/shared/{mdream.C8ruysN5.mjs → mdream.DMUbnRbh.mjs} +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -44,7 +44,10 @@
|
|
|
44
44
|
Traditional HTML to Markdown converters were not built for LLMs or humans. They tend to be slow and bloated and produce output that's poorly suited for LLM token usage or for
|
|
45
45
|
human readability.
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
Other LLM-specific converters focus on supporting _all_ document formats, resulting in larger bundles and lower-quality Markdown output.
|
|
48
|
+
|
|
49
|
+
Mdream is an ultra-performant HTML to Markdown converter built specifically for producing high-quality Markdown for LLMs as quickly as possible. It provides
|
|
50
|
+
a powerful plugin system to customize the conversion process, allowing you to extract, transform, and filter content as needed.
|
|
48
51
|
|
|
49
52
|
Perfect for: RAG systems, web scraping, content extraction, ChatGPT/Claude integration, and large-scale document processing.
|
|
50
53
|
|
|
@@ -110,7 +113,7 @@ import { htmlToMarkdown } from 'mdream'
|
|
|
110
113
|
// Simple conversion
|
|
111
114
|
const markdown = htmlToMarkdown('<h1>Hello World</h1>')
|
|
112
115
|
console.log(markdown) // # Hello World
|
|
113
|
-
|
|
116
|
+
```
|
|
114
117
|
|
|
115
118
|
**Convert from Fetch**
|
|
116
119
|
|
|
@@ -135,33 +138,69 @@ for await (const chunk of markdownGenerator) {
|
|
|
135
138
|
|
|
136
139
|
### Plugin System
|
|
137
140
|
|
|
138
|
-
|
|
141
|
+
The plugin system allows you to customize HTML to Markdown conversion by hooking into the processing pipeline. Plugins can filter content, extract data, transform nodes, or add custom behavior.
|
|
142
|
+
|
|
143
|
+
#### Plugin Hooks
|
|
144
|
+
|
|
145
|
+
- `beforeNodeProcess`: Called before any node processing, can skip nodes
|
|
146
|
+
- `onNodeEnter`: Called when entering an element node
|
|
147
|
+
- `onNodeExit`: Called when exiting an element node
|
|
148
|
+
- `processTextNode`: Called for each text node
|
|
149
|
+
- `processAttributes`: Called to process element attributes
|
|
150
|
+
|
|
151
|
+
#### Creating a Plugin
|
|
152
|
+
|
|
153
|
+
Use `createPlugin()` to create a plugin with type safety:
|
|
139
154
|
|
|
140
155
|
```ts
|
|
141
|
-
import {
|
|
156
|
+
import type { ElementNode, TextNode } from 'mdream'
|
|
157
|
+
import { htmlToMarkdown } from 'mdream'
|
|
158
|
+
import { createPlugin } from 'mdream/plugins'
|
|
142
159
|
|
|
143
|
-
// Create a custom plugin
|
|
144
160
|
const myPlugin = createPlugin({
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
161
|
+
onNodeEnter(node: ElementNode): string | undefined {
|
|
162
|
+
if (node.name === 'h1') {
|
|
163
|
+
return '🔥 '
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
|
|
167
|
+
processTextNode(textNode: TextNode): { content: string, skip: boolean } | undefined {
|
|
168
|
+
// Transform text content
|
|
169
|
+
if (textNode.parent?.attributes?.id === 'highlight') {
|
|
170
|
+
return {
|
|
171
|
+
content: `**${textNode.value}**`,
|
|
172
|
+
skip: false
|
|
173
|
+
}
|
|
149
174
|
}
|
|
150
|
-
return content
|
|
151
175
|
}
|
|
152
176
|
})
|
|
153
177
|
|
|
154
|
-
// Use
|
|
155
|
-
const html = '<div
|
|
156
|
-
const markdown = htmlToMarkdown(html, {
|
|
157
|
-
|
|
158
|
-
withTailwind(), // Apply Tailwind class processing
|
|
159
|
-
filterUnsupportedTags(), // Filter out unsupported tags
|
|
160
|
-
myPlugin // Apply custom transformations
|
|
161
|
-
]
|
|
162
|
-
})
|
|
178
|
+
// Use the plugin
|
|
179
|
+
const html: string = '<div id="highlight">Important text</div>'
|
|
180
|
+
const markdown: string = htmlToMarkdown(html, { plugins: [myPlugin] })
|
|
181
|
+
```
|
|
163
182
|
|
|
164
|
-
|
|
183
|
+
#### Example: Content Filter Plugin
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
import type { ElementNode, NodeEvent } from 'mdream'
|
|
187
|
+
import { ELEMENT_NODE } from 'mdream'
|
|
188
|
+
import { createPlugin } from 'mdream/plugins'
|
|
189
|
+
|
|
190
|
+
const adBlockPlugin = createPlugin({
|
|
191
|
+
beforeNodeProcess(event: NodeEvent): { skip: boolean } | undefined {
|
|
192
|
+
const { node } = event
|
|
193
|
+
|
|
194
|
+
if (node.type === ELEMENT_NODE && node.name === 'div') {
|
|
195
|
+
const element = node as ElementNode
|
|
196
|
+
// Skip ads and promotional content
|
|
197
|
+
if (element.attributes?.class?.includes('ad')
|
|
198
|
+
|| element.attributes?.id?.includes('promo')) {
|
|
199
|
+
return { skip: true }
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
})
|
|
165
204
|
```
|
|
166
205
|
|
|
167
206
|
#### Extraction Plugin
|
|
@@ -169,9 +208,10 @@ console.log(markdown) // "⚠️ **Important message** ⚠️"
|
|
|
169
208
|
Extract specific elements and their content during HTML processing for data analysis or content discovery:
|
|
170
209
|
|
|
171
210
|
```ts
|
|
211
|
+
import type { ExtractedElement } from 'mdream/plugins'
|
|
172
212
|
import { extractionPlugin, htmlToMarkdown } from 'mdream'
|
|
173
213
|
|
|
174
|
-
const html = `
|
|
214
|
+
const html: string = `
|
|
175
215
|
<article>
|
|
176
216
|
<h2>Getting Started</h2>
|
|
177
217
|
<p>This is a tutorial about web scraping.</p>
|
|
@@ -181,10 +221,10 @@ const html = `
|
|
|
181
221
|
|
|
182
222
|
// Extract elements using CSS selectors
|
|
183
223
|
const plugin = extractionPlugin({
|
|
184
|
-
'h2': (element) => {
|
|
224
|
+
'h2': (element: ExtractedElement): void => {
|
|
185
225
|
console.log('Heading:', element.textContent) // "Getting Started"
|
|
186
226
|
},
|
|
187
|
-
'img[alt]': (element) => {
|
|
227
|
+
'img[alt]': (element: ExtractedElement): void => {
|
|
188
228
|
console.log('Image:', element.attributes.src, element.attributes.alt)
|
|
189
229
|
// "Image: /hero.jpg Hero image"
|
|
190
230
|
}
|
|
@@ -195,8 +235,6 @@ htmlToMarkdown(html, { plugins: [plugin] })
|
|
|
195
235
|
|
|
196
236
|
The extraction plugin provides memory-efficient element extraction with full text content and attributes, perfect for SEO analysis, content discovery, and data mining.
|
|
197
237
|
|
|
198
|
-
For more details, see the [plugin documentation](./docs/plugins.md).
|
|
199
|
-
|
|
200
238
|
## Credits
|
|
201
239
|
|
|
202
240
|
- [ultrahtml](https://github.com/natemoo-re/ultrahtml): HTML parsing inspiration
|
package/dist/cli.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { Readable } from 'node:stream';
|
|
2
2
|
import { cac } from 'cac';
|
|
3
|
-
import { f as frontmatterPlugin } from './shared/mdream.
|
|
4
|
-
import { r as readabilityPlugin } from './shared/mdream.
|
|
5
|
-
import { s as streamHtmlToMarkdown } from './shared/mdream.
|
|
6
|
-
import './shared/mdream
|
|
3
|
+
import { f as frontmatterPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
4
|
+
import { r as readabilityPlugin } from './shared/mdream.DMUbnRbh.mjs';
|
|
5
|
+
import { s as streamHtmlToMarkdown } from './shared/mdream.CRBi8vE8.mjs';
|
|
6
|
+
import './shared/mdream.Ch6B8TEB.mjs';
|
|
7
7
|
|
|
8
8
|
async function streamingConvert(options = {}) {
|
|
9
9
|
const outputStream = process.stdout;
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { H as HTMLToMarkdownOptions } from './shared/mdream.
|
|
2
|
-
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.C9ruFMrk.mjs';
|
|
2
|
+
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.C9ruFMrk.mjs';
|
|
3
3
|
import { ReadableStream } from 'node:stream/web';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { H as HTMLToMarkdownOptions } from './shared/mdream.
|
|
2
|
-
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.C9ruFMrk.js';
|
|
2
|
+
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.C9ruFMrk.js';
|
|
3
3
|
import { ReadableStream } from 'node:stream/web';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { p as processPartialHTMLToMarkdown } from './shared/mdream.
|
|
2
|
-
export { s as streamHtmlToMarkdown } from './shared/mdream.
|
|
3
|
-
import './shared/mdream
|
|
1
|
+
import { p as processPartialHTMLToMarkdown } from './shared/mdream.CRBi8vE8.mjs';
|
|
2
|
+
export { s as streamHtmlToMarkdown } from './shared/mdream.CRBi8vE8.mjs';
|
|
3
|
+
import './shared/mdream.Ch6B8TEB.mjs';
|
|
4
4
|
|
|
5
5
|
function htmlToMarkdown(html, options = {}) {
|
|
6
6
|
const state = {
|
package/dist/plugins.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { P as Plugin } from './shared/mdream.
|
|
1
|
+
import { P as Plugin, b as ElementNode } from './shared/mdream.C9ruFMrk.mjs';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -7,6 +7,11 @@ import { P as Plugin } from './shared/mdream.a2AvjJLp.mjs';
|
|
|
7
7
|
*/
|
|
8
8
|
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
9
|
|
|
10
|
+
interface ExtractedElement extends ElementNode {
|
|
11
|
+
textContent: string;
|
|
12
|
+
}
|
|
13
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement) => void>): Plugin;
|
|
14
|
+
|
|
10
15
|
/**
|
|
11
16
|
* Plugin that filters nodes based on CSS selectors.
|
|
12
17
|
* Allows including or excluding nodes based on selectors.
|
|
@@ -85,4 +90,4 @@ declare function readabilityPlugin(): Plugin;
|
|
|
85
90
|
*/
|
|
86
91
|
declare function tailwindPlugin(): Plugin;
|
|
87
92
|
|
|
88
|
-
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
|
93
|
+
export { createPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/plugins.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { P as Plugin } from './shared/mdream.
|
|
1
|
+
import { P as Plugin, b as ElementNode } from './shared/mdream.C9ruFMrk.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -7,6 +7,11 @@ import { P as Plugin } from './shared/mdream.a2AvjJLp.js';
|
|
|
7
7
|
*/
|
|
8
8
|
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
9
|
|
|
10
|
+
interface ExtractedElement extends ElementNode {
|
|
11
|
+
textContent: string;
|
|
12
|
+
}
|
|
13
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement) => void>): Plugin;
|
|
14
|
+
|
|
10
15
|
/**
|
|
11
16
|
* Plugin that filters nodes based on CSS selectors.
|
|
12
17
|
* Allows including or excluding nodes based on selectors.
|
|
@@ -85,4 +90,4 @@ declare function readabilityPlugin(): Plugin;
|
|
|
85
90
|
*/
|
|
86
91
|
declare function tailwindPlugin(): Plugin;
|
|
87
92
|
|
|
88
|
-
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
|
93
|
+
export { createPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/plugins.mjs
CHANGED
|
@@ -1,4 +1,46 @@
|
|
|
1
|
-
|
|
2
|
-
export { f as
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import { c as createPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
2
|
+
export { f as frontmatterPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
3
|
+
import { p as parseSelector } from './shared/mdream.D5zBVbP9.mjs';
|
|
4
|
+
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.D5zBVbP9.mjs';
|
|
5
|
+
export { r as readabilityPlugin } from './shared/mdream.DMUbnRbh.mjs';
|
|
6
|
+
import './shared/mdream.Ch6B8TEB.mjs';
|
|
7
|
+
|
|
8
|
+
function extractionPlugin(selectors) {
|
|
9
|
+
const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
|
|
10
|
+
matcher: parseSelector(selector),
|
|
11
|
+
callback
|
|
12
|
+
}));
|
|
13
|
+
const trackedElements = /* @__PURE__ */ new Map();
|
|
14
|
+
return createPlugin({
|
|
15
|
+
onNodeEnter(element) {
|
|
16
|
+
matcherCallbacks.forEach(({ matcher, callback }) => {
|
|
17
|
+
if (matcher.matches(element)) {
|
|
18
|
+
trackedElements.set(element, { textContent: "", callback });
|
|
19
|
+
}
|
|
20
|
+
});
|
|
21
|
+
},
|
|
22
|
+
processTextNode(textNode) {
|
|
23
|
+
let currentParent = textNode.parent;
|
|
24
|
+
while (currentParent) {
|
|
25
|
+
const tracked = trackedElements.get(currentParent);
|
|
26
|
+
if (tracked) {
|
|
27
|
+
tracked.textContent += textNode.value;
|
|
28
|
+
}
|
|
29
|
+
currentParent = currentParent.parent;
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
onNodeExit(element) {
|
|
33
|
+
const tracked = trackedElements.get(element);
|
|
34
|
+
if (tracked) {
|
|
35
|
+
const extractedElement = {
|
|
36
|
+
...element,
|
|
37
|
+
textContent: tracked.textContent.trim()
|
|
38
|
+
};
|
|
39
|
+
tracked.callback(extractedElement);
|
|
40
|
+
trackedElements.delete(element);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export { createPlugin, extractionPlugin };
|
package/dist/preset/minimal.d.ts
CHANGED
package/dist/preset/minimal.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.
|
|
3
|
-
import { f as frontmatterPlugin } from '../shared/mdream.
|
|
1
|
+
import { a9 as TAG_FORM, V as TAG_FIELDSET, b2 as TAG_OBJECT, a4 as TAG_EMBED, b6 as TAG_FIGURE, aa as TAG_FOOTER, u as TAG_ASIDE, P as TAG_IFRAME, a3 as TAG_INPUT, X as TAG_TEXTAREA, Y as TAG_SELECT, ah as TAG_BUTTON, aj as TAG_NAV } from '../shared/mdream.Ch6B8TEB.mjs';
|
|
2
|
+
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.D5zBVbP9.mjs';
|
|
3
|
+
import { f as frontmatterPlugin } from '../shared/mdream.C6Z2rfeq.mjs';
|
|
4
4
|
|
|
5
5
|
function withMinimalPreset(options = {}) {
|
|
6
6
|
const plugins = [
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { E as ELEMENT_NODE, aU as TAG_HEAD, c as collectNodeContent, aR as TAG_TITLE, aO as TAG_META } from './mdream.Ch6B8TEB.mjs';
|
|
2
2
|
|
|
3
3
|
function createPlugin(plugin) {
|
|
4
4
|
return plugin;
|
|
@@ -82,6 +82,8 @@ interface TextNode extends Node {
|
|
|
82
82
|
value: string;
|
|
83
83
|
/** Custom data added by plugins */
|
|
84
84
|
context?: Record<string, any>;
|
|
85
|
+
/** Whether this text node should be excluded from markdown output (for script/style elements) */
|
|
86
|
+
excludedFromMarkdown?: boolean;
|
|
85
87
|
}
|
|
86
88
|
/**
|
|
87
89
|
* Base DOM node interface
|
|
@@ -82,6 +82,8 @@ interface TextNode extends Node {
|
|
|
82
82
|
value: string;
|
|
83
83
|
/** Custom data added by plugins */
|
|
84
84
|
context?: Record<string, any>;
|
|
85
|
+
/** Whether this text node should be excluded from markdown output (for script/style elements) */
|
|
86
|
+
excludedFromMarkdown?: boolean;
|
|
85
87
|
}
|
|
86
88
|
/**
|
|
87
89
|
* Base DOM node interface
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { T as TEXT_NODE, N as NodeEventEnter, c as collectNodeContent, E as ELEMENT_NODE, a as TAG_PRE, b as TAG_LI, d as TAG_BLOCKQUOTE, e as NO_SPACING, D as DEFAULT_BLOCK_SPACING, f as TABLE_ROW_SPACING, L as LIST_ITEM_SPACING, B as BLOCKQUOTE_SPACING, M as MARKDOWN_STRIKETHROUGH, g as MARKDOWN_HORIZONTAL_RULE, h as TAG_DD, i as TAG_DT, j as TAG_DL, k as TAG_ADDRESS, l as TAG_RP, m as TAG_RT, n as TAG_RUBY, o as TAG_BDO, p as TAG_TIME, q as TAG_VAR, r as TAG_DFN, s as TAG_CITE, t as TAG_U, u as TAG_ASIDE, v as TAG_PLAINTEXT, w as TAG_XMP, x as TAG_NOFRAMES, y as TAG_NOSCRIPT, z as TAG_SMALL, A as TAG_SAMP, C as TAG_Q, F as TAG_MARK, G as TAG_ABBR, H as TAG_TEMPLATE, I as TAG_PROGRESS, J as TAG_METER, K as TAG_DIALOG, O as TAG_MAP, P as TAG_IFRAME, Q as TAG_CANVAS, R as TAG_VIDEO, S as TAG_AUDIO, U as TAG_LEGEND, V as TAG_FIELDSET, W as TAG_OPTION, X as TAG_TEXTAREA, Y as TAG_SELECT, Z as TAG_SVG, _ as TAG_WBR, $ as TAG_TRACK, a0 as TAG_SOURCE, a1 as TAG_PARAM, a2 as TAG_KEYGEN, a3 as TAG_INPUT, a4 as TAG_EMBED, a5 as TAG_COL, a6 as TAG_BASE, a7 as TAG_AREA, a8 as TAG_LINK, a9 as TAG_FORM, aa as TAG_FOOTER, ab as TAG_KBD, ac as TAG_TFOOT, ad as TAG_TBODY, ae as TAG_CENTER, af as TAG_TABLE, ag as TAG_BODY, ah as TAG_BUTTON, ai as TAG_LABEL, aj as TAG_NAV, ak as TAG_SPAN, al as TAG_DIV, am as TAG_P, an as TAG_TD, ao as TAG_TH, ap as TAG_TR, aq as TAG_THEAD, ar as TAG_IMG, as as TAG_A, at as TAG_UL, au as TAG_OL, av as TAG_CODE, aw as MARKDOWN_CODE_BLOCK, ax as MARKDOWN_INLINE_CODE, ay as TAG_INS, az as TAG_SUP, aA as TAG_SUB, aB as TAG_DEL, aC as TAG_I, aD as TAG_EM, aE as TAG_B, aF as TAG_STRONG, aG as TAG_HR, aH as TAG_H6, aI as TAG_H5, aJ as TAG_H4, aK as TAG_H3, aL as TAG_H2, aM as TAG_H1, aN as TAG_BR, aO as TAG_META, aP as TAG_STYLE, aQ as TAG_SCRIPT, aR as TAG_TITLE, aS as TAG_SUMMARY, aT as TAG_DETAILS, aU as TAG_HEAD, aV as MARKDOWN_EMPHASIS, aW as MARKDOWN_STRONG, aX as HTML_ENTITIES, aY as MAX_TAG_ID, aZ as assembleBufferedContent, a_ as TagIdMap, a$ as 
NodeEventExit } from './mdream.Ch6B8TEB.mjs';
|
|
2
2
|
|
|
3
3
|
function needsSpacing(lastChar, firstChar) {
|
|
4
4
|
const noSpaceLastChars = /* @__PURE__ */ new Set(["\n", " ", "[", ">", "_", "*", "`", "|", "#", "<", "("]);
|
|
@@ -48,6 +48,9 @@ function processHtmlEventToMarkdown(event, state) {
|
|
|
48
48
|
textNode.value = pluginResult.content;
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
+
if (textNode.excludedFromMarkdown) {
|
|
52
|
+
return;
|
|
53
|
+
}
|
|
51
54
|
if (textNode.value === " " && lastChar === "\n") {
|
|
52
55
|
return;
|
|
53
56
|
}
|
|
@@ -1024,9 +1027,10 @@ function processTextBuffer(textBuffer, state, handleEvent) {
|
|
|
1024
1027
|
const containsWhitespace = state.textBufferContainsWhitespace;
|
|
1025
1028
|
state.textBufferContainsNonWhitespace = false;
|
|
1026
1029
|
state.textBufferContainsWhitespace = false;
|
|
1027
|
-
if (!state.currentNode
|
|
1030
|
+
if (!state.currentNode) {
|
|
1028
1031
|
return;
|
|
1029
1032
|
}
|
|
1033
|
+
const excludesTextNodes = state.currentNode?.tagHandler?.excludesTextNodes;
|
|
1030
1034
|
const inPreTag = state.depthMap[TAG_PRE] > 0;
|
|
1031
1035
|
if (!inPreTag && !containsNonWhitespace && !state.currentNode.childTextNodeIndex) {
|
|
1032
1036
|
return;
|
|
@@ -1057,7 +1061,8 @@ function processTextBuffer(textBuffer, state, handleEvent) {
|
|
|
1057
1061
|
regionId: state.currentNode?.regionId,
|
|
1058
1062
|
index: state.currentNode.currentWalkIndex++,
|
|
1059
1063
|
depth: state.depth,
|
|
1060
|
-
containsWhitespace
|
|
1064
|
+
containsWhitespace,
|
|
1065
|
+
excludedFromMarkdown: excludesTextNodes
|
|
1061
1066
|
};
|
|
1062
1067
|
for (const parent of parentsToIncrement) {
|
|
1063
1068
|
parent.childTextNodeIndex = (parent.childTextNodeIndex || 0) + 1;
|
|
@@ -277,4 +277,4 @@ const BLOCKQUOTE_SPACING = [1, 1];
|
|
|
277
277
|
const LIST_ITEM_SPACING = [1, 0];
|
|
278
278
|
const TABLE_ROW_SPACING = [0, 1];
|
|
279
279
|
|
|
280
|
-
export {
|
|
280
|
+
export { TAG_TRACK as $, TAG_SAMP as A, BLOCKQUOTE_SPACING as B, TAG_Q as C, DEFAULT_BLOCK_SPACING as D, ELEMENT_NODE as E, TAG_MARK as F, TAG_ABBR as G, TAG_TEMPLATE as H, TAG_PROGRESS as I, TAG_METER as J, TAG_DIALOG as K, LIST_ITEM_SPACING as L, MARKDOWN_STRIKETHROUGH as M, NodeEventEnter as N, TAG_MAP as O, TAG_IFRAME as P, TAG_CANVAS as Q, TAG_VIDEO as R, TAG_AUDIO as S, TEXT_NODE as T, TAG_LEGEND as U, TAG_FIELDSET as V, TAG_OPTION as W, TAG_TEXTAREA as X, TAG_SELECT as Y, TAG_SVG as Z, TAG_WBR as _, TAG_PRE as a, NodeEventExit as a$, TAG_SOURCE as a0, TAG_PARAM as a1, TAG_KEYGEN as a2, TAG_INPUT as a3, TAG_EMBED as a4, TAG_COL as a5, TAG_BASE as a6, TAG_AREA as a7, TAG_LINK as a8, TAG_FORM as a9, TAG_SUB as aA, TAG_DEL as aB, TAG_I as aC, TAG_EM as aD, TAG_B as aE, TAG_STRONG as aF, TAG_HR as aG, TAG_H6 as aH, TAG_H5 as aI, TAG_H4 as aJ, TAG_H3 as aK, TAG_H2 as aL, TAG_H1 as aM, TAG_BR as aN, TAG_META as aO, TAG_STYLE as aP, TAG_SCRIPT as aQ, TAG_TITLE as aR, TAG_SUMMARY as aS, TAG_DETAILS as aT, TAG_HEAD as aU, MARKDOWN_EMPHASIS as aV, MARKDOWN_STRONG as aW, HTML_ENTITIES as aX, MAX_TAG_ID as aY, assembleBufferedContent as aZ, TagIdMap as a_, TAG_FOOTER as aa, TAG_KBD as ab, TAG_TFOOT as ac, TAG_TBODY as ad, TAG_CENTER as ae, TAG_TABLE as af, TAG_BODY as ag, TAG_BUTTON as ah, TAG_LABEL as ai, TAG_NAV as aj, TAG_SPAN as ak, TAG_DIV as al, TAG_P as am, TAG_TD as an, TAG_TH as ao, TAG_TR as ap, TAG_THEAD as aq, TAG_IMG as ar, TAG_A as as, TAG_UL as at, TAG_OL as au, TAG_CODE as av, MARKDOWN_CODE_BLOCK as aw, MARKDOWN_INLINE_CODE as ax, TAG_INS as ay, TAG_SUP as az, TAG_LI as b, TAG_HTML as b0, createBufferRegion as b1, TAG_OBJECT as b2, TAG_HEADER as b3, TAG_CAPTION as b4, TAG_FIGCAPTION as b5, TAG_FIGURE as b6, TAG_MAIN as b7, TAG_SECTION as b8, TAG_ARTICLE as b9, collectNodeContent as c, TAG_BLOCKQUOTE as d, NO_SPACING as e, TABLE_ROW_SPACING as f, MARKDOWN_HORIZONTAL_RULE as g, TAG_DD as h, TAG_DT as i, TAG_DL as j, TAG_ADDRESS as k, TAG_RP as l, TAG_RT as 
m, TAG_RUBY as n, TAG_BDO as o, TAG_TIME as p, TAG_VAR as q, TAG_DFN as r, TAG_CITE as s, TAG_U as t, TAG_ASIDE as u, TAG_PLAINTEXT as v, TAG_XMP as w, TAG_NOFRAMES as x, TAG_NOSCRIPT as y, TAG_SMALL as z };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as createPlugin } from './mdream.
|
|
1
|
+
import { T as TEXT_NODE, E as ELEMENT_NODE, b7 as TAG_MAIN, b3 as TAG_HEADER, aa as TAG_FOOTER, aM as TAG_H1, aL as TAG_H2, aK as TAG_H3, aJ as TAG_H4, aI as TAG_H5, aH as TAG_H6 } from './mdream.Ch6B8TEB.mjs';
|
|
2
|
+
import { c as createPlugin } from './mdream.C6Z2rfeq.mjs';
|
|
3
3
|
|
|
4
4
|
class TagSelector {
|
|
5
5
|
constructor(tagName) {
|
|
@@ -498,4 +498,4 @@ function tailwindPlugin() {
|
|
|
498
498
|
});
|
|
499
499
|
}
|
|
500
500
|
|
|
501
|
-
export { filterPlugin as f, isolateMainPlugin as i, tailwindPlugin as t };
|
|
501
|
+
export { filterPlugin as f, isolateMainPlugin as i, parseSelector as p, tailwindPlugin as t };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as createPlugin } from './mdream.
|
|
1
|
+
import { ag as TAG_BODY, b0 as TAG_HTML, aU as TAG_HEAD, b1 as createBufferRegion, as as TAG_A, k as TAG_ADDRESS, aS as TAG_SUMMARY, aT as TAG_DETAILS, aP as TAG_STYLE, aQ as TAG_SCRIPT, ak as TAG_SPAN, aN as TAG_BR, aG as TAG_HR, aC as TAG_I, aD as TAG_EM, aE as TAG_B, aF as TAG_STRONG, b2 as TAG_OBJECT, a4 as TAG_EMBED, P as TAG_IFRAME, V as TAG_FIELDSET, Y as TAG_SELECT, X as TAG_TEXTAREA, a3 as TAG_INPUT, ah as TAG_BUTTON, a9 as TAG_FORM, u as TAG_ASIDE, aj as TAG_NAV, aa as TAG_FOOTER, b3 as TAG_HEADER, aH as TAG_H6, aI as TAG_H5, aJ as TAG_H4, aK as TAG_H3, aL as TAG_H2, aM as TAG_H1, h as TAG_DD, i as TAG_DT, j as TAG_DL, b as TAG_LI, au as TAG_OL, at as TAG_UL, an as TAG_TD, ao as TAG_TH, ap as TAG_TR, ac as TAG_TFOOT, ad as TAG_TBODY, aq as TAG_THEAD, b4 as TAG_CAPTION, af as TAG_TABLE, Z as TAG_SVG, S as TAG_AUDIO, R as TAG_VIDEO, b5 as TAG_FIGCAPTION, b6 as TAG_FIGURE, ar as TAG_IMG, av as TAG_CODE, a as TAG_PRE, d as TAG_BLOCKQUOTE, al as TAG_DIV, am as TAG_P, b7 as TAG_MAIN, b8 as TAG_SECTION, b9 as TAG_ARTICLE } from './mdream.Ch6B8TEB.mjs';
|
|
2
|
+
import { c as createPlugin } from './mdream.C6Z2rfeq.mjs';
|
|
3
3
|
|
|
4
4
|
const REGEXPS = {
|
|
5
5
|
// Positive patterns that suggest high-quality content
|