mdream 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -5
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +2 -2
- package/dist/plugins.mjs +1 -1
- package/dist/preset/minimal.mjs +1 -1
- package/dist/shared/{mdream.5zaIXVJz.mjs → mdream.DEM9pag4.mjs} +1 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -99,16 +99,16 @@ pnpm add mdream
|
|
|
99
99
|
### Usage
|
|
100
100
|
|
|
101
101
|
Mdream provides two utils for working with HTML; both process content as a stream.
|
|
102
|
-
- `
|
|
102
|
+
- `htmlToMarkdown`: Useful if you already have the entire HTML payload you want to convert.
|
|
103
103
|
- `streamHtmlToMarkdown`: Best practice if you are fetching the HTML or reading it from a local file.
|
|
104
104
|
|
|
105
105
|
**Convert existing HTML**
|
|
106
106
|
|
|
107
107
|
```ts
|
|
108
|
-
import {
|
|
108
|
+
import { htmlToMarkdown } from 'mdream'
|
|
109
109
|
|
|
110
110
|
// Simple conversion
|
|
111
|
-
const markdown =
|
|
111
|
+
const markdown = htmlToMarkdown('<h1>Hello World</h1>')
|
|
112
112
|
console.log(markdown) // # Hello World
|
|
113
113
|
````
|
|
114
114
|
|
|
@@ -138,7 +138,7 @@ for await (const chunk of markdownGenerator) {
|
|
|
138
138
|
Mdream now features a powerful plugin system that allows you to customize and extend the HTML-to-Markdown conversion process.
|
|
139
139
|
|
|
140
140
|
```ts
|
|
141
|
-
import { createPlugin, filterUnsupportedTags,
|
|
141
|
+
import { createPlugin, filterUnsupportedTags, htmlToMarkdown, withTailwind } from 'mdream'
|
|
142
142
|
|
|
143
143
|
// Create a custom plugin
|
|
144
144
|
const myPlugin = createPlugin({
|
|
@@ -153,7 +153,7 @@ const myPlugin = createPlugin({
|
|
|
153
153
|
|
|
154
154
|
// Use multiple plugins together
|
|
155
155
|
const html = '<div role="alert" class="font-bold">Important message</div>'
|
|
156
|
-
const markdown =
|
|
156
|
+
const markdown = htmlToMarkdown(html, {
|
|
157
157
|
plugins: [
|
|
158
158
|
withTailwind(), // Apply Tailwind class processing
|
|
159
159
|
filterUnsupportedTags(), // Filter out unsupported tags
|
|
@@ -164,6 +164,37 @@ const markdown = syncHtmlToMarkdown(html, {
|
|
|
164
164
|
console.log(markdown) // "⚠️ **Important message** ⚠️"
|
|
165
165
|
```
|
|
166
166
|
|
|
167
|
+
#### Extraction Plugin
|
|
168
|
+
|
|
169
|
+
Extract specific elements and their content during HTML processing for data analysis or content discovery:
|
|
170
|
+
|
|
171
|
+
```ts
|
|
172
|
+
import { extractionPlugin, htmlToMarkdown } from 'mdream'
|
|
173
|
+
|
|
174
|
+
const html = `
|
|
175
|
+
<article>
|
|
176
|
+
<h2>Getting Started</h2>
|
|
177
|
+
<p>This is a tutorial about web scraping.</p>
|
|
178
|
+
<img src="/hero.jpg" alt="Hero image" />
|
|
179
|
+
</article>
|
|
180
|
+
`
|
|
181
|
+
|
|
182
|
+
// Extract elements using CSS selectors
|
|
183
|
+
const plugin = extractionPlugin({
|
|
184
|
+
'h2': (element) => {
|
|
185
|
+
console.log('Heading:', element.textContent) // "Getting Started"
|
|
186
|
+
},
|
|
187
|
+
'img[alt]': (element) => {
|
|
188
|
+
console.log('Image:', element.attributes.src, element.attributes.alt)
|
|
189
|
+
// "Image: /hero.jpg Hero image"
|
|
190
|
+
}
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
htmlToMarkdown(html, { plugins: [plugin] })
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The extraction plugin provides memory-efficient element extraction with full text content and attributes, perfect for SEO analysis, content discovery, and data mining.
|
|
197
|
+
|
|
167
198
|
For more details, see the [plugin documentation](./docs/plugins.md).
|
|
168
199
|
|
|
169
200
|
## Credits
|
package/dist/index.d.mts
CHANGED
|
@@ -10,6 +10,6 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
10
|
*/
|
|
11
11
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
12
|
|
|
13
|
-
declare function
|
|
13
|
+
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
14
|
|
|
15
|
-
export { HTMLToMarkdownOptions,
|
|
15
|
+
export { HTMLToMarkdownOptions, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.d.ts
CHANGED
|
@@ -10,6 +10,6 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
10
|
*/
|
|
11
11
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
12
|
|
|
13
|
-
declare function
|
|
13
|
+
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
14
|
|
|
15
|
-
export { HTMLToMarkdownOptions,
|
|
15
|
+
export { HTMLToMarkdownOptions, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { p as processPartialHTMLToMarkdown } from './shared/mdream.DUeWbUFG.mjs'
|
|
|
2
2
|
export { s as streamHtmlToMarkdown } from './shared/mdream.DUeWbUFG.mjs';
|
|
3
3
|
import './shared/mdream.-hdaPj9a.mjs';
|
|
4
4
|
|
|
5
|
-
function
|
|
5
|
+
function htmlToMarkdown(html, options = {}) {
|
|
6
6
|
const state = {
|
|
7
7
|
options
|
|
8
8
|
};
|
|
@@ -10,4 +10,4 @@ function syncHtmlToMarkdown(html, options = {}) {
|
|
|
10
10
|
return result.trimEnd();
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
export {
|
|
13
|
+
export { htmlToMarkdown };
|
package/dist/plugins.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { c as createPlugin, f as frontmatterPlugin } from './shared/mdream.cpEmpxyh.mjs';
|
|
2
|
-
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.
|
|
2
|
+
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.DEM9pag4.mjs';
|
|
3
3
|
export { r as readabilityPlugin } from './shared/mdream.C8ruysN5.mjs';
|
|
4
4
|
import './shared/mdream.-hdaPj9a.mjs';
|
package/dist/preset/minimal.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { y as TAG_FORM, t as TAG_FIELDSET, q as TAG_OBJECT, r as TAG_EMBED, a0 as TAG_FIGURE, B as TAG_FOOTER, z as TAG_ASIDE, s as TAG_IFRAME, w as TAG_INPUT, v as TAG_TEXTAREA, u as TAG_SELECT, x as TAG_BUTTON, A as TAG_NAV } from '../shared/mdream.-hdaPj9a.mjs';
|
|
2
|
-
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.
|
|
2
|
+
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.DEM9pag4.mjs';
|
|
3
3
|
import { f as frontmatterPlugin } from '../shared/mdream.cpEmpxyh.mjs';
|
|
4
4
|
|
|
5
5
|
function withMinimalPreset(options = {}) {
|
|
@@ -103,7 +103,6 @@ function parseSelector(selector) {
|
|
|
103
103
|
}
|
|
104
104
|
const selectorParts = [];
|
|
105
105
|
let current = "";
|
|
106
|
-
let inAttribute = false;
|
|
107
106
|
for (let i = 0; i < selector.length; i++) {
|
|
108
107
|
const char = selector[i];
|
|
109
108
|
if ((char === "." || char === "#" || char === "[") && current) {
|
|
@@ -120,13 +119,6 @@ function parseSelector(selector) {
|
|
|
120
119
|
} else {
|
|
121
120
|
current += char;
|
|
122
121
|
}
|
|
123
|
-
if (char === "[")
|
|
124
|
-
inAttribute = true;
|
|
125
|
-
if (char === "]")
|
|
126
|
-
inAttribute = false;
|
|
127
|
-
if (inAttribute && char !== "[") {
|
|
128
|
-
continue;
|
|
129
|
-
}
|
|
130
122
|
}
|
|
131
123
|
if (current) {
|
|
132
124
|
if (current[0] === ".") {
|
|
@@ -144,6 +136,7 @@ function parseSelector(selector) {
|
|
|
144
136
|
}
|
|
145
137
|
return new CompoundSelector(selectorParts);
|
|
146
138
|
}
|
|
139
|
+
|
|
147
140
|
function filterPlugin(options = {}) {
|
|
148
141
|
const includeSelectors = options.include?.map((selector) => {
|
|
149
142
|
if (typeof selector === "string") {
|