mdream 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +9 -0
- package/README.md +185 -0
- package/bin/mdream.mjs +2 -0
- package/dist/cli.d.mts +2 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.mjs +25 -0
- package/dist/index.d.mts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.mjs +13 -0
- package/dist/plugins.d.mts +88 -0
- package/dist/plugins.d.ts +88 -0
- package/dist/plugins.mjs +4 -0
- package/dist/preset/minimal.d.mts +11 -0
- package/dist/preset/minimal.d.ts +11 -0
- package/dist/preset/minimal.mjs +39 -0
- package/dist/shared/mdream.-hdaPj9a.mjs +280 -0
- package/dist/shared/mdream.5zaIXVJz.mjs +508 -0
- package/dist/shared/mdream.C8ruysN5.mjs +291 -0
- package/dist/shared/mdream.DUeWbUFG.mjs +1432 -0
- package/dist/shared/mdream.a2AvjJLp.d.mts +218 -0
- package/dist/shared/mdream.a2AvjJLp.d.ts +218 -0
- package/dist/shared/mdream.cpEmpxyh.mjs +105 -0
- package/package.json +62 -0
package/LICENSE.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Harlan Wilton
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
<h1>mdream</h1>
|
|
2
|
+
|
|
3
|
+
[![npm version][npm-version-src]][npm-version-href]
|
|
4
|
+
[![npm downloads][npm-downloads-src]](https://npm.chart.dev/mdream)
|
|
5
|
+
[![license][license-src]][license-href]
|
|
6
|
+
|
|
7
|
+
> Ultra-performant JavaScript HTML to Markdown converter optimized for LLMs.
|
|
8
|
+
|
|
9
|
+
<img src=".github/logo.png" alt="mdream logo" width="200">
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<table>
|
|
13
|
+
<tbody>
|
|
14
|
+
<td align="center">
|
|
15
|
+
<sub>Made possible by my <a href="https://github.com/sponsors/harlan-zw">Sponsor Program 💖</a><br> Follow me <a href="https://twitter.com/harlan_zw">@harlan_zw</a> 🐦 • Join <a href="https://discord.gg/275MBUBvgP">Discord</a> for help</sub><br>
|
|
16
|
+
</td>
|
|
17
|
+
</tbody>
|
|
18
|
+
</table>
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
## Features
|
|
22
|
+
|
|
23
|
+
- 🧠 Content Extraction: [Readability.js](https://github.com/mozilla/readability) scoring heuristics for ~50% fewer tokens* and improved accuracy.
|
|
24
|
+
- 🔍 GitHub Flavored Markdown: Frontmatter, Nested & HTML markup support.
|
|
25
|
+
- Tailwind CSS: Converts Tailwind CSS classes to Markdown for better readability.
|
|
26
|
+
|
|
27
|
+
**Ultra Performant**
|
|
28
|
+
- 🚀 Convert 1.4MB of HTML in ~50ms* with advanced streaming support, including content-based buffering.
|
|
29
|
+
- ⚡ 5kB gzip, zero dependencies.
|
|
30
|
+
- Streaming support
|
|
31
|
+
|
|
32
|
+
**Adaptable**
|
|
33
|
+
|
|
34
|
+
- ⚙️ Run anywhere: CLI, edge workers, browsers, Node, etc.
|
|
35
|
+
- 🔌 Extensible: [Plugin system](#plugin-system) for customizing and extending functionality.
|
|
36
|
+
|
|
37
|
+
**CLI**
|
|
38
|
+
|
|
39
|
+
- Integrates with Crawlee to provide Markdown dumps of entire sites
|
|
40
|
+
- Run an MCP web server
|
|
41
|
+
|
|
42
|
+
## Why Mdream?
|
|
43
|
+
|
|
44
|
+
Traditional HTML to Markdown converters were not built for LLMs or humans. They tend to be slow and bloated, and they produce output that's poorly suited for LLM token usage or for
|
|
45
|
+
human readability.
|
|
46
|
+
|
|
47
|
+
Mdream is an ultra-performant HTML to Markdown converter built specifically for LLM content analysis and human readability. It has zero dependencies, built-in streaming, and opinionated output optimized for both human readability and AI processing.
|
|
48
|
+
|
|
49
|
+
Perfect for: RAG systems, web scraping, content extraction, ChatGPT/Claude integration, and large-scale document processing.
|
|
50
|
+
|
|
51
|
+
## CLI Usage
|
|
52
|
+
|
|
53
|
+
The Mdream CLI is designed to work exclusively with Unix pipes, providing flexibility and freedom to integrate with other tools.
|
|
54
|
+
|
|
55
|
+
**Pipe Site to Markdown**
|
|
56
|
+
|
|
57
|
+
Fetches the [Markdown Wikipedia page](https://en.wikipedia.org/wiki/Markdown) and converts it to Markdown, preserving the original links and images.
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
curl -s https://en.wikipedia.org/wiki/Markdown \
|
|
61
|
+
| npx mdream --origin https://en.wikipedia.org --preset minimal \
|
|
62
|
+
| tee streaming.md
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
_Tip: The `--origin` flag will fix relative image and link paths_
|
|
66
|
+
|
|
67
|
+
**Local File to Markdown**
|
|
68
|
+
|
|
69
|
+
Converts a local HTML file to a Markdown file, using `tee` to write the output to a file and display it in the terminal.
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cat index.html \
|
|
73
|
+
| npx mdream \
|
|
74
|
+
| tee streaming.md
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### CLI Options
|
|
78
|
+
|
|
79
|
+
- `--origin <url>`: Base URL for resolving relative links and images
|
|
80
|
+
- `-v, --verbose`: Enable verbose debug logging to stderr
|
|
81
|
+
- `--help`: Display help information
|
|
82
|
+
- `--version`: Display version information
|
|
83
|
+
|
|
84
|
+
## API Usage
|
|
85
|
+
|
|
86
|
+
### Installation
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# npm
|
|
90
|
+
npm install mdream
|
|
91
|
+
|
|
92
|
+
# yarn
|
|
93
|
+
yarn add mdream
|
|
94
|
+
|
|
95
|
+
# pnpm
|
|
96
|
+
pnpm add mdream
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Usage
|
|
100
|
+
|
|
101
|
+
Mdream provides two utilities for working with HTML; both process content as a stream.
|
|
102
|
+
- `syncHtmlToMarkdown`: Useful if you already have the entire HTML payload you want to convert.
|
|
103
|
+
- `streamHtmlToMarkdown`: Best practice if you are fetching or reading from a local file.
|
|
104
|
+
|
|
105
|
+
**Convert existing HTML**
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
import { syncHtmlToMarkdown } from 'mdream'
|
|
109
|
+
|
|
110
|
+
// Simple conversion
|
|
111
|
+
const markdown = syncHtmlToMarkdown('<h1>Hello World</h1>')
|
|
112
|
+
console.log(markdown) // # Hello World
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Convert from Fetch**
|
|
116
|
+
|
|
117
|
+
```ts
|
|
118
|
+
import { streamHtmlToMarkdown } from 'mdream'
|
|
119
|
+
|
|
120
|
+
// Using fetch with streaming
|
|
121
|
+
const response = await fetch('https://example.com')
|
|
122
|
+
const htmlStream = response.body
|
|
123
|
+
const markdownGenerator = streamHtmlToMarkdown(htmlStream, {
|
|
124
|
+
origin: 'https://example.com',
|
|
125
|
+
filters: 'minimal-from-first-header'
|
|
126
|
+
})
|
|
127
|
+
|
|
128
|
+
// Process chunks as they arrive
|
|
129
|
+
for await (const chunk of markdownGenerator) {
|
|
130
|
+
console.log(chunk)
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Documentation
|
|
135
|
+
|
|
136
|
+
### Plugin System
|
|
137
|
+
|
|
138
|
+
Mdream now features a powerful plugin system that allows you to customize and extend the HTML-to-Markdown conversion process.
|
|
139
|
+
|
|
140
|
+
```ts
|
|
141
|
+
import { createPlugin, filterUnsupportedTags, syncHtmlToMarkdown, withTailwind } from 'mdream'
|
|
142
|
+
|
|
143
|
+
// Create a custom plugin
|
|
144
|
+
const myPlugin = createPlugin({
|
|
145
|
+
name: 'my-plugin',
|
|
146
|
+
transformContent: (content, node) => {
|
|
147
|
+
if (node.type === 1 && node.name === 'div' && node.attributes?.role === 'alert') {
|
|
148
|
+
return `⚠️ ${content} ⚠️`
|
|
149
|
+
}
|
|
150
|
+
return content
|
|
151
|
+
}
|
|
152
|
+
})
|
|
153
|
+
|
|
154
|
+
// Use multiple plugins together
|
|
155
|
+
const html = '<div role="alert" class="font-bold">Important message</div>'
|
|
156
|
+
const markdown = syncHtmlToMarkdown(html, {
|
|
157
|
+
plugins: [
|
|
158
|
+
withTailwind(), // Apply Tailwind class processing
|
|
159
|
+
filterUnsupportedTags(), // Filter out unsupported tags
|
|
160
|
+
myPlugin // Apply custom transformations
|
|
161
|
+
]
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
console.log(markdown) // "⚠️ **Important message** ⚠️"
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
For more details, see the [plugin documentation](./docs/plugins.md).
|
|
168
|
+
|
|
169
|
+
## Credits
|
|
170
|
+
|
|
171
|
+
- [ultrahtml](https://github.com/natemoo-re/ultrahtml): HTML parsing inspiration
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
Licensed under the [MIT license](https://github.com/harlan-zw/mdream/blob/main/LICENSE.md).
|
|
176
|
+
|
|
177
|
+
<!-- Badges -->
|
|
178
|
+
[npm-version-src]: https://img.shields.io/npm/v/mdream/latest.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
179
|
+
[npm-version-href]: https://npmjs.com/package/mdream
|
|
180
|
+
|
|
181
|
+
[npm-downloads-src]: https://img.shields.io/npm/dm/mdream.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
182
|
+
[npm-downloads-href]: https://npmjs.com/package/mdream
|
|
183
|
+
|
|
184
|
+
[license-src]: https://img.shields.io/github/license/harlan-zw/mdream.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
185
|
+
[license-href]: https://github.com/harlan-zw/mdream/blob/main/LICENSE.md
|
package/bin/mdream.mjs
ADDED
package/dist/cli.d.mts
ADDED
package/dist/cli.d.ts
ADDED
package/dist/cli.mjs
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { cac } from 'cac';
|
|
3
|
+
import { f as frontmatterPlugin } from './shared/mdream.cpEmpxyh.mjs';
|
|
4
|
+
import { r as readabilityPlugin } from './shared/mdream.C8ruysN5.mjs';
|
|
5
|
+
import { s as streamHtmlToMarkdown } from './shared/mdream.DUeWbUFG.mjs';
|
|
6
|
+
import './shared/mdream.-hdaPj9a.mjs';
|
|
7
|
+
|
|
8
|
+
/**
 * Stream HTML from stdin through the converter and write the resulting
 * Markdown to stdout.
 *
 * Only `options.origin` is read here and forwarded to the converter; the
 * readability and frontmatter plugins are always enabled.
 *
 * @param {{ origin?: string }} [options] - Parsed CLI options.
 * @returns {Promise<void>} Resolves once the Markdown generator is exhausted.
 */
async function streamingConvert(options = {}) {
  const outputStream = process.stdout;
  const conversionOptions = {
    origin: options.origin,
    plugins: [readabilityPlugin(), frontmatterPlugin()],
  };
  const markdownGenerator = streamHtmlToMarkdown(Readable.toWeb(process.stdin), conversionOptions);
  for await (const markdownChunk of markdownGenerator) {
    // Skip empty chunks so nothing is written for them.
    if (!markdownChunk || markdownChunk.length === 0) {
      continue;
    }
    // write() returns false once the stream's internal buffer is full. Wait
    // for 'drain' before pulling the next chunk so a slow consumer cannot
    // make the process buffer the whole document in memory.
    if (!outputStream.write(markdownChunk)) {
      await new Promise((resolve) => outputStream.once("drain", resolve));
    }
  }
}
|
|
21
|
+
// Command-line entry point: a single default command that reads HTML from
// stdin and writes Markdown to stdout.
const cli = cac();
cli
  .command("[options]", "Convert HTML from stdin to Markdown on stdout")
  .option("--origin <url>", "Origin URL for resolving relative image paths")
  .option("--preset <preset>", "Conversion presets: minimal")
  .action(async (_, opts) => {
    await streamingConvert(opts);
  });
// `--version` must report the published package version (0.1.1), not a
// placeholder.
cli.help().version("0.1.1").parse();
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.a2AvjJLp.mjs';
|
|
2
|
+
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.a2AvjJLp.mjs';
|
|
3
|
+
import { ReadableStream } from 'node:stream/web';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Creates a markdown stream from an HTML stream
|
|
7
|
+
* @param htmlStream - ReadableStream of HTML content (as Uint8Array or string)
|
|
8
|
+
* @param options - Configuration options for conversion
|
|
9
|
+
* @returns An async generator yielding markdown chunks
|
|
10
|
+
*/
|
|
11
|
+
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
|
+
|
|
13
|
+
/**
 * Converts a complete HTML string to Markdown synchronously
 * @param html - The HTML source to convert
 * @param options - Configuration options for conversion
 * @returns The Markdown output with trailing whitespace trimmed
 */
declare function syncHtmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
|
+
|
|
15
|
+
export { HTMLToMarkdownOptions, streamHtmlToMarkdown, syncHtmlToMarkdown };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.a2AvjJLp.js';
|
|
2
|
+
export { B as BufferRegion, E as ELEMENT_NODE, b as ElementNode, f as HandlerContext, M as MdreamProcessingState, d as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.a2AvjJLp.js';
|
|
3
|
+
import { ReadableStream } from 'node:stream/web';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Creates a markdown stream from an HTML stream
|
|
7
|
+
* @param htmlStream - ReadableStream of HTML content (as Uint8Array or string)
|
|
8
|
+
* @param options - Configuration options for conversion
|
|
9
|
+
* @returns An async generator yielding markdown chunks
|
|
10
|
+
*/
|
|
11
|
+
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
|
+
|
|
13
|
+
/**
 * Converts a complete HTML string to Markdown synchronously
 * @param html - The HTML source to convert
 * @param options - Configuration options for conversion
 * @returns The Markdown output with trailing whitespace trimmed
 */
declare function syncHtmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
|
+
|
|
15
|
+
export { HTMLToMarkdownOptions, streamHtmlToMarkdown, syncHtmlToMarkdown };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { p as processPartialHTMLToMarkdown } from './shared/mdream.DUeWbUFG.mjs';
|
|
2
|
+
export { s as streamHtmlToMarkdown } from './shared/mdream.DUeWbUFG.mjs';
|
|
3
|
+
import './shared/mdream.-hdaPj9a.mjs';
|
|
4
|
+
|
|
5
|
+
/**
 * Convert a complete HTML string to Markdown in one synchronous call.
 *
 * @param html - Full HTML source to convert.
 * @param options - Conversion options; stored on the processing state that
 *   is handed to the converter.
 * @returns The produced Markdown chunk with trailing whitespace removed.
 */
function syncHtmlToMarkdown(html, options = {}) {
  // The whole document is processed as a single "partial" chunk.
  const { chunk } = processPartialHTMLToMarkdown(html, { options });
  return chunk.trimEnd();
}
|
|
12
|
+
|
|
13
|
+
export { syncHtmlToMarkdown };
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { P as Plugin } from './shared/mdream.a2AvjJLp.mjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Create a plugin that implements the Plugin interface with improved type inference
|
|
5
|
+
*
|
|
6
|
+
* @returns A complete plugin implementation
|
|
7
|
+
*/
|
|
8
|
+
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Plugin that filters nodes based on CSS selectors.
|
|
12
|
+
* Allows including or excluding nodes based on selectors.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* // Include only heading elements and their children
|
|
16
|
+
* filterPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* // Exclude navigation, sidebar, and footer
|
|
20
|
+
* filterPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
|
|
21
|
+
*/
|
|
22
|
+
declare function filterPlugin(options?: {
|
|
23
|
+
/** CSS selectors (or Tag Ids) for elements to include (all others will be excluded) */
|
|
24
|
+
include?: (string | number)[];
|
|
25
|
+
/** CSS selectors (or Tag Ids) for elements to exclude */
|
|
26
|
+
exclude?: (string | number)[];
|
|
27
|
+
/** Whether to also process the children of matching elements */
|
|
28
|
+
processChildren?: boolean;
|
|
29
|
+
keepAbsolute?: boolean;
|
|
30
|
+
}): Plugin;
|
|
31
|
+
|
|
32
|
+
interface FrontmatterPluginOptions {
|
|
33
|
+
/** Additional frontmatter fields to include */
|
|
34
|
+
additionalFields?: Record<string, string>;
|
|
35
|
+
/** Meta tag names to extract (beyond the standard ones) */
|
|
36
|
+
metaFields?: string[];
|
|
37
|
+
/** Custom formatter for frontmatter values */
|
|
38
|
+
formatValue?: (name: string, value: string) => string;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* A plugin that manages frontmatter generation from HTML head elements
|
|
42
|
+
* Extracts metadata from meta tags and title and generates YAML frontmatter
|
|
43
|
+
*/
|
|
44
|
+
declare function frontmatterPlugin(options?: FrontmatterPluginOptions): Plugin;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Plugin that isolates main content using the following priority order:
|
|
48
|
+
* 1. If an explicit <main> element exists (within 5 depth levels), use its content exclusively
|
|
49
|
+
* 2. Otherwise, find content between the first header tag (h1-h6) and first footer
|
|
50
|
+
* 3. If footer is within 5 levels of nesting from the header, use it as the end boundary
|
|
51
|
+
* 4. Exclude all content before the start marker and after the end marker
|
|
52
|
+
*
|
|
53
|
+
* @example
|
|
54
|
+
* ```html
|
|
55
|
+
* <body>
|
|
56
|
+
* <nav>Navigation (excluded)</nav>
|
|
57
|
+
* <main>
|
|
58
|
+
* <h1>Main Title (included)</h1>
|
|
59
|
+
* <p>Main content (included)</p>
|
|
60
|
+
* </main>
|
|
61
|
+
* <footer>Footer (excluded)</footer>
|
|
62
|
+
* </body>
|
|
63
|
+
* ```
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```html
|
|
67
|
+
* <body>
|
|
68
|
+
* <nav>Navigation (excluded)</nav>
|
|
69
|
+
* <h1>Main Title (included)</h1>
|
|
70
|
+
* <p>Main content (included)</p>
|
|
71
|
+
* <footer>Footer (excluded)</footer>
|
|
72
|
+
* </body>
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
declare function isolateMainPlugin(): Plugin;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Creates a plugin that implements readability.js style heuristics for content quality assessment
|
|
79
|
+
* Controls content inclusion/exclusion using buffer regions
|
|
80
|
+
*/
|
|
81
|
+
declare function readabilityPlugin(): Plugin;
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Creates a plugin that adds Tailwind class processing
|
|
85
|
+
*/
|
|
86
|
+
declare function tailwindPlugin(): Plugin;
|
|
87
|
+
|
|
88
|
+
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { P as Plugin } from './shared/mdream.a2AvjJLp.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Create a plugin that implements the Plugin interface with improved type inference
|
|
5
|
+
*
|
|
6
|
+
* @returns A complete plugin implementation
|
|
7
|
+
*/
|
|
8
|
+
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Plugin that filters nodes based on CSS selectors.
|
|
12
|
+
* Allows including or excluding nodes based on selectors.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* // Include only heading elements and their children
|
|
16
|
+
* filterPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* // Exclude navigation, sidebar, and footer
|
|
20
|
+
* filterPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
|
|
21
|
+
*/
|
|
22
|
+
declare function filterPlugin(options?: {
|
|
23
|
+
/** CSS selectors (or Tag Ids) for elements to include (all others will be excluded) */
|
|
24
|
+
include?: (string | number)[];
|
|
25
|
+
/** CSS selectors (or Tag Ids) for elements to exclude */
|
|
26
|
+
exclude?: (string | number)[];
|
|
27
|
+
/** Whether to also process the children of matching elements */
|
|
28
|
+
processChildren?: boolean;
|
|
29
|
+
keepAbsolute?: boolean;
|
|
30
|
+
}): Plugin;
|
|
31
|
+
|
|
32
|
+
interface FrontmatterPluginOptions {
|
|
33
|
+
/** Additional frontmatter fields to include */
|
|
34
|
+
additionalFields?: Record<string, string>;
|
|
35
|
+
/** Meta tag names to extract (beyond the standard ones) */
|
|
36
|
+
metaFields?: string[];
|
|
37
|
+
/** Custom formatter for frontmatter values */
|
|
38
|
+
formatValue?: (name: string, value: string) => string;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* A plugin that manages frontmatter generation from HTML head elements
|
|
42
|
+
* Extracts metadata from meta tags and title and generates YAML frontmatter
|
|
43
|
+
*/
|
|
44
|
+
declare function frontmatterPlugin(options?: FrontmatterPluginOptions): Plugin;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Plugin that isolates main content using the following priority order:
|
|
48
|
+
* 1. If an explicit <main> element exists (within 5 depth levels), use its content exclusively
|
|
49
|
+
* 2. Otherwise, find content between the first header tag (h1-h6) and first footer
|
|
50
|
+
* 3. If footer is within 5 levels of nesting from the header, use it as the end boundary
|
|
51
|
+
* 4. Exclude all content before the start marker and after the end marker
|
|
52
|
+
*
|
|
53
|
+
* @example
|
|
54
|
+
* ```html
|
|
55
|
+
* <body>
|
|
56
|
+
* <nav>Navigation (excluded)</nav>
|
|
57
|
+
* <main>
|
|
58
|
+
* <h1>Main Title (included)</h1>
|
|
59
|
+
* <p>Main content (included)</p>
|
|
60
|
+
* </main>
|
|
61
|
+
* <footer>Footer (excluded)</footer>
|
|
62
|
+
* </body>
|
|
63
|
+
* ```
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```html
|
|
67
|
+
* <body>
|
|
68
|
+
* <nav>Navigation (excluded)</nav>
|
|
69
|
+
* <h1>Main Title (included)</h1>
|
|
70
|
+
* <p>Main content (included)</p>
|
|
71
|
+
* <footer>Footer (excluded)</footer>
|
|
72
|
+
* </body>
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
declare function isolateMainPlugin(): Plugin;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Creates a plugin that implements readability.js style heuristics for content quality assessment
|
|
79
|
+
* Controls content inclusion/exclusion using buffer regions
|
|
80
|
+
*/
|
|
81
|
+
declare function readabilityPlugin(): Plugin;
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Creates a plugin that adds Tailwind class processing
|
|
85
|
+
*/
|
|
86
|
+
declare function tailwindPlugin(): Plugin;
|
|
87
|
+
|
|
88
|
+
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/plugins.mjs
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { c as createPlugin, f as frontmatterPlugin } from './shared/mdream.cpEmpxyh.mjs';
|
|
2
|
+
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.5zaIXVJz.mjs';
|
|
3
|
+
export { r as readabilityPlugin } from './shared/mdream.C8ruysN5.mjs';
|
|
4
|
+
import './shared/mdream.-hdaPj9a.mjs';
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from '../shared/mdream.a2AvjJLp.mjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Creates a configurable minimal preset with advanced options
|
|
5
|
+
*
|
|
6
|
+
* @param options HTML to Markdown options
|
|
7
|
+
* @returns HTML to Markdown options with configured plugins
|
|
8
|
+
*/
|
|
9
|
+
declare function withMinimalPreset(options?: HTMLToMarkdownOptions): HTMLToMarkdownOptions;
|
|
10
|
+
|
|
11
|
+
export { withMinimalPreset };
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from '../shared/mdream.a2AvjJLp.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Creates a configurable minimal preset with advanced options
|
|
5
|
+
*
|
|
6
|
+
* @param options HTML to Markdown options
|
|
7
|
+
* @returns HTML to Markdown options with configured plugins
|
|
8
|
+
*/
|
|
9
|
+
declare function withMinimalPreset(options?: HTMLToMarkdownOptions): HTMLToMarkdownOptions;
|
|
10
|
+
|
|
11
|
+
export { withMinimalPreset };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { y as TAG_FORM, t as TAG_FIELDSET, q as TAG_OBJECT, r as TAG_EMBED, a0 as TAG_FIGURE, B as TAG_FOOTER, z as TAG_ASIDE, s as TAG_IFRAME, w as TAG_INPUT, v as TAG_TEXTAREA, u as TAG_SELECT, x as TAG_BUTTON, A as TAG_NAV } from '../shared/mdream.-hdaPj9a.mjs';
|
|
2
|
+
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.5zaIXVJz.mjs';
|
|
3
|
+
import { f as frontmatterPlugin } from '../shared/mdream.cpEmpxyh.mjs';
|
|
4
|
+
|
|
5
|
+
/**
 * Build conversion options for the "minimal" preset.
 *
 * The preset always installs, in this order: the isolate-main plugin, the
 * frontmatter plugin, the Tailwind plugin, and a filter plugin that excludes
 * form controls, embedded media and page-chrome tags. Any plugins supplied by
 * the caller are appended after the preset's own plugins.
 *
 * @param options - Caller-supplied conversion options.
 * @returns A new options object: a shallow copy of `options` with `plugins`
 *   replaced by the combined plugin list.
 */
function withMinimalPreset(options = {}) {
  // Tags the filter plugin is told to exclude.
  const excludedTags = [
    TAG_FORM,
    TAG_FIELDSET,
    TAG_OBJECT,
    TAG_EMBED,
    TAG_FIGURE,
    TAG_FOOTER,
    TAG_ASIDE,
    TAG_IFRAME,
    TAG_INPUT,
    TAG_TEXTAREA,
    TAG_SELECT,
    TAG_BUTTON,
    TAG_NAV,
  ];
  const presetPlugins = [
    isolateMainPlugin(),
    frontmatterPlugin(),
    tailwindPlugin(),
    filterPlugin({ exclude: excludedTags }),
  ];
  // Caller plugins, when present, are appended after the preset's own.
  const plugins = options.plugins
    ? [...presetPlugins, ...options.plugins]
    : presetPlugins;
  return { ...options, plugins };
}
|
|
38
|
+
|
|
39
|
+
export { withMinimalPreset };
|