@mdream/js 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +9 -0
- package/README.md +135 -0
- package/bin/mdream.mjs +2 -0
- package/dist/_chunks/const.mjs +137 -0
- package/dist/_chunks/index.d.mts +14 -0
- package/dist/_chunks/minimal.d.mts +10 -0
- package/dist/_chunks/parse.mjs +1201 -0
- package/dist/_chunks/plugins.mjs +791 -0
- package/dist/_chunks/resolve-plugins.mjs +302 -0
- package/dist/_chunks/src.mjs +344 -0
- package/dist/_chunks/types.d.mts +390 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +27 -0
- package/dist/index.d.mts +4 -0
- package/dist/index.mjs +7 -0
- package/dist/llms-txt.d.mts +89 -0
- package/dist/llms-txt.mjs +347 -0
- package/dist/negotiate.d.mts +26 -0
- package/dist/negotiate.mjs +92 -0
- package/dist/parse.d.mts +57 -0
- package/dist/parse.mjs +3 -0
- package/dist/plugins.d.mts +93 -0
- package/dist/plugins.mjs +3 -0
- package/dist/preset/minimal.d.mts +2 -0
- package/dist/preset/minimal.mjs +34 -0
- package/dist/splitter.d.mts +21 -0
- package/dist/splitter.mjs +215 -0
- package/package.json +93 -0
package/LICENSE.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Harlan Wilton
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# @mdream/js
|
|
2
|
+
|
|
3
|
+
[![npm version][npm-version-src]][npm-version-href]
|
|
4
|
+
[![npm downloads][npm-downloads-src]][npm-downloads-href]
|
|
5
|
+
[![License][license-src]][license-href]
|
|
6
|
+
|
|
7
|
+
JavaScript HTML-to-Markdown engine for mdream. Use this package when you need plugin hooks, custom transform plugins, or are targeting edge runtimes where the native Rust engine cannot run.
|
|
8
|
+
|
|
9
|
+
This package consolidates functionality previously split across `@mdream/core`, `@mdream/shared`, and `@mdream/llms-txt`.
|
|
10
|
+
|
|
11
|
+
> For most use cases, prefer the main `mdream` package which uses the Rust engine for significantly better performance. Reach for `@mdream/js` when you need hooks, custom plugins, or edge runtime compatibility.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pnpm add @mdream/js
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Entry Points
|
|
20
|
+
|
|
21
|
+
| Import | Description |
|
|
22
|
+
|---|---|
|
|
23
|
+
| `@mdream/js` | Core `htmlToMarkdown` and `streamHtmlToMarkdown` APIs |
|
|
24
|
+
| `@mdream/js/plugins` | Plugin utilities: `createPlugin`, `extractionPlugin`, `filterPlugin`, `frontmatterPlugin`, `isolateMainPlugin`, `tailwindPlugin` |
|
|
25
|
+
| `@mdream/js/preset/minimal` | `withMinimalPreset` -- declarative config for frontmatter, isolateMain, tailwind, and filter plugins |
|
|
26
|
+
| `@mdream/js/negotiate` | HTTP content negotiation: `shouldServeMarkdown`, `parseAcceptHeader` |
|
|
27
|
+
| `@mdream/js/parse` | Low-level HTML parser: `parseHtml`, `parseHtmlStream` |
|
|
28
|
+
| `@mdream/js/splitter` | Single-pass markdown splitter: `htmlToMarkdownSplitChunks`, `htmlToMarkdownSplitChunksStream` |
|
|
29
|
+
| `@mdream/js/llms-txt` | llms.txt artifact generation: `generateLlmsTxtArtifacts`, `createLlmsTxtStream` |
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
### Basic Conversion
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
import { htmlToMarkdown } from '@mdream/js'
|
|
37
|
+
|
|
38
|
+
const md = htmlToMarkdown('<h1>Hello</h1><p>World</p>')
|
|
39
|
+
// # Hello\n\nWorld
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Streaming
|
|
43
|
+
|
|
44
|
+
```typescript
|
|
45
|
+
import { streamHtmlToMarkdown } from '@mdream/js'
|
|
46
|
+
|
|
47
|
+
const stream = streamHtmlToMarkdown(response.body, {
|
|
48
|
+
origin: 'https://example.com',
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
for await (const chunk of stream) {
|
|
52
|
+
process.stdout.write(chunk)
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### With Plugins
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
import { htmlToMarkdown } from '@mdream/js'
|
|
60
|
+
import { createPlugin } from '@mdream/js/plugins'
|
|
61
|
+
|
|
62
|
+
const md = htmlToMarkdown(html, {
|
|
63
|
+
hooks: [
|
|
64
|
+
createPlugin({
|
|
65
|
+
onNodeEnter(element) {
|
|
66
|
+
if (element.name === 'aside')
|
|
67
|
+
return '' // skip asides
|
|
68
|
+
},
|
|
69
|
+
}),
|
|
70
|
+
],
|
|
71
|
+
})
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Minimal Preset
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
import { htmlToMarkdown } from '@mdream/js'
|
|
78
|
+
import { withMinimalPreset } from '@mdream/js/preset/minimal'
|
|
79
|
+
|
|
80
|
+
const md = htmlToMarkdown(html, withMinimalPreset({
|
|
81
|
+
origin: 'https://example.com',
|
|
82
|
+
}))
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Content Negotiation
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
import { shouldServeMarkdown } from '@mdream/js/negotiate'
|
|
89
|
+
|
|
90
|
+
// Returns true when Accept header prefers text/markdown over text/html
|
|
91
|
+
if (shouldServeMarkdown(request.headers.accept, request.headers['sec-fetch-dest'])) {
|
|
92
|
+
return new Response(markdown, { headers: { 'content-type': 'text/markdown' } })
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### llms.txt Generation
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
import { generateLlmsTxtArtifacts } from '@mdream/js/llms-txt'
|
|
100
|
+
|
|
101
|
+
const result = await generateLlmsTxtArtifacts({
|
|
102
|
+
files: processedPages,
|
|
103
|
+
siteName: 'My Site',
|
|
104
|
+
origin: 'https://example.com',
|
|
105
|
+
generateFull: true,
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
// result.llmsTxt -- index file with links
|
|
109
|
+
// result.llmsFullTxt -- single file with all page content
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Markdown Splitter
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
import { htmlToMarkdownSplitChunks } from '@mdream/js/splitter'
|
|
116
|
+
|
|
117
|
+
const chunks = htmlToMarkdownSplitChunks(html, {
|
|
118
|
+
chunkSize: 1000,
|
|
119
|
+
origin: 'https://example.com',
|
|
120
|
+
})
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
[MIT License](./LICENSE.md)
|
|
126
|
+
|
|
127
|
+
<!-- Badges -->
|
|
128
|
+
[npm-version-src]: https://img.shields.io/npm/v/@mdream/js/latest.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
129
|
+
[npm-version-href]: https://npmjs.com/package/@mdream/js
|
|
130
|
+
|
|
131
|
+
[npm-downloads-src]: https://img.shields.io/npm/dm/@mdream/js.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
132
|
+
[npm-downloads-href]: https://npm.chart.dev/@mdream/js
|
|
133
|
+
|
|
134
|
+
[license-src]: https://img.shields.io/npm/l/@mdream/js.svg?style=flat&colorA=18181B&colorB=4C9BE0
|
|
135
|
+
[license-href]: https://npmjs.com/package/@mdream/js
|
package/bin/mdream.mjs
ADDED
(2-line file; its contents are not shown in this listing)
package/dist/_chunks/const.mjs
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
const TAG_H1 = 7;
|
|
2
|
+
const TAG_H2 = 8;
|
|
3
|
+
const TAG_H3 = 9;
|
|
4
|
+
const TAG_H4 = 10;
|
|
5
|
+
const TAG_H5 = 11;
|
|
6
|
+
const TAG_H6 = 12;
|
|
7
|
+
/**
 * Lookup table from an HTML character reference (as it appears in markup)
 * to the literal character it decodes to.
 *
 * NOTE(review): the keys are restored from a rendering in which the entity
 * references had themselves been decoded, leaving `"&": "&"`, an
 * unparseable `"""` key and two identical `"'"` keys. Confirm the exact
 * spellings (in particular `&#39;` vs. another numeric form) against the
 * published dist/_chunks/const.mjs.
 */
const HTML_ENTITIES = {
	"&amp;": "&",
	"&lt;": "<",
	"&gt;": ">",
	"&quot;": "\"",
	// Both the numeric and the named apostrophe reference decode to "'".
	"&#39;": "'",
	"&apos;": "'",
	// NOTE(review): value assumed to be a plain U+0020 space — confirm.
	"&nbsp;": " "
};
|
|
16
|
+
const ELEMENT_NODE = 1;
|
|
17
|
+
const TEXT_NODE = 2;
|
|
18
|
+
const NodeEventEnter = 0;
|
|
19
|
+
const NodeEventExit = 1;
|
|
20
|
+
const TagIdMap = {
|
|
21
|
+
html: 0,
|
|
22
|
+
head: 1,
|
|
23
|
+
details: 2,
|
|
24
|
+
summary: 3,
|
|
25
|
+
title: 4,
|
|
26
|
+
meta: 5,
|
|
27
|
+
br: 6,
|
|
28
|
+
h1: 7,
|
|
29
|
+
h2: 8,
|
|
30
|
+
h3: 9,
|
|
31
|
+
h4: 10,
|
|
32
|
+
h5: 11,
|
|
33
|
+
h6: 12,
|
|
34
|
+
hr: 13,
|
|
35
|
+
strong: 14,
|
|
36
|
+
b: 15,
|
|
37
|
+
em: 16,
|
|
38
|
+
i: 17,
|
|
39
|
+
del: 18,
|
|
40
|
+
sub: 19,
|
|
41
|
+
sup: 20,
|
|
42
|
+
ins: 21,
|
|
43
|
+
blockquote: 22,
|
|
44
|
+
code: 23,
|
|
45
|
+
ul: 24,
|
|
46
|
+
li: 25,
|
|
47
|
+
a: 26,
|
|
48
|
+
img: 27,
|
|
49
|
+
table: 28,
|
|
50
|
+
thead: 29,
|
|
51
|
+
tr: 30,
|
|
52
|
+
th: 31,
|
|
53
|
+
td: 32,
|
|
54
|
+
ol: 33,
|
|
55
|
+
pre: 34,
|
|
56
|
+
p: 35,
|
|
57
|
+
div: 36,
|
|
58
|
+
span: 37,
|
|
59
|
+
tbody: 38,
|
|
60
|
+
tfoot: 39,
|
|
61
|
+
form: 40,
|
|
62
|
+
nav: 41,
|
|
63
|
+
label: 42,
|
|
64
|
+
button: 43,
|
|
65
|
+
body: 44,
|
|
66
|
+
center: 45,
|
|
67
|
+
kbd: 46,
|
|
68
|
+
footer: 47,
|
|
69
|
+
path: 48,
|
|
70
|
+
svg: 49,
|
|
71
|
+
article: 50,
|
|
72
|
+
section: 51,
|
|
73
|
+
script: 52,
|
|
74
|
+
style: 53,
|
|
75
|
+
link: 54,
|
|
76
|
+
area: 55,
|
|
77
|
+
base: 56,
|
|
78
|
+
col: 57,
|
|
79
|
+
embed: 58,
|
|
80
|
+
input: 59,
|
|
81
|
+
keygen: 60,
|
|
82
|
+
param: 61,
|
|
83
|
+
source: 62,
|
|
84
|
+
track: 63,
|
|
85
|
+
wbr: 64,
|
|
86
|
+
select: 65,
|
|
87
|
+
textarea: 66,
|
|
88
|
+
option: 67,
|
|
89
|
+
fieldset: 68,
|
|
90
|
+
legend: 69,
|
|
91
|
+
audio: 70,
|
|
92
|
+
video: 71,
|
|
93
|
+
canvas: 72,
|
|
94
|
+
iframe: 73,
|
|
95
|
+
map: 74,
|
|
96
|
+
dialog: 75,
|
|
97
|
+
meter: 76,
|
|
98
|
+
progress: 77,
|
|
99
|
+
template: 78,
|
|
100
|
+
abbr: 79,
|
|
101
|
+
mark: 80,
|
|
102
|
+
q: 81,
|
|
103
|
+
samp: 82,
|
|
104
|
+
small: 83,
|
|
105
|
+
noscript: 84,
|
|
106
|
+
noframes: 85,
|
|
107
|
+
xmp: 86,
|
|
108
|
+
plaintext: 87,
|
|
109
|
+
aside: 88,
|
|
110
|
+
u: 89,
|
|
111
|
+
cite: 90,
|
|
112
|
+
dfn: 91,
|
|
113
|
+
var: 92,
|
|
114
|
+
time: 93,
|
|
115
|
+
bdo: 94,
|
|
116
|
+
ruby: 95,
|
|
117
|
+
rt: 96,
|
|
118
|
+
rp: 97,
|
|
119
|
+
dd: 98,
|
|
120
|
+
dt: 99,
|
|
121
|
+
dl: 101,
|
|
122
|
+
address: 100,
|
|
123
|
+
figure: 102,
|
|
124
|
+
object: 103,
|
|
125
|
+
main: 104,
|
|
126
|
+
header: 105,
|
|
127
|
+
figcaption: 106,
|
|
128
|
+
caption: 107
|
|
129
|
+
};
|
|
130
|
+
Object.entries(TagIdMap).map(([name, id]) => [id, name]);
|
|
131
|
+
const NO_SPACING = [0, 0];
|
|
132
|
+
const DEFAULT_BLOCK_SPACING = [2, 2];
|
|
133
|
+
const BLOCKQUOTE_SPACING = [1, 1];
|
|
134
|
+
const LIST_ITEM_SPACING = [1, 0];
|
|
135
|
+
const TABLE_ROW_SPACING = [0, 1];
|
|
136
|
+
//#endregion
|
|
137
|
+
export { TagIdMap as _, LIST_ITEM_SPACING as a, NodeEventExit as c, TAG_H2 as d, TAG_H3 as f, TEXT_NODE as g, TAG_H6 as h, HTML_ENTITIES as i, TABLE_ROW_SPACING as l, TAG_H5 as m, DEFAULT_BLOCK_SPACING as n, NO_SPACING as o, TAG_H4 as p, ELEMENT_NODE as r, NodeEventEnter as s, BLOCKQUOTE_SPACING as t, TAG_H1 as u };
|
|
package/dist/_chunks/index.d.mts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { _ as TransformPlugin, c as MdreamOptions } from "./types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/pluggable/plugin.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Create a plugin with type-safe hook definitions.
|
|
6
|
+
* All TransformPlugin fields are optional, so this is a typed identity function.
|
|
7
|
+
*/
|
|
8
|
+
declare function createPlugin(plugin: TransformPlugin): TransformPlugin;
|
|
9
|
+
//#endregion
|
|
10
|
+
//#region src/index.d.ts
|
|
11
|
+
declare function htmlToMarkdown(html: string, options?: Partial<MdreamOptions>): string;
|
|
12
|
+
declare function streamHtmlToMarkdown(htmlStream: ReadableStream<Uint8Array | string> | null, options?: Partial<MdreamOptions>): AsyncIterable<string>;
|
|
13
|
+
//#endregion
|
|
14
|
+
export { streamHtmlToMarkdown as n, createPlugin as r, htmlToMarkdown as t };
|
|
package/dist/_chunks/minimal.d.mts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { i as EngineOptions } from "./types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/preset/minimal.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Creates a configurable minimal preset with advanced options.
|
|
6
|
+
* Returns declarative plugin config that works with both JS and Rust engines.
|
|
7
|
+
*/
|
|
8
|
+
declare function withMinimalPreset<T extends EngineOptions>(options?: T): T;
|
|
9
|
+
//#endregion
|
|
10
|
+
export { withMinimalPreset as t };
|