@mdream/js 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md ADDED
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Harlan Wilton
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # @mdream/js
2
+
3
+ [![npm version][npm-version-src]][npm-version-href]
4
+ [![npm downloads][npm-downloads-src]][npm-downloads-href]
5
+ [![License][license-src]][license-href]
6
+
7
+ JavaScript HTML-to-Markdown engine for mdream. Use this package when you need plugin hooks or custom transform plugins, or when you are targeting edge runtimes where the native Rust engine cannot run.
8
+
9
+ This package consolidates functionality previously split across `@mdream/core`, `@mdream/shared`, and `@mdream/llms-txt`.
10
+
11
+ > For most use cases, prefer the main `mdream` package, which uses the Rust engine for significantly better performance. Reach for `@mdream/js` when you need hooks, custom plugins, or edge runtime compatibility.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pnpm add @mdream/js
17
+ ```
18
+
19
+ ## Entry Points
20
+
21
+ | Import | Description |
22
+ |---|---|
23
+ | `@mdream/js` | Core `htmlToMarkdown` and `streamHtmlToMarkdown` APIs |
24
+ | `@mdream/js/plugins` | Plugin utilities: `createPlugin`, `extractionPlugin`, `filterPlugin`, `frontmatterPlugin`, `isolateMainPlugin`, `tailwindPlugin` |
25
+ | `@mdream/js/preset/minimal` | `withMinimalPreset` -- declarative config for frontmatter, isolateMain, tailwind, and filter plugins |
26
+ | `@mdream/js/negotiate` | HTTP content negotiation: `shouldServeMarkdown`, `parseAcceptHeader` |
27
+ | `@mdream/js/parse` | Low-level HTML parser: `parseHtml`, `parseHtmlStream` |
28
+ | `@mdream/js/splitter` | Single-pass markdown splitter: `htmlToMarkdownSplitChunks`, `htmlToMarkdownSplitChunksStream` |
29
+ | `@mdream/js/llms-txt` | llms.txt artifact generation: `generateLlmsTxtArtifacts`, `createLlmsTxtStream` |
30
+
31
+ ## Usage
32
+
33
+ ### Basic Conversion
34
+
35
+ ```typescript
36
+ import { htmlToMarkdown } from '@mdream/js'
37
+
38
+ const md = htmlToMarkdown('<h1>Hello</h1><p>World</p>')
39
+ // # Hello\n\nWorld
40
+ ```
41
+
42
+ ### Streaming
43
+
44
+ ```typescript
45
+ import { streamHtmlToMarkdown } from '@mdream/js'
46
+
47
+ const stream = streamHtmlToMarkdown(response.body, {
48
+ origin: 'https://example.com',
49
+ })
50
+
51
+ for await (const chunk of stream) {
52
+ process.stdout.write(chunk)
53
+ }
54
+ ```
55
+
56
+ ### With Plugins
57
+
58
+ ```typescript
59
+ import { htmlToMarkdown } from '@mdream/js'
60
+ import { createPlugin } from '@mdream/js/plugins'
61
+
62
+ const md = htmlToMarkdown(html, {
63
+ hooks: [
64
+ createPlugin({
65
+ onNodeEnter(element) {
66
+ if (element.name === 'aside')
67
+ return '' // skip asides
68
+ },
69
+ }),
70
+ ],
71
+ })
72
+ ```
73
+
74
+ ### Minimal Preset
75
+
76
+ ```typescript
77
+ import { htmlToMarkdown } from '@mdream/js'
78
+ import { withMinimalPreset } from '@mdream/js/preset/minimal'
79
+
80
+ const md = htmlToMarkdown(html, withMinimalPreset({
81
+ origin: 'https://example.com',
82
+ }))
83
+ ```
84
+
85
+ ### Content Negotiation
86
+
87
+ ```typescript
88
+ import { shouldServeMarkdown } from '@mdream/js/negotiate'
89
+
90
+ // Returns true when Accept header prefers text/markdown over text/html
91
+ if (shouldServeMarkdown(request.headers.accept, request.headers['sec-fetch-dest'])) {
92
+ return new Response(markdown, { headers: { 'content-type': 'text/markdown' } })
93
+ }
94
+ ```
95
+
96
+ ### llms.txt Generation
97
+
98
+ ```typescript
99
+ import { generateLlmsTxtArtifacts } from '@mdream/js/llms-txt'
100
+
101
+ const result = await generateLlmsTxtArtifacts({
102
+ files: processedPages,
103
+ siteName: 'My Site',
104
+ origin: 'https://example.com',
105
+ generateFull: true,
106
+ })
107
+
108
+ // result.llmsTxt -- index file with links
109
+ // result.llmsFullTxt -- single file with all page content
110
+ ```
111
+
112
+ ### Markdown Splitter
113
+
114
+ ```typescript
115
+ import { htmlToMarkdownSplitChunks } from '@mdream/js/splitter'
116
+
117
+ const chunks = htmlToMarkdownSplitChunks(html, {
118
+ chunkSize: 1000,
119
+ origin: 'https://example.com',
120
+ })
121
+ ```
122
+
123
+ ## License
124
+
125
+ [MIT License](./LICENSE.md)
126
+
127
+ <!-- Badges -->
128
+ [npm-version-src]: https://img.shields.io/npm/v/@mdream/js/latest.svg?style=flat&colorA=18181B&colorB=4C9BE0
129
+ [npm-version-href]: https://npmjs.com/package/@mdream/js
130
+
131
+ [npm-downloads-src]: https://img.shields.io/npm/dm/@mdream/js.svg?style=flat&colorA=18181B&colorB=4C9BE0
132
+ [npm-downloads-href]: https://npm.chart.dev/@mdream/js
133
+
134
+ [license-src]: https://img.shields.io/npm/l/@mdream/js.svg?style=flat&colorA=18181B&colorB=4C9BE0
135
+ [license-href]: https://npmjs.com/package/@mdream/js
package/bin/mdream.mjs ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ import('../dist/cli.mjs')
@@ -0,0 +1,137 @@
1
+ const TAG_H1 = 7;
2
+ const TAG_H2 = 8;
3
+ const TAG_H3 = 9;
4
+ const TAG_H4 = 10;
5
+ const TAG_H5 = 11;
6
+ const TAG_H6 = 12;
7
+ const HTML_ENTITIES = {
8
+ "&amp;": "&",
9
+ "&lt;": "<",
10
+ "&gt;": ">",
11
+ "&quot;": "\"",
12
+ "&#39;": "'",
13
+ "&apos;": "'",
14
+ "&nbsp;": " "
15
+ };
16
+ const ELEMENT_NODE = 1;
17
+ const TEXT_NODE = 2;
18
+ const NodeEventEnter = 0;
19
+ const NodeEventExit = 1;
20
+ const TagIdMap = {
21
+ html: 0,
22
+ head: 1,
23
+ details: 2,
24
+ summary: 3,
25
+ title: 4,
26
+ meta: 5,
27
+ br: 6,
28
+ h1: 7,
29
+ h2: 8,
30
+ h3: 9,
31
+ h4: 10,
32
+ h5: 11,
33
+ h6: 12,
34
+ hr: 13,
35
+ strong: 14,
36
+ b: 15,
37
+ em: 16,
38
+ i: 17,
39
+ del: 18,
40
+ sub: 19,
41
+ sup: 20,
42
+ ins: 21,
43
+ blockquote: 22,
44
+ code: 23,
45
+ ul: 24,
46
+ li: 25,
47
+ a: 26,
48
+ img: 27,
49
+ table: 28,
50
+ thead: 29,
51
+ tr: 30,
52
+ th: 31,
53
+ td: 32,
54
+ ol: 33,
55
+ pre: 34,
56
+ p: 35,
57
+ div: 36,
58
+ span: 37,
59
+ tbody: 38,
60
+ tfoot: 39,
61
+ form: 40,
62
+ nav: 41,
63
+ label: 42,
64
+ button: 43,
65
+ body: 44,
66
+ center: 45,
67
+ kbd: 46,
68
+ footer: 47,
69
+ path: 48,
70
+ svg: 49,
71
+ article: 50,
72
+ section: 51,
73
+ script: 52,
74
+ style: 53,
75
+ link: 54,
76
+ area: 55,
77
+ base: 56,
78
+ col: 57,
79
+ embed: 58,
80
+ input: 59,
81
+ keygen: 60,
82
+ param: 61,
83
+ source: 62,
84
+ track: 63,
85
+ wbr: 64,
86
+ select: 65,
87
+ textarea: 66,
88
+ option: 67,
89
+ fieldset: 68,
90
+ legend: 69,
91
+ audio: 70,
92
+ video: 71,
93
+ canvas: 72,
94
+ iframe: 73,
95
+ map: 74,
96
+ dialog: 75,
97
+ meter: 76,
98
+ progress: 77,
99
+ template: 78,
100
+ abbr: 79,
101
+ mark: 80,
102
+ q: 81,
103
+ samp: 82,
104
+ small: 83,
105
+ noscript: 84,
106
+ noframes: 85,
107
+ xmp: 86,
108
+ plaintext: 87,
109
+ aside: 88,
110
+ u: 89,
111
+ cite: 90,
112
+ dfn: 91,
113
+ var: 92,
114
+ time: 93,
115
+ bdo: 94,
116
+ ruby: 95,
117
+ rt: 96,
118
+ rp: 97,
119
+ dd: 98,
120
+ dt: 99,
121
+ dl: 101,
122
+ address: 100,
123
+ figure: 102,
124
+ object: 103,
125
+ main: 104,
126
+ header: 105,
127
+ figcaption: 106,
128
+ caption: 107
129
+ };
130
+ Object.entries(TagIdMap).map(([name, id]) => [id, name]);
131
+ const NO_SPACING = [0, 0];
132
+ const DEFAULT_BLOCK_SPACING = [2, 2];
133
+ const BLOCKQUOTE_SPACING = [1, 1];
134
+ const LIST_ITEM_SPACING = [1, 0];
135
+ const TABLE_ROW_SPACING = [0, 1];
136
+ //#endregion
137
+ export { TagIdMap as _, LIST_ITEM_SPACING as a, NodeEventExit as c, TAG_H2 as d, TAG_H3 as f, TEXT_NODE as g, TAG_H6 as h, HTML_ENTITIES as i, TABLE_ROW_SPACING as l, TAG_H5 as m, DEFAULT_BLOCK_SPACING as n, NO_SPACING as o, TAG_H4 as p, ELEMENT_NODE as r, NodeEventEnter as s, BLOCKQUOTE_SPACING as t, TAG_H1 as u };
@@ -0,0 +1,14 @@
1
+ import { _ as TransformPlugin, c as MdreamOptions } from "./types.mjs";
2
+
3
+ //#region src/pluggable/plugin.d.ts
4
+ /**
5
+ * Create a plugin with type-safe hook definitions.
6
+ * All TransformPlugin fields are optional, so this is a typed identity function.
7
+ */
8
+ declare function createPlugin(plugin: TransformPlugin): TransformPlugin;
9
+ //#endregion
10
+ //#region src/index.d.ts
11
+ declare function htmlToMarkdown(html: string, options?: Partial<MdreamOptions>): string;
12
+ declare function streamHtmlToMarkdown(htmlStream: ReadableStream<Uint8Array | string> | null, options?: Partial<MdreamOptions>): AsyncIterable<string>;
13
+ //#endregion
14
+ export { streamHtmlToMarkdown as n, createPlugin as r, htmlToMarkdown as t };
@@ -0,0 +1,10 @@
1
+ import { i as EngineOptions } from "./types.mjs";
2
+
3
+ //#region src/preset/minimal.d.ts
4
+ /**
5
+ * Creates a configurable minimal preset with advanced options.
6
+ * Returns declarative plugin config that works with both JS and Rust engines.
7
+ */
8
+ declare function withMinimalPreset<T extends EngineOptions>(options?: T): T;
9
+ //#endregion
10
+ export { withMinimalPreset as t };