mdream 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +5 -4
- package/dist/index.d.ts +5 -4
- package/dist/index.mjs +2 -2
- package/dist/plugins.d.mts +3 -7
- package/dist/plugins.d.ts +3 -7
- package/dist/preset/minimal.d.mts +1 -1
- package/dist/preset/minimal.d.ts +1 -1
- package/dist/shared/mdream.BFdDSM96.d.ts +9 -0
- package/dist/shared/{mdream.-SGj02be.d.mts → mdream.C0Qx0F7t.d.mts} +2 -2
- package/dist/shared/{mdream.-SGj02be.d.ts → mdream.C0Qx0F7t.d.ts} +2 -2
- package/dist/shared/mdream.DMe7T-0M.d.mts +9 -0
- package/dist/shared/{mdream.BUjJQ2oF.mjs → mdream.DZEl9tTZ.mjs} +8 -6
- package/package.json +8 -8
package/README.md
CHANGED
|
@@ -22,12 +22,10 @@
|
|
|
22
22
|
|
|
23
23
|
- 🧠 Content Extraction: [Readability.js]() scoring heuristics for [~50% fewer tokens*]() and improved accuracy.
|
|
24
24
|
- 🔍 GitHub Flavored Markdown: Frontmatter, Nested & HTML markup support.
|
|
25
|
-
- Tailwind CSS: Converts Tailwind CSS classes to Markdown for better readability.
|
|
26
25
|
|
|
27
26
|
**Ultra Performant**
|
|
28
27
|
- 🚀 Convert 1.4MB of HTML in [~50ms*]() with advanced streaming support, including content-based buffering.
|
|
29
28
|
- ⚡ 5kB gzip, zero dependencies.
|
|
30
|
-
- Streaming support
|
|
31
29
|
|
|
32
30
|
**Adaptable**
|
|
33
31
|
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { Readable } from 'node:stream';
|
|
|
2
2
|
import { cac } from 'cac';
|
|
3
3
|
import { f as frontmatterPlugin } from './shared/mdream.CNrwlePY.mjs';
|
|
4
4
|
import { r as readabilityPlugin } from './shared/mdream.VU-fHLcf.mjs';
|
|
5
|
-
import { s as streamHtmlToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
5
|
+
import { s as streamHtmlToMarkdown } from './shared/mdream.DZEl9tTZ.mjs';
|
|
6
6
|
import './shared/mdream.C8Xgmr_a.mjs';
|
|
7
7
|
|
|
8
8
|
async function streamingConvert(options = {}) {
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import { H as HTMLToMarkdownOptions } from './shared/mdream.-SGj02be.mjs';
|
|
2
|
-
export { B as BufferRegion,
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.C0Qx0F7t.mjs';
|
|
2
|
+
export { B as BufferRegion, b as ELEMENT_NODE, E as ElementNode, f as HandlerContext, d as MdreamProcessingState, M as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.C0Qx0F7t.mjs';
|
|
3
3
|
import { ReadableStream } from 'node:stream/web';
|
|
4
|
+
export { E as ExtractedElement } from './shared/mdream.DMe7T-0M.mjs';
|
|
5
|
+
|
|
6
|
+
declare const TagIdMap: Record<string, number>;
|
|
4
7
|
|
|
5
8
|
/**
|
|
6
9
|
* Creates a markdown stream from an HTML stream
|
|
@@ -10,8 +13,6 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
13
|
*/
|
|
11
14
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
15
|
|
|
13
|
-
declare const TagIdMap: Record<string, number>;
|
|
14
|
-
|
|
15
16
|
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
16
17
|
|
|
17
18
|
export { HTMLToMarkdownOptions, TagIdMap, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import { H as HTMLToMarkdownOptions } from './shared/mdream.-SGj02be.js';
|
|
2
|
-
export { B as BufferRegion,
|
|
1
|
+
import { H as HTMLToMarkdownOptions } from './shared/mdream.C0Qx0F7t.js';
|
|
2
|
+
export { B as BufferRegion, b as ELEMENT_NODE, E as ElementNode, f as HandlerContext, d as MdreamProcessingState, M as MdreamRuntimeState, N as Node, e as NodeEvent, P as Plugin, a as PluginCreationOptions, T as TEXT_NODE, g as TagHandler, c as TextNode } from './shared/mdream.C0Qx0F7t.js';
|
|
3
3
|
import { ReadableStream } from 'node:stream/web';
|
|
4
|
+
export { E as ExtractedElement } from './shared/mdream.BFdDSM96.js';
|
|
5
|
+
|
|
6
|
+
declare const TagIdMap: Record<string, number>;
|
|
4
7
|
|
|
5
8
|
/**
|
|
6
9
|
* Creates a markdown stream from an HTML stream
|
|
@@ -10,8 +13,6 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
13
|
*/
|
|
11
14
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
15
|
|
|
13
|
-
declare const TagIdMap: Record<string, number>;
|
|
14
|
-
|
|
15
16
|
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
16
17
|
|
|
17
18
|
export { HTMLToMarkdownOptions, TagIdMap, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { p as processPartialHTMLToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
2
|
-
export { s as streamHtmlToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
1
|
+
import { p as processPartialHTMLToMarkdown } from './shared/mdream.DZEl9tTZ.mjs';
|
|
2
|
+
export { s as streamHtmlToMarkdown } from './shared/mdream.DZEl9tTZ.mjs';
|
|
3
3
|
export { T as TagIdMap } from './shared/mdream.C8Xgmr_a.mjs';
|
|
4
4
|
|
|
5
5
|
function htmlToMarkdown(html, options = {}) {
|
package/dist/plugins.d.mts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared/mdream.-SGj02be.mjs';
|
|
1
|
+
import { P as Plugin } from './shared/mdream.C0Qx0F7t.mjs';
|
|
2
|
+
export { e as extractionPlugin } from './shared/mdream.DMe7T-0M.mjs';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -7,11 +8,6 @@ import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared
|
|
|
7
8
|
*/
|
|
8
9
|
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
10
|
|
|
10
|
-
interface ExtractedElement extends ElementNode {
|
|
11
|
-
textContent: string;
|
|
12
|
-
}
|
|
13
|
-
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
14
|
-
|
|
15
11
|
/**
|
|
16
12
|
* Plugin that filters nodes based on CSS selectors.
|
|
17
13
|
* Allows including or excluding nodes based on selectors.
|
|
@@ -90,4 +86,4 @@ declare function readabilityPlugin(): Plugin;
|
|
|
90
86
|
*/
|
|
91
87
|
declare function tailwindPlugin(): Plugin;
|
|
92
88
|
|
|
93
|
-
export { createPlugin,
|
|
89
|
+
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/plugins.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared/mdream.-SGj02be.js';
|
|
1
|
+
import { P as Plugin } from './shared/mdream.C0Qx0F7t.js';
|
|
2
|
+
export { e as extractionPlugin } from './shared/mdream.BFdDSM96.js';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -7,11 +8,6 @@ import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared
|
|
|
7
8
|
*/
|
|
8
9
|
declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
9
10
|
|
|
10
|
-
interface ExtractedElement extends ElementNode {
|
|
11
|
-
textContent: string;
|
|
12
|
-
}
|
|
13
|
-
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
14
|
-
|
|
15
11
|
/**
|
|
16
12
|
* Plugin that filters nodes based on CSS selectors.
|
|
17
13
|
* Allows including or excluding nodes based on selectors.
|
|
@@ -90,4 +86,4 @@ declare function readabilityPlugin(): Plugin;
|
|
|
90
86
|
*/
|
|
91
87
|
declare function tailwindPlugin(): Plugin;
|
|
92
88
|
|
|
93
|
-
export { createPlugin,
|
|
89
|
+
export { createPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
|
package/dist/preset/minimal.d.ts
CHANGED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { E as ElementNode, M as MdreamRuntimeState, P as Plugin } from './mdream.C0Qx0F7t.js';
|
|
2
|
+
|
|
3
|
+
interface ExtractedElement extends ElementNode {
|
|
4
|
+
textContent: string;
|
|
5
|
+
}
|
|
6
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
7
|
+
|
|
8
|
+
export { extractionPlugin as e };
|
|
9
|
+
export type { ExtractedElement as E };
|
|
@@ -222,5 +222,5 @@ interface TagHandler {
|
|
|
222
222
|
excludesTextNodes?: boolean;
|
|
223
223
|
}
|
|
224
224
|
|
|
225
|
-
export {
|
|
226
|
-
export type { BufferRegion as B, HTMLToMarkdownOptions as H,
|
|
225
|
+
export { TEXT_NODE as T, ELEMENT_NODE as b };
|
|
226
|
+
export type { BufferRegion as B, ElementNode as E, HTMLToMarkdownOptions as H, MdreamRuntimeState as M, Node as N, Plugin as P, PluginCreationOptions as a, TextNode as c, MdreamProcessingState as d, NodeEvent as e, HandlerContext as f, TagHandler as g };
|
|
@@ -222,5 +222,5 @@ interface TagHandler {
|
|
|
222
222
|
excludesTextNodes?: boolean;
|
|
223
223
|
}
|
|
224
224
|
|
|
225
|
-
export {
|
|
226
|
-
export type { BufferRegion as B, HTMLToMarkdownOptions as H,
|
|
225
|
+
export { TEXT_NODE as T, ELEMENT_NODE as b };
|
|
226
|
+
export type { BufferRegion as B, ElementNode as E, HTMLToMarkdownOptions as H, MdreamRuntimeState as M, Node as N, Plugin as P, PluginCreationOptions as a, TextNode as c, MdreamProcessingState as d, NodeEvent as e, HandlerContext as f, TagHandler as g };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { E as ElementNode, M as MdreamRuntimeState, P as Plugin } from './mdream.C0Qx0F7t.mjs';
|
|
2
|
+
|
|
3
|
+
interface ExtractedElement extends ElementNode {
|
|
4
|
+
textContent: string;
|
|
5
|
+
}
|
|
6
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
7
|
+
|
|
8
|
+
export { extractionPlugin as e };
|
|
9
|
+
export type { ExtractedElement as E };
|
|
@@ -1402,13 +1402,15 @@ function parseAttributes(attrStr) {
|
|
|
1402
1402
|
}
|
|
1403
1403
|
i++;
|
|
1404
1404
|
}
|
|
1405
|
-
if (
|
|
1406
|
-
if (
|
|
1405
|
+
if (state === QUOTED_VALUE || state === UNQUOTED_VALUE) {
|
|
1406
|
+
if (name) {
|
|
1407
1407
|
result[name] = attrStr.substring(valueStart, i);
|
|
1408
|
-
}
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1408
|
+
}
|
|
1409
|
+
} else if (state === NAME || state === AFTER_NAME || state === BEFORE_VALUE) {
|
|
1410
|
+
nameEnd = nameEnd || i;
|
|
1411
|
+
const currentName = attrStr.substring(nameStart, nameEnd).toLowerCase();
|
|
1412
|
+
if (currentName) {
|
|
1413
|
+
result[currentName] = "";
|
|
1412
1414
|
}
|
|
1413
1415
|
}
|
|
1414
1416
|
return result;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdream",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.2.6",
|
|
4
|
+
"version": "0.2.8",
|
|
5
5
|
"description": "Ultra-performant JavaScript HTML to Markdown converter optimized for LLMs.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -29,16 +29,16 @@
|
|
|
29
29
|
"cac": "^6.7.14"
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
|
-
"@antfu/eslint-config": "^4.
|
|
33
|
-
"@types/node": "^22.15.
|
|
34
|
-
"bumpp": "^10.
|
|
35
|
-
"crawlee": "^3.13.
|
|
36
|
-
"eslint": "^9.
|
|
32
|
+
"@antfu/eslint-config": "^4.16.1",
|
|
33
|
+
"@types/node": "^22.15.34",
|
|
34
|
+
"bumpp": "^10.2.0",
|
|
35
|
+
"crawlee": "^3.13.8",
|
|
36
|
+
"eslint": "^9.30.0",
|
|
37
37
|
"llm-cost": "^1.0.5",
|
|
38
|
-
"playwright": "^1.
|
|
38
|
+
"playwright": "^1.53.1",
|
|
39
39
|
"typescript": "5.8.3",
|
|
40
40
|
"unbuild": "^3.5.0",
|
|
41
|
-
"vitest": "^3.
|
|
41
|
+
"vitest": "^3.2.4"
|
|
42
42
|
},
|
|
43
43
|
"scripts": {
|
|
44
44
|
"flame": "pnpm build && unbuild bench/bundle && clinic flame -- node bench/bundle/dist/string.mjs 10",
|