mdream 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/cli.mjs +4 -4
- package/dist/index.d.mts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.mjs +3 -3
- package/dist/plugins.d.mts +2 -2
- package/dist/plugins.d.ts +2 -2
- package/dist/plugins.mjs +8 -8
- package/dist/preset/minimal.mjs +3 -3
- package/dist/shared/{mdream.CsDVbUMp.mjs → mdream.BUjJQ2oF.mjs} +1 -1
- package/dist/shared/{mdream.Ch6B8TEB.mjs → mdream.C8Xgmr_a.mjs} +1 -1
- package/dist/shared/{mdream.C6Z2rfeq.mjs → mdream.CNrwlePY.mjs} +1 -1
- package/dist/shared/{mdream.D5zBVbP9.mjs → mdream.Crxe0Sar.mjs} +2 -2
- package/dist/shared/{mdream.DMUbnRbh.mjs → mdream.VU-fHLcf.mjs} +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -206,7 +206,7 @@ const adBlockPlugin = createPlugin({
|
|
|
206
206
|
Extract specific elements and their content during HTML processing for data analysis or content discovery:
|
|
207
207
|
|
|
208
208
|
```ts
|
|
209
|
-
import type { ExtractedElement } from 'mdream/plugins'
|
|
209
|
+
import type { ExtractedElement, MdreamRuntimeState } from 'mdream/plugins'
|
|
210
210
|
import { extractionPlugin, htmlToMarkdown } from 'mdream'
|
|
211
211
|
|
|
212
212
|
const html: string = `
|
|
@@ -219,12 +219,14 @@ const html: string = `
|
|
|
219
219
|
|
|
220
220
|
// Extract elements using CSS selectors
|
|
221
221
|
const plugin = extractionPlugin({
|
|
222
|
-
'h2': (element: ExtractedElement): void => {
|
|
222
|
+
'h2': (element: ExtractedElement, state: MdreamRuntimeState): void => {
|
|
223
223
|
console.log('Heading:', element.textContent) // "Getting Started"
|
|
224
|
+
console.log('Depth:', state.depth) // Current nesting depth
|
|
224
225
|
},
|
|
225
|
-
'img[alt]': (element: ExtractedElement): void => {
|
|
226
|
+
'img[alt]': (element: ExtractedElement, state: MdreamRuntimeState): void => {
|
|
226
227
|
console.log('Image:', element.attributes.src, element.attributes.alt)
|
|
227
228
|
// "Image: /hero.jpg Hero image"
|
|
229
|
+
console.log('Context:', state.options) // Access to conversion options
|
|
228
230
|
}
|
|
229
231
|
})
|
|
230
232
|
|
package/dist/cli.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { Readable } from 'node:stream';
|
|
2
2
|
import { cac } from 'cac';
|
|
3
|
-
import { f as frontmatterPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
4
|
-
import { r as readabilityPlugin } from './shared/mdream.DMUbnRbh.mjs';
|
|
5
|
-
import { s as streamHtmlToMarkdown } from './shared/mdream.CsDVbUMp.mjs';
|
|
6
|
-
import './shared/mdream.Ch6B8TEB.mjs';
|
|
3
|
+
import { f as frontmatterPlugin } from './shared/mdream.CNrwlePY.mjs';
|
|
4
|
+
import { r as readabilityPlugin } from './shared/mdream.VU-fHLcf.mjs';
|
|
5
|
+
import { s as streamHtmlToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
6
|
+
import './shared/mdream.C8Xgmr_a.mjs';
|
|
7
7
|
|
|
8
8
|
async function streamingConvert(options = {}) {
|
|
9
9
|
const outputStream = process.stdout;
|
package/dist/index.d.mts
CHANGED
|
@@ -10,6 +10,8 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
10
|
*/
|
|
11
11
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
12
|
|
|
13
|
+
declare const TagIdMap: Record<string, number>;
|
|
14
|
+
|
|
13
15
|
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
16
|
|
|
15
|
-
export { HTMLToMarkdownOptions, htmlToMarkdown, streamHtmlToMarkdown };
|
|
17
|
+
export { HTMLToMarkdownOptions, TagIdMap, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ import { ReadableStream } from 'node:stream/web';
|
|
|
10
10
|
*/
|
|
11
11
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
|
|
12
12
|
|
|
13
|
+
declare const TagIdMap: Record<string, number>;
|
|
14
|
+
|
|
13
15
|
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
14
16
|
|
|
15
|
-
export { HTMLToMarkdownOptions, htmlToMarkdown, streamHtmlToMarkdown };
|
|
17
|
+
export { HTMLToMarkdownOptions, TagIdMap, htmlToMarkdown, streamHtmlToMarkdown };
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { p as processPartialHTMLToMarkdown } from './shared/mdream.CsDVbUMp.mjs';
|
|
2
|
-
export { s as streamHtmlToMarkdown } from './shared/mdream.CsDVbUMp.mjs';
|
|
3
|
-
|
|
1
|
+
import { p as processPartialHTMLToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
2
|
+
export { s as streamHtmlToMarkdown } from './shared/mdream.BUjJQ2oF.mjs';
|
|
3
|
+
export { T as TagIdMap } from './shared/mdream.C8Xgmr_a.mjs';
|
|
4
4
|
|
|
5
5
|
function htmlToMarkdown(html, options = {}) {
|
|
6
6
|
const state = {
|
package/dist/plugins.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { P as Plugin, b as ElementNode } from './shared/mdream.-SGj02be.mjs';
|
|
1
|
+
import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared/mdream.-SGj02be.mjs';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -10,7 +10,7 @@ declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
|
10
10
|
interface ExtractedElement extends ElementNode {
|
|
11
11
|
textContent: string;
|
|
12
12
|
}
|
|
13
|
-
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement) => void>): Plugin;
|
|
13
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
14
14
|
|
|
15
15
|
/**
|
|
16
16
|
* Plugin that filters nodes based on CSS selectors.
|
package/dist/plugins.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { P as Plugin, b as ElementNode } from './shared/mdream.-SGj02be.js';
|
|
1
|
+
import { P as Plugin, b as ElementNode, d as MdreamRuntimeState } from './shared/mdream.-SGj02be.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Create a plugin that implements the Plugin interface with improved type inference
|
|
@@ -10,7 +10,7 @@ declare function createPlugin<T extends Partial<Plugin>>(plugin: T): Plugin;
|
|
|
10
10
|
interface ExtractedElement extends ElementNode {
|
|
11
11
|
textContent: string;
|
|
12
12
|
}
|
|
13
|
-
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement) => void>): Plugin;
|
|
13
|
+
declare function extractionPlugin(selectors: Record<string, (element: ExtractedElement, state: MdreamRuntimeState) => void>): Plugin;
|
|
14
14
|
|
|
15
15
|
/**
|
|
16
16
|
* Plugin that filters nodes based on CSS selectors.
|
package/dist/plugins.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { c as createPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
2
|
-
export { f as frontmatterPlugin } from './shared/mdream.C6Z2rfeq.mjs';
|
|
3
|
-
import { p as parseSelector } from './shared/mdream.D5zBVbP9.mjs';
|
|
4
|
-
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.D5zBVbP9.mjs';
|
|
5
|
-
export { r as readabilityPlugin } from './shared/mdream.DMUbnRbh.mjs';
|
|
6
|
-
import './shared/mdream.Ch6B8TEB.mjs';
|
|
1
|
+
import { c as createPlugin } from './shared/mdream.CNrwlePY.mjs';
|
|
2
|
+
export { f as frontmatterPlugin } from './shared/mdream.CNrwlePY.mjs';
|
|
3
|
+
import { p as parseSelector } from './shared/mdream.Crxe0Sar.mjs';
|
|
4
|
+
export { f as filterPlugin, i as isolateMainPlugin, t as tailwindPlugin } from './shared/mdream.Crxe0Sar.mjs';
|
|
5
|
+
export { r as readabilityPlugin } from './shared/mdream.VU-fHLcf.mjs';
|
|
6
|
+
import './shared/mdream.C8Xgmr_a.mjs';
|
|
7
7
|
|
|
8
8
|
function extractionPlugin(selectors) {
|
|
9
9
|
const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
|
|
@@ -29,14 +29,14 @@ function extractionPlugin(selectors) {
|
|
|
29
29
|
currentParent = currentParent.parent;
|
|
30
30
|
}
|
|
31
31
|
},
|
|
32
|
-
onNodeExit(element) {
|
|
32
|
+
onNodeExit(element, state) {
|
|
33
33
|
const tracked = trackedElements.get(element);
|
|
34
34
|
if (tracked) {
|
|
35
35
|
const extractedElement = {
|
|
36
36
|
...element,
|
|
37
37
|
textContent: tracked.textContent.trim()
|
|
38
38
|
};
|
|
39
|
-
tracked.callback(extractedElement);
|
|
39
|
+
tracked.callback(extractedElement, state);
|
|
40
40
|
trackedElements.delete(element);
|
|
41
41
|
}
|
|
42
42
|
}
|
package/dist/preset/minimal.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.D5zBVbP9.mjs';
|
|
3
|
-
import { f as frontmatterPlugin } from '../shared/mdream.C6Z2rfeq.mjs';
|
|
1
|
+
import { aa as TAG_FORM, W as TAG_FIELDSET, b2 as TAG_OBJECT, a5 as TAG_EMBED, b6 as TAG_FIGURE, ab as TAG_FOOTER, v as TAG_ASIDE, Q as TAG_IFRAME, a4 as TAG_INPUT, Y as TAG_TEXTAREA, Z as TAG_SELECT, ai as TAG_BUTTON, ak as TAG_NAV } from '../shared/mdream.C8Xgmr_a.mjs';
|
|
2
|
+
import { i as isolateMainPlugin, t as tailwindPlugin, f as filterPlugin } from '../shared/mdream.Crxe0Sar.mjs';
|
|
3
|
+
import { f as frontmatterPlugin } from '../shared/mdream.CNrwlePY.mjs';
|
|
4
4
|
|
|
5
5
|
function withMinimalPreset(options = {}) {
|
|
6
6
|
const plugins = [
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as TEXT_NODE, N as NodeEventEnter, c as collectNodeContent, E as ELEMENT_NODE, b as TAG_PRE, d as TAG_LI, e as TAG_BLOCKQUOTE, f as NO_SPACING, D as DEFAULT_BLOCK_SPACING, g as TABLE_ROW_SPACING, L as LIST_ITEM_SPACING, B as BLOCKQUOTE_SPACING, M as MARKDOWN_STRIKETHROUGH, h as MARKDOWN_HORIZONTAL_RULE, i as TAG_DD, j as TAG_DT, k as TAG_DL, l as TAG_ADDRESS, m as TAG_RP, n as TAG_RT, o as TAG_RUBY, p as TAG_BDO, q as TAG_TIME, r as TAG_VAR, s as TAG_DFN, t as TAG_CITE, u as TAG_U, v as TAG_ASIDE, w as TAG_PLAINTEXT, x as TAG_XMP, y as TAG_NOFRAMES, z as TAG_NOSCRIPT, A as TAG_SMALL, C as TAG_SAMP, F as TAG_Q, G as TAG_MARK, H as TAG_ABBR, I as TAG_TEMPLATE, J as TAG_PROGRESS, K as TAG_METER, O as TAG_DIALOG, P as TAG_MAP, Q as TAG_IFRAME, R as TAG_CANVAS, S as TAG_VIDEO, U as TAG_AUDIO, V as TAG_LEGEND, W as TAG_FIELDSET, X as TAG_OPTION, Y as TAG_TEXTAREA, Z as TAG_SELECT, _ as TAG_SVG, $ as TAG_WBR, a0 as TAG_TRACK, a1 as TAG_SOURCE, a2 as TAG_PARAM, a3 as TAG_KEYGEN, a4 as TAG_INPUT, a5 as TAG_EMBED, a6 as TAG_COL, a7 as TAG_BASE, a8 as TAG_AREA, a9 as TAG_LINK, aa as TAG_FORM, ab as TAG_FOOTER, ac as TAG_KBD, ad as TAG_TFOOT, ae as TAG_TBODY, af as TAG_CENTER, ag as TAG_TABLE, ah as TAG_BODY, ai as TAG_BUTTON, aj as TAG_LABEL, ak as TAG_NAV, al as TAG_SPAN, am as TAG_DIV, an as TAG_P, ao as TAG_TD, ap as TAG_TH, aq as TAG_TR, ar as TAG_THEAD, as as TAG_IMG, at as TAG_A, au as TAG_UL, av as TAG_OL, aw as TAG_CODE, ax as MARKDOWN_CODE_BLOCK, ay as MARKDOWN_INLINE_CODE, az as TAG_INS, aA as TAG_SUP, aB as TAG_SUB, aC as TAG_DEL, aD as TAG_I, aE as TAG_EM, aF as TAG_B, aG as TAG_STRONG, aH as TAG_HR, aI as TAG_H6, aJ as TAG_H5, aK as TAG_H4, aL as TAG_H3, aM as TAG_H2, aN as TAG_H1, aO as TAG_BR, aP as TAG_META, aQ as TAG_STYLE, aR as TAG_SCRIPT, aS as TAG_TITLE, aT as TAG_SUMMARY, aU as TAG_DETAILS, aV as TAG_HEAD, aW as MARKDOWN_EMPHASIS, aX as MARKDOWN_STRONG, aY as HTML_ENTITIES, aZ as MAX_TAG_ID, a_ as assembleBufferedContent, T as TagIdMap, a$ as 
NodeEventExit } from './mdream.C8Xgmr_a.mjs';
|
|
2
2
|
|
|
3
3
|
function needsSpacing(lastChar, firstChar) {
|
|
4
4
|
const noSpaceLastChars = /* @__PURE__ */ new Set(["\n", " ", "[", ">", "_", "*", "`", "|", "#", "<", "("]);
|
|
@@ -277,4 +277,4 @@ const BLOCKQUOTE_SPACING = [1, 1];
|
|
|
277
277
|
const LIST_ITEM_SPACING = [1, 0];
|
|
278
278
|
const TABLE_ROW_SPACING = [0, 1];
|
|
279
279
|
|
|
280
|
-
export {
|
|
280
|
+
export { TAG_WBR as $, TAG_SMALL as A, BLOCKQUOTE_SPACING as B, TAG_SAMP as C, DEFAULT_BLOCK_SPACING as D, ELEMENT_NODE as E, TAG_Q as F, TAG_MARK as G, TAG_ABBR as H, TAG_TEMPLATE as I, TAG_PROGRESS as J, TAG_METER as K, LIST_ITEM_SPACING as L, MARKDOWN_STRIKETHROUGH as M, NodeEventEnter as N, TAG_DIALOG as O, TAG_MAP as P, TAG_IFRAME as Q, TAG_CANVAS as R, TAG_VIDEO as S, TagIdMap as T, TAG_AUDIO as U, TAG_LEGEND as V, TAG_FIELDSET as W, TAG_OPTION as X, TAG_TEXTAREA as Y, TAG_SELECT as Z, TAG_SVG as _, TEXT_NODE as a, NodeEventExit as a$, TAG_TRACK as a0, TAG_SOURCE as a1, TAG_PARAM as a2, TAG_KEYGEN as a3, TAG_INPUT as a4, TAG_EMBED as a5, TAG_COL as a6, TAG_BASE as a7, TAG_AREA as a8, TAG_LINK as a9, TAG_SUP as aA, TAG_SUB as aB, TAG_DEL as aC, TAG_I as aD, TAG_EM as aE, TAG_B as aF, TAG_STRONG as aG, TAG_HR as aH, TAG_H6 as aI, TAG_H5 as aJ, TAG_H4 as aK, TAG_H3 as aL, TAG_H2 as aM, TAG_H1 as aN, TAG_BR as aO, TAG_META as aP, TAG_STYLE as aQ, TAG_SCRIPT as aR, TAG_TITLE as aS, TAG_SUMMARY as aT, TAG_DETAILS as aU, TAG_HEAD as aV, MARKDOWN_EMPHASIS as aW, MARKDOWN_STRONG as aX, HTML_ENTITIES as aY, MAX_TAG_ID as aZ, assembleBufferedContent as a_, TAG_FORM as aa, TAG_FOOTER as ab, TAG_KBD as ac, TAG_TFOOT as ad, TAG_TBODY as ae, TAG_CENTER as af, TAG_TABLE as ag, TAG_BODY as ah, TAG_BUTTON as ai, TAG_LABEL as aj, TAG_NAV as ak, TAG_SPAN as al, TAG_DIV as am, TAG_P as an, TAG_TD as ao, TAG_TH as ap, TAG_TR as aq, TAG_THEAD as ar, TAG_IMG as as, TAG_A as at, TAG_UL as au, TAG_OL as av, TAG_CODE as aw, MARKDOWN_CODE_BLOCK as ax, MARKDOWN_INLINE_CODE as ay, TAG_INS as az, TAG_PRE as b, TAG_HTML as b0, createBufferRegion as b1, TAG_OBJECT as b2, TAG_HEADER as b3, TAG_CAPTION as b4, TAG_FIGCAPTION as b5, TAG_FIGURE as b6, TAG_MAIN as b7, TAG_SECTION as b8, TAG_ARTICLE as b9, collectNodeContent as c, TAG_LI as d, TAG_BLOCKQUOTE as e, NO_SPACING as f, TABLE_ROW_SPACING as g, MARKDOWN_HORIZONTAL_RULE as h, TAG_DD as i, TAG_DT as j, TAG_DL as k, TAG_ADDRESS as l, TAG_RP 
as m, TAG_RT as n, TAG_RUBY as o, TAG_BDO as p, TAG_TIME as q, TAG_VAR as r, TAG_DFN as s, TAG_CITE as t, TAG_U as u, TAG_ASIDE as v, TAG_PLAINTEXT as w, TAG_XMP as x, TAG_NOFRAMES as y, TAG_NOSCRIPT as z };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { E as ELEMENT_NODE,
|
|
1
|
+
import { E as ELEMENT_NODE, aV as TAG_HEAD, c as collectNodeContent, aS as TAG_TITLE, aP as TAG_META } from './mdream.C8Xgmr_a.mjs';
|
|
2
2
|
|
|
3
3
|
function createPlugin(plugin) {
|
|
4
4
|
return plugin;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as createPlugin } from './mdream.C6Z2rfeq.mjs';
|
|
1
|
+
import { a as TEXT_NODE, E as ELEMENT_NODE, b7 as TAG_MAIN, b3 as TAG_HEADER, ab as TAG_FOOTER, aN as TAG_H1, aM as TAG_H2, aL as TAG_H3, aK as TAG_H4, aJ as TAG_H5, aI as TAG_H6 } from './mdream.C8Xgmr_a.mjs';
|
|
2
|
+
import { c as createPlugin } from './mdream.CNrwlePY.mjs';
|
|
3
3
|
|
|
4
4
|
class TagSelector {
|
|
5
5
|
constructor(tagName) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as createPlugin } from './mdream.C6Z2rfeq.mjs';
|
|
1
|
+
import { ah as TAG_BODY, b0 as TAG_HTML, aV as TAG_HEAD, b1 as createBufferRegion, at as TAG_A, l as TAG_ADDRESS, aT as TAG_SUMMARY, aU as TAG_DETAILS, aQ as TAG_STYLE, aR as TAG_SCRIPT, al as TAG_SPAN, aO as TAG_BR, aH as TAG_HR, aD as TAG_I, aE as TAG_EM, aF as TAG_B, aG as TAG_STRONG, b2 as TAG_OBJECT, a5 as TAG_EMBED, Q as TAG_IFRAME, W as TAG_FIELDSET, Z as TAG_SELECT, Y as TAG_TEXTAREA, a4 as TAG_INPUT, ai as TAG_BUTTON, aa as TAG_FORM, v as TAG_ASIDE, ak as TAG_NAV, ab as TAG_FOOTER, b3 as TAG_HEADER, aI as TAG_H6, aJ as TAG_H5, aK as TAG_H4, aL as TAG_H3, aM as TAG_H2, aN as TAG_H1, i as TAG_DD, j as TAG_DT, k as TAG_DL, d as TAG_LI, av as TAG_OL, au as TAG_UL, ao as TAG_TD, ap as TAG_TH, aq as TAG_TR, ad as TAG_TFOOT, ae as TAG_TBODY, ar as TAG_THEAD, b4 as TAG_CAPTION, ag as TAG_TABLE, _ as TAG_SVG, U as TAG_AUDIO, S as TAG_VIDEO, b5 as TAG_FIGCAPTION, b6 as TAG_FIGURE, as as TAG_IMG, aw as TAG_CODE, b as TAG_PRE, e as TAG_BLOCKQUOTE, am as TAG_DIV, an as TAG_P, b7 as TAG_MAIN, b8 as TAG_SECTION, b9 as TAG_ARTICLE } from './mdream.C8Xgmr_a.mjs';
|
|
2
|
+
import { c as createPlugin } from './mdream.CNrwlePY.mjs';
|
|
3
3
|
|
|
4
4
|
const REGEXPS = {
|
|
5
5
|
// Positive patterns that suggest high-quality content
|