mdream 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HR, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap$1 as TagIdMap, assembleBufferedContent, collectNodeContent } from "./plugin-DCJFRZej.mjs";
1
+ import { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HR, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent } from "./plugin-Bqz9GKOA.mjs";
2
2
 
3
3
  //#region src/tags.ts
4
4
  function resolveUrl(url, origin) {
@@ -1146,6 +1146,51 @@ function parseAttributes(attrStr) {
1146
1146
  return result;
1147
1147
  }
1148
1148
 
1149
+ //#endregion
1150
+ //#region src/plugin-processor.ts
1151
+ /**
1152
+ * Processes plugins for a given node event
1153
+ * Shared logic between markdown-processor.ts and stream.ts
1154
+ *
1155
+ * @param event - The node event to process
1156
+ * @param plugins - Array of plugins to apply
1157
+ * @param state - The current runtime state
1158
+ * @param processEvent - Callback to process the event after plugin processing
1159
+ * @returns true if the event should be skipped, false to continue processing
1160
+ */
1161
+ function processPluginsForEvent(event, plugins, state, processEvent) {
1162
+ if (plugins?.length) {
1163
+ for (const plugin of plugins) {
1164
+ const res = plugin.beforeNodeProcess?.(event, state);
1165
+ if (typeof res === "object" && res.skip) return true;
1166
+ }
1167
+ if (event.node.type === ELEMENT_NODE) {
1168
+ const element = event.node;
1169
+ if (event.type === NodeEventEnter) {
1170
+ for (const plugin of plugins) if (plugin.processAttributes) plugin.processAttributes(element, state);
1171
+ }
1172
+ const fn = event.type === NodeEventEnter ? "onNodeEnter" : "onNodeExit";
1173
+ const pluginOutputs = [];
1174
+ for (const plugin of plugins) if (plugin[fn]) {
1175
+ const result = plugin[fn](element, state);
1176
+ if (result) pluginOutputs.push(result);
1177
+ }
1178
+ if (pluginOutputs.length > 0) element.pluginOutput = (element.pluginOutput || []).concat(pluginOutputs);
1179
+ } else if (event.node.type === TEXT_NODE && event.type === NodeEventEnter) {
1180
+ const textNode = event.node;
1181
+ for (const plugin of plugins) if (plugin.processTextNode) {
1182
+ const result = plugin.processTextNode(textNode, state);
1183
+ if (result) {
1184
+ if (result.skip) return true;
1185
+ textNode.value = result.content;
1186
+ }
1187
+ }
1188
+ }
1189
+ }
1190
+ processEvent(event);
1191
+ return false;
1192
+ }
1193
+
1149
1194
  //#endregion
1150
1195
  //#region src/markdown-processor.ts
1151
1196
  /**
@@ -1184,7 +1229,7 @@ function needsSpacing(lastChar, firstChar, state) {
1184
1229
  * Determines if spacing should be added before text content
1185
1230
  */
1186
1231
  function shouldAddSpacingBeforeText(lastChar, lastNode, textNode) {
1187
- return lastChar && lastChar !== "\n" && lastChar !== " " && lastChar !== "[" && lastChar !== ">" && !lastNode?.tagHandler?.isInline && textNode.value[0] !== " ";
1232
+ return !!lastChar && lastChar !== "\n" && lastChar !== " " && lastChar !== "[" && lastChar !== ">" && !lastNode?.tagHandler?.isInline && textNode.value[0] !== " ";
1188
1233
  }
1189
1234
  /**
1190
1235
  * Calculate newline configuration based on tag handler spacing config
@@ -1303,35 +1348,7 @@ function createMarkdownProcessor(options = {}) {
1303
1348
  plugins: state.options?.plugins || []
1304
1349
  };
1305
1350
  parseHtmlStream(html, parseState, (event) => {
1306
- if (state.options?.plugins?.length) {
1307
- for (const plugin of state.options.plugins) {
1308
- const res = plugin.beforeNodeProcess?.(event, state);
1309
- if (typeof res === "object" && res.skip) return;
1310
- }
1311
- if (event.node.type === ELEMENT_NODE) {
1312
- const element = event.node;
1313
- if (event.type === NodeEventEnter) {
1314
- for (const plugin of state.options.plugins) if (plugin.processAttributes) plugin.processAttributes(element, state);
1315
- }
1316
- const fn = event.type === NodeEventEnter ? "onNodeEnter" : "onNodeExit";
1317
- const pluginOutputs = [];
1318
- for (const plugin of state.options.plugins) if (plugin[fn]) {
1319
- const result = plugin[fn](element, state);
1320
- if (result) pluginOutputs.push(result);
1321
- }
1322
- if (pluginOutputs.length > 0) element.pluginOutput = (element.pluginOutput || []).concat(pluginOutputs);
1323
- } else if (event.node.type === TEXT_NODE && event.type === NodeEventEnter) {
1324
- const textNode = event.node;
1325
- for (const plugin of state.options.plugins) if (plugin.processTextNode) {
1326
- const result = plugin.processTextNode(textNode, state);
1327
- if (result) {
1328
- if (result.skip) return;
1329
- if (result.content) textNode.value = result.content;
1330
- }
1331
- }
1332
- }
1333
- }
1334
- processEvent(event);
1351
+ processPluginsForEvent(event, state.options?.plugins, state, processEvent);
1335
1352
  });
1336
1353
  }
1337
1354
  /**
@@ -1359,7 +1376,8 @@ function createMarkdownProcessor(options = {}) {
1359
1376
  processEvent,
1360
1377
  processHtml,
1361
1378
  getMarkdown,
1362
- getMarkdownChunk
1379
+ getMarkdownChunk,
1380
+ state
1363
1381
  };
1364
1382
  }
1365
1383
  const MarkdownProcessor = createMarkdownProcessor;
@@ -1389,13 +1407,13 @@ async function* streamHtmlToMarkdown(htmlStream, options = {}) {
1389
1407
  if (done) break;
1390
1408
  const htmlContent = `${remainingHtml}${typeof value === "string" ? value : decoder.decode(value, { stream: true })}`;
1391
1409
  remainingHtml = parseHtmlStream(htmlContent, parseState, (event) => {
1392
- processor.processEvent(event);
1410
+ processPluginsForEvent(event, options.plugins, processor.state, processor.processEvent);
1393
1411
  });
1394
1412
  const chunk = processor.getMarkdownChunk();
1395
1413
  if (chunk) yield chunk;
1396
1414
  }
1397
1415
  if (remainingHtml) parseHtmlStream(remainingHtml, parseState, (event) => {
1398
- processor.processEvent(event);
1416
+ processPluginsForEvent(event, options.plugins, processor.state, processor.processEvent);
1399
1417
  });
1400
1418
  const finalChunk = processor.getMarkdownChunk();
1401
1419
  if (finalChunk) yield finalChunk;
@@ -1406,4 +1424,4 @@ async function* streamHtmlToMarkdown(htmlStream, options = {}) {
1406
1424
  }
1407
1425
 
1408
1426
  //#endregion
1409
- export { MarkdownProcessor as MarkdownProcessor$1, createMarkdownProcessor, parseHtml as parseHtml$1, streamHtmlToMarkdown as streamHtmlToMarkdown$1 };
1427
+ export { MarkdownProcessor, createMarkdownProcessor, parseHtml, streamHtmlToMarkdown };
@@ -37,12 +37,12 @@ interface Plugin {
37
37
  * Process a text node before it's added to the output
38
38
  * @param node - The text node to process
39
39
  * @param state - The current runtime state
40
- * @returns Legacy format or PluginHookResult with textContent and skipNode
40
+ * @returns Result with content and skip flag, or undefined for no transformation
41
41
  */
42
- processTextNode?: (node: TextNode, state: MdreamRuntimeState) => undefined | void | {
42
+ processTextNode?: (node: TextNode, state: MdreamRuntimeState) => {
43
43
  content: string;
44
44
  skip: boolean;
45
- };
45
+ } | undefined;
46
46
  }
47
47
  /**
48
48
  * Plugin creation options for controlling plugin behavior
@@ -78,7 +78,7 @@ interface ElementNode extends Node {
78
78
  /** HTML attributes (for ELEMENT_NODE) */
79
79
  attributes: Record<string, string>;
80
80
  /** Custom data added by plugins */
81
- context?: Record<string, any>;
81
+ context?: PluginContext;
82
82
  /** ID of the tag for fast handler lookup */
83
83
  tagId?: number;
84
84
  /** Map of tag names to their nesting count (using Uint8Array for performance) */
@@ -90,7 +90,7 @@ interface TextNode extends Node {
90
90
  /** Text content (for TEXT_NODE) */
91
91
  value: string;
92
92
  /** Custom data added by plugins */
93
- context?: Record<string, any>;
93
+ context?: PluginContext;
94
94
  /** Whether this text node should be excluded from markdown output (for script/style elements) */
95
95
  excludedFromMarkdown?: boolean;
96
96
  }
@@ -117,7 +117,7 @@ interface Node {
117
117
  /** Parent node */
118
118
  parent?: ElementNode | null;
119
119
  /** Custom data added by plugins */
120
- context?: Record<string, any>;
120
+ context?: PluginContext;
121
121
  /** Region ID for buffer region tracking */
122
122
  regionId?: number;
123
123
  }
@@ -190,7 +190,7 @@ interface MdreamRuntimeState extends Partial<MdreamProcessingState> {
190
190
  lastContentCache?: string;
191
191
  /** Reference to the last processed node */
192
192
  lastNode?: Node;
193
- context?: Record<string, any>;
193
+ context?: PluginContext;
194
194
  }
195
195
  type NodeEventEnter = 0;
196
196
  type NodeEventExit = 1;
@@ -230,5 +230,26 @@ interface TagHandler {
230
230
  spacing?: readonly [number, number];
231
231
  excludesTextNodes?: boolean;
232
232
  }
233
+ interface ReadabilityContext {
234
+ score?: number;
235
+ tagCount?: number;
236
+ linkTextLength?: number;
237
+ textLength?: number;
238
+ isHighLinkDensity?: boolean;
239
+ }
240
+ interface TailwindContext {
241
+ hidden?: boolean;
242
+ prefix?: string;
243
+ suffix?: string;
244
+ }
245
+ interface PluginContext {
246
+ score?: number;
247
+ tagCount?: number;
248
+ linkTextLength?: number;
249
+ textLength?: number;
250
+ isHighLinkDensity?: boolean;
251
+ tailwind?: TailwindContext;
252
+ [key: string]: unknown;
253
+ }
233
254
  //#endregion
234
- export { BufferRegion, ELEMENT_NODE as ELEMENT_NODE$1, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginCreationOptions, TEXT_NODE as TEXT_NODE$1, TagHandler, TextNode, extractionPlugin as extractionPlugin$1 };
255
+ export { BufferRegion, ELEMENT_NODE as ELEMENT_NODE$1, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, TEXT_NODE as TEXT_NODE$1, TagHandler, TailwindContext, TextNode, extractionPlugin as extractionPlugin$1 };
package/dist/cli.mjs CHANGED
@@ -1,23 +1,29 @@
1
- import "./_chunks/plugin-DCJFRZej.mjs";
2
- import { streamHtmlToMarkdown$1 as streamHtmlToMarkdown } from "./_chunks/stream-BeojJNLt.mjs";
3
- import { frontmatterPlugin, readabilityPlugin } from "./_chunks/readability-BfCjcbbx.mjs";
1
+ import "./_chunks/plugin-Bqz9GKOA.mjs";
2
+ import { streamHtmlToMarkdown } from "./_chunks/stream-IeCVDuTy.mjs";
3
+ import "./_chunks/plugins-D305pIpW.mjs";
4
+ import { withMinimalPreset } from "./_chunks/minimal-Ru8PBNVI.mjs";
5
+ import { readFileSync } from "node:fs";
6
+ import { dirname, join } from "node:path";
4
7
  import { Readable } from "node:stream";
8
+ import { fileURLToPath } from "node:url";
5
9
  import { cac } from "cac";
6
10
 
7
11
  //#region src/cli.ts
8
12
  async function streamingConvert(options = {}) {
9
13
  const outputStream = process.stdout;
10
- const conversionOptions = { origin: options.origin };
11
- conversionOptions.plugins = conversionOptions.plugins || [];
12
- conversionOptions.plugins.push(readabilityPlugin());
13
- conversionOptions.plugins.push(frontmatterPlugin());
14
+ let conversionOptions = { origin: options.origin };
15
+ if (options.preset === "minimal") conversionOptions = withMinimalPreset(conversionOptions);
14
16
  const markdownGenerator = streamHtmlToMarkdown(Readable.toWeb(process.stdin), conversionOptions);
15
17
  for await (const markdownChunk of markdownGenerator) if (markdownChunk && markdownChunk.length > 0) outputStream.write(markdownChunk);
16
18
  }
19
+ const __dirname = dirname(fileURLToPath(import.meta.url));
20
+ const packageJsonPath = join(__dirname, "..", "package.json");
21
+ const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
22
+ const version = packageJson.version;
17
23
  const cli = cac();
18
24
  cli.command("[options]", "Convert HTML from stdin to Markdown on stdout").option("--origin <url>", "Origin URL for resolving relative image paths").option("--preset <preset>", "Conversion presets: minimal").action(async (_, opts) => {
19
25
  await streamingConvert(opts);
20
26
  });
21
- cli.help().version("1.0.0").parse();
27
+ cli.help().version(version).parse();
22
28
 
23
29
  //#endregion
package/dist/index.d.mts CHANGED
@@ -1,3 +1,75 @@
1
- import { BufferRegion, ELEMENT_NODE$1 as ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginCreationOptions, TEXT_NODE$1 as TEXT_NODE, TagHandler, TextNode } from "./_chunks/types-BHoibuoP.mjs";
2
- import { MarkdownProcessor, TagIdMap, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown } from "./_chunks/index-VTwTBxk0.mjs";
3
- export { BufferRegion, ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MarkdownProcessor, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginCreationOptions, TEXT_NODE, TagHandler, TagIdMap, TextNode, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
1
+ import { BufferRegion, ELEMENT_NODE$1 as ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, TEXT_NODE$1 as TEXT_NODE, TagHandler, TailwindContext, TextNode } from "./_chunks/types-D9VKEbix.mjs";
2
+ import { createPlugin$1 as createPlugin } from "./_chunks/plugin-D45YAMmt.mjs";
3
+ import { ReadableStream } from "node:stream/web";
4
+
5
+ //#region src/const.d.ts
6
+
7
+ declare const TagIdMap: Record<string, number>;
8
+ //#endregion
9
+ //#region src/markdown-processor.d.ts
10
+ interface MarkdownState {
11
+ /** Configuration options for conversion */
12
+ options?: HTMLToMarkdownOptions;
13
+ /** Map of region IDs to buffer regions for O(1) lookups */
14
+ regionToggles: Map<number, boolean>;
15
+ /** Content buffers for regions */
16
+ regionContentBuffers: Map<number, string[]>;
17
+ /** Performance cache for last content to avoid iteration */
18
+ lastContentCache?: string;
19
+ /** Reference to the last processed node */
20
+ lastNode?: ElementNode | TextNode;
21
+ /** Reference to the last processed text node - for context tracking */
22
+ lastTextNode?: TextNode;
23
+ /** Table processing state - specialized for Markdown tables */
24
+ tableRenderedTable?: boolean;
25
+ tableCurrentRowCells?: number;
26
+ tableColumnAlignments?: string[];
27
+ /** Map of tag names to their current nesting depth */
28
+ depthMap: Uint8Array;
29
+ /** Current depth for plugin access */
30
+ depth?: number;
31
+ /** Context for additional data */
32
+ context?: PluginContext;
33
+ }
34
+ /**
35
+ * Creates a markdown processor that consumes DOM events and generates markdown
36
+ */
37
+ declare function createMarkdownProcessor(options?: HTMLToMarkdownOptions): {
38
+ processEvent: (event: NodeEvent) => void;
39
+ processHtml: (html: string) => void;
40
+ getMarkdown: () => string;
41
+ getMarkdownChunk: () => string;
42
+ state: MarkdownState;
43
+ };
44
+ declare const MarkdownProcessor: typeof createMarkdownProcessor;
45
+ //#endregion
46
+ //#region src/parse.d.ts
47
+ interface ParseOptions {
48
+ plugins?: Plugin[];
49
+ }
50
+ interface ParseResult {
51
+ events: NodeEvent[];
52
+ remainingHtml: string;
53
+ }
54
+ /**
55
+ * Pure HTML parser that emits DOM events
56
+ * Completely decoupled from markdown generation
57
+ */
58
+ declare function parseHtml(html: string, options?: ParseOptions): ParseResult;
59
+ /**
60
+ * Streaming HTML parser - calls onEvent for each DOM event
61
+ */
62
+ //#endregion
63
+ //#region src/stream.d.ts
64
+ /**
65
+ * Creates a markdown stream from an HTML stream
66
+ * @param htmlStream - ReadableStream of HTML content (as Uint8Array or string)
67
+ * @param options - Configuration options for conversion
68
+ * @returns An async generator yielding markdown chunks
69
+ */
70
+ declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options?: HTMLToMarkdownOptions): AsyncIterable<string>;
71
+ //#endregion
72
+ //#region src/index.d.ts
73
+ declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
74
+ //#endregion
75
+ export { BufferRegion, ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MarkdownProcessor, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, TEXT_NODE, TagHandler, TagIdMap, TailwindContext, TextNode, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { TagIdMap$1 as TagIdMap, createPlugin$1 as createPlugin } from "./_chunks/plugin-DCJFRZej.mjs";
2
- import { MarkdownProcessor$1 as MarkdownProcessor, createMarkdownProcessor, parseHtml$1 as parseHtml, streamHtmlToMarkdown$1 as streamHtmlToMarkdown } from "./_chunks/stream-BeojJNLt.mjs";
1
+ import { TagIdMap, createPlugin } from "./_chunks/plugin-Bqz9GKOA.mjs";
2
+ import { MarkdownProcessor, createMarkdownProcessor, parseHtml, streamHtmlToMarkdown } from "./_chunks/stream-IeCVDuTy.mjs";
3
3
 
4
4
  //#region src/index.ts
5
5
  function htmlToMarkdown(html, options = {}) {
@@ -1,5 +1,5 @@
1
- import { Plugin, extractionPlugin$1 as extractionPlugin } from "./_chunks/types-BHoibuoP.mjs";
2
- import { createPlugin } from "./_chunks/index-VTwTBxk0.mjs";
1
+ import { Plugin, extractionPlugin$1 as extractionPlugin } from "./_chunks/types-D9VKEbix.mjs";
2
+ import { createPlugin$1 as createPlugin } from "./_chunks/plugin-D45YAMmt.mjs";
3
3
 
4
4
  //#region src/plugins/filter.d.ts
5
5
 
package/dist/plugins.mjs CHANGED
@@ -1,5 +1,4 @@
1
- import { createPlugin$1 as createPlugin } from "./_chunks/plugin-DCJFRZej.mjs";
2
- import { frontmatterPlugin, readabilityPlugin } from "./_chunks/readability-BfCjcbbx.mjs";
3
- import { extractionPlugin, filterPlugin, isolateMainPlugin, tailwindPlugin } from "./_chunks/plugins-DGakgpSl.mjs";
1
+ import { createPlugin } from "./_chunks/plugin-Bqz9GKOA.mjs";
2
+ import { extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin } from "./_chunks/plugins-D305pIpW.mjs";
4
3
 
5
4
  export { createPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };
@@ -1,4 +1,4 @@
1
- import { HTMLToMarkdownOptions } from "../_chunks/types-BHoibuoP.mjs";
1
+ import { HTMLToMarkdownOptions } from "../_chunks/types-D9VKEbix.mjs";
2
2
 
3
3
  //#region src/preset/minimal.d.ts
4
4
 
@@ -1,41 +1,5 @@
1
- import { TAG_ASIDE, TAG_BUTTON, TAG_EMBED, TAG_FIELDSET, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_IFRAME, TAG_INPUT, TAG_NAV, TAG_OBJECT, TAG_SELECT, TAG_TEXTAREA } from "../_chunks/plugin-DCJFRZej.mjs";
2
- import { frontmatterPlugin } from "../_chunks/readability-BfCjcbbx.mjs";
3
- import { filterPlugin, isolateMainPlugin, tailwindPlugin } from "../_chunks/plugins-DGakgpSl.mjs";
1
+ import "../_chunks/plugin-Bqz9GKOA.mjs";
2
+ import "../_chunks/plugins-D305pIpW.mjs";
3
+ import { withMinimalPreset } from "../_chunks/minimal-Ru8PBNVI.mjs";
4
4
 
5
- //#region src/preset/minimal.ts
6
- /**
7
- * Creates a configurable minimal preset with advanced options
8
- *
9
- * @param options HTML to Markdown options
10
- * @returns HTML to Markdown options with configured plugins
11
- */
12
- function withMinimalPreset(options = {}) {
13
- const plugins = [
14
- isolateMainPlugin(),
15
- frontmatterPlugin(),
16
- tailwindPlugin(),
17
- filterPlugin({ exclude: [
18
- TAG_FORM,
19
- TAG_FIELDSET,
20
- TAG_OBJECT,
21
- TAG_EMBED,
22
- TAG_FIGURE,
23
- TAG_FOOTER,
24
- TAG_ASIDE,
25
- TAG_IFRAME,
26
- TAG_INPUT,
27
- TAG_TEXTAREA,
28
- TAG_SELECT,
29
- TAG_BUTTON,
30
- TAG_NAV
31
- ] })
32
- ];
33
- if (options.plugins) plugins.push(...options.plugins);
34
- return {
35
- ...options,
36
- plugins
37
- };
38
- }
39
-
40
- //#endregion
41
5
  export { withMinimalPreset };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "0.3.0",
4
+ "version": "0.4.0",
5
5
  "description": "Ultra-performant JavaScript HTML to Markdown converter optimized for LLMs.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -28,19 +28,6 @@
28
28
  "dependencies": {
29
29
  "cac": "^6.7.14"
30
30
  },
31
- "devDependencies": {
32
- "@antfu/eslint-config": "^4.16.2",
33
- "@types/node": "^24.0.10",
34
- "bumpp": "^10.2.0",
35
- "crawlee": "^3.13.9",
36
- "eslint": "^9.30.1",
37
- "llm-cost": "^1.0.5",
38
- "obuild": "^0.2.1",
39
- "playwright": "^1.53.2",
40
- "typescript": "5.8.3",
41
- "unbuild": "^3.5.0",
42
- "vitest": "^3.2.4"
43
- },
44
31
  "scripts": {
45
32
  "flame": "pnpm build && unbuild bench/bundle && clinic flame -- node bench/bundle/dist/string.mjs 10",
46
33
  "bench:build": "pnpm build && unbuild bench/bundle",
@@ -55,9 +42,8 @@
55
42
  "test:wiki:file": "pnpm build && cat test/fixtures/wikipedia-largest.html | node ./bin/mdream.mjs --origin https://en.wikipedia.org | tee test/wiki-markdown.md",
56
43
  "test:wiki-small:file": "cat test/fixtures/wikipedia-small.html | node ./bin/mdream.mjs --origin https://en.wikipedia.org | tee test/wiki-markdown.md",
57
44
  "build": "obuild",
58
- "typecheck": "tsc --noEmit src/index.ts",
45
+ "typecheck": "tsc --noEmit",
59
46
  "dev:prepare": "obuild --stub",
60
- "test": "vitest test",
61
- "release": "pnpm build && bumpp && pnpm -r publish"
47
+ "test": "vitest test"
62
48
  }
63
49
  }