mdream 0.15.3 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/_chunks/const.mjs +1 -42
- package/dist/_chunks/extraction.mjs +1 -26
- package/dist/_chunks/markdown-processor.mjs +22 -104
- package/dist/_chunks/plugin.d.mts +1 -2
- package/dist/_chunks/plugin.mjs +1 -9
- package/dist/_chunks/{src.mjs → stream.mjs} +1 -19
- package/dist/_chunks/{plugins.mjs → tailwind.mjs} +13 -273
- package/dist/_chunks/types.d.mts +6 -22
- package/dist/cli.mjs +4 -9
- package/dist/iife.js +8 -8
- package/dist/index.d.mts +4 -7
- package/dist/index.mjs +9 -5
- package/dist/llms-txt.mjs +382 -4
- package/dist/plugins.d.mts +4 -15
- package/dist/plugins.mjs +2 -3
- package/dist/preset/minimal.d.mts +1 -2
- package/dist/preset/minimal.mjs +30 -4
- package/dist/splitter.d.mts +1 -2
- package/dist/splitter.mjs +6 -23
- package/package.json +1 -1
- package/dist/_chunks/llms-txt.mjs +0 -464
- package/dist/_chunks/minimal.mjs +0 -40
|
@@ -1,20 +1,6 @@
|
|
|
1
|
-
import { $ as
|
|
1
|
+
import { $ as TAG_H5, Q as TAG_H4, X as TAG_H2, Y as TAG_H1, Z as TAG_H3, dn as TEXT_NODE, et as TAG_H6, ht as TAG_MAIN, nn as TAG_TITLE, nt as TAG_HEADER, q as TAG_FOOTER, r as ELEMENT_NODE, tt as TAG_HEAD, vt as TAG_META } from "./const.mjs";
|
|
2
2
|
import { t as createPlugin } from "./plugin.mjs";
|
|
3
3
|
import { n as parseSelector } from "./extraction.mjs";
|
|
4
|
-
|
|
5
|
-
//#region src/plugins/filter.ts
|
|
6
|
-
/**
|
|
7
|
-
* Plugin that filters nodes based on CSS selectors.
|
|
8
|
-
* Allows including or excluding nodes based on selectors.
|
|
9
|
-
*
|
|
10
|
-
* @example
|
|
11
|
-
* // Include only heading elements and their children
|
|
12
|
-
* withQuerySelectorPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
|
|
13
|
-
*
|
|
14
|
-
* @example
|
|
15
|
-
* // Exclude navigation, sidebar, and footer
|
|
16
|
-
* withQuerySelectorPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
|
|
17
|
-
*/
|
|
18
4
|
function filterPlugin(options = {}) {
|
|
19
5
|
const includeSelectors = options.include?.map((selector) => {
|
|
20
6
|
if (typeof selector === "string") return parseSelector(selector);
|
|
@@ -28,10 +14,10 @@ function filterPlugin(options = {}) {
|
|
|
28
14
|
return createPlugin({ beforeNodeProcess(event) {
|
|
29
15
|
const { node } = event;
|
|
30
16
|
if (node.type === TEXT_NODE) {
|
|
31
|
-
let currentParent
|
|
32
|
-
while (currentParent
|
|
33
|
-
if (excludeSelectors.some((selector) => selector.matches(currentParent
|
|
34
|
-
currentParent
|
|
17
|
+
let currentParent = node.parent;
|
|
18
|
+
while (currentParent && excludeSelectors.length) {
|
|
19
|
+
if (excludeSelectors.some((selector) => selector.matches(currentParent))) return { skip: true };
|
|
20
|
+
currentParent = currentParent.parent;
|
|
35
21
|
}
|
|
36
22
|
return;
|
|
37
23
|
}
|
|
@@ -59,13 +45,6 @@ function filterPlugin(options = {}) {
|
|
|
59
45
|
}
|
|
60
46
|
} });
|
|
61
47
|
}
|
|
62
|
-
|
|
63
|
-
//#endregion
|
|
64
|
-
//#region src/plugins/frontmatter.ts
|
|
65
|
-
/**
|
|
66
|
-
* A plugin that manages frontmatter generation from HTML head elements
|
|
67
|
-
* Extracts metadata from meta tags and title and generates YAML frontmatter
|
|
68
|
-
*/
|
|
69
48
|
function frontmatterPlugin(options = {}) {
|
|
70
49
|
const additionalFields = options.additionalFields || {};
|
|
71
50
|
const metaFields = new Set([
|
|
@@ -106,10 +85,13 @@ function frontmatterPlugin(options = {}) {
|
|
|
106
85
|
onNodeExit(node, state) {
|
|
107
86
|
if (node.type === ELEMENT_NODE && node.tagId === TAG_HEAD) {
|
|
108
87
|
inHead = false;
|
|
109
|
-
if (Object.keys(frontmatter).length > 0)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
88
|
+
if (Object.keys(frontmatter).length > 0) {
|
|
89
|
+
const frontmatterContent = generateFrontmatter();
|
|
90
|
+
if (frontmatterContent) {
|
|
91
|
+
state.buffer.push(frontmatterContent);
|
|
92
|
+
state.lastContentCache = frontmatterContent;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
113
95
|
}
|
|
114
96
|
},
|
|
115
97
|
processTextNode(node) {
|
|
@@ -124,9 +106,6 @@ function frontmatterPlugin(options = {}) {
|
|
|
124
106
|
}
|
|
125
107
|
}
|
|
126
108
|
});
|
|
127
|
-
/**
|
|
128
|
-
* Generate YAML frontmatter string from collected metadata
|
|
129
|
-
*/
|
|
130
109
|
function generateFrontmatter() {
|
|
131
110
|
if (Object.keys(frontmatter).length === 0) return "";
|
|
132
111
|
let yamlLines = [];
|
|
@@ -146,38 +125,6 @@ function frontmatterPlugin(options = {}) {
|
|
|
146
125
|
return `---\n${yamlLines.join("\n")}\n---\n\n`;
|
|
147
126
|
}
|
|
148
127
|
}
|
|
149
|
-
|
|
150
|
-
//#endregion
|
|
151
|
-
//#region src/plugins/isolate-main.ts
|
|
152
|
-
/**
|
|
153
|
-
* Plugin that isolates main content using the following priority order:
|
|
154
|
-
* 1. If an explicit <main> element exists (within 5 depth levels), use its content exclusively
|
|
155
|
-
* 2. Otherwise, find content between the first header tag (h1-h6) and first footer
|
|
156
|
-
* 3. If footer is within 5 levels of nesting from the header, use it as the end boundary
|
|
157
|
-
* 4. Exclude all content before the start marker and after the end marker
|
|
158
|
-
*
|
|
159
|
-
* @example
|
|
160
|
-
* ```html
|
|
161
|
-
* <body>
|
|
162
|
-
* <nav>Navigation (excluded)</nav>
|
|
163
|
-
* <main>
|
|
164
|
-
* <h1>Main Title (included)</h1>
|
|
165
|
-
* <p>Main content (included)</p>
|
|
166
|
-
* </main>
|
|
167
|
-
* <footer>Footer (excluded)</footer>
|
|
168
|
-
* </body>
|
|
169
|
-
* ```
|
|
170
|
-
*
|
|
171
|
-
* @example
|
|
172
|
-
* ```html
|
|
173
|
-
* <body>
|
|
174
|
-
* <nav>Navigation (excluded)</nav>
|
|
175
|
-
* <h1>Main Title (included)</h1>
|
|
176
|
-
* <p>Main content (included)</p>
|
|
177
|
-
* <footer>Footer (excluded)</footer>
|
|
178
|
-
* </body>
|
|
179
|
-
* ```
|
|
180
|
-
*/
|
|
181
128
|
function isolateMainPlugin() {
|
|
182
129
|
let mainElement = null;
|
|
183
130
|
let firstHeaderElement = null;
|
|
@@ -268,187 +215,6 @@ function isolateMainPlugin() {
|
|
|
268
215
|
}
|
|
269
216
|
} });
|
|
270
217
|
}
|
|
271
|
-
|
|
272
|
-
//#endregion
|
|
273
|
-
//#region src/plugins/readability.ts
|
|
274
|
-
const REGEXPS = {
|
|
275
|
-
positive: /article|body|content|entry|main|page|post|text|blog|story|recipe|ingredient|instruction|description|docs?|guide|tutorial|reference|manual/i,
|
|
276
|
-
negative: /ad|banner|combx|comment|disqus|extra|foot|header|menu|meta|nav|promo|related|scroll|share|sidebar|sponsor|social|tags|widget|sitemap|copyright|login|register|subscribe|newsletter|signup|category|author|date|publish|cta|button|apply|trial|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i,
|
|
277
|
-
commas: /,/g,
|
|
278
|
-
periodAtEnd: /\.( |$)/,
|
|
279
|
-
hidden: /hidden|display:\s*none|visibility:\s*hidden/i,
|
|
280
|
-
advertisement: /^ad-|^ad$|advertisement|sponsor|promo|banner/i,
|
|
281
|
-
comments: /comment|disqus|replies/i
|
|
282
|
-
};
|
|
283
|
-
const TagScores = {
|
|
284
|
-
[TAG_ARTICLE]: 15,
|
|
285
|
-
[TAG_SECTION]: 8,
|
|
286
|
-
[TAG_MAIN]: 15,
|
|
287
|
-
[TAG_P]: 5,
|
|
288
|
-
[TAG_DIV]: 2,
|
|
289
|
-
[TAG_BLOCKQUOTE]: 5,
|
|
290
|
-
[TAG_PRE]: 8,
|
|
291
|
-
[TAG_CODE]: 6,
|
|
292
|
-
[TAG_IMG]: 3,
|
|
293
|
-
[TAG_FIGURE]: 4,
|
|
294
|
-
[TAG_FIGCAPTION]: 3,
|
|
295
|
-
[TAG_VIDEO]: 3,
|
|
296
|
-
[TAG_AUDIO]: 3,
|
|
297
|
-
[TAG_SVG]: 1,
|
|
298
|
-
[TAG_TABLE]: 0,
|
|
299
|
-
[TAG_CAPTION]: 2,
|
|
300
|
-
[TAG_THEAD]: 0,
|
|
301
|
-
[TAG_TBODY]: 0,
|
|
302
|
-
[TAG_TFOOT]: 0,
|
|
303
|
-
[TAG_TR]: -1,
|
|
304
|
-
[TAG_TH]: -2,
|
|
305
|
-
[TAG_TD]: 0,
|
|
306
|
-
[TAG_UL]: -8,
|
|
307
|
-
[TAG_OL]: -5,
|
|
308
|
-
[TAG_LI]: -6,
|
|
309
|
-
[TAG_DL]: 0,
|
|
310
|
-
[TAG_DT]: 0,
|
|
311
|
-
[TAG_DD]: 0,
|
|
312
|
-
[TAG_H1]: 1,
|
|
313
|
-
[TAG_H2]: 1,
|
|
314
|
-
[TAG_H3]: 1,
|
|
315
|
-
[TAG_H4]: 0,
|
|
316
|
-
[TAG_H5]: 0,
|
|
317
|
-
[TAG_H6]: 0,
|
|
318
|
-
[TAG_HEADER]: -15,
|
|
319
|
-
[TAG_FOOTER]: -25,
|
|
320
|
-
[TAG_NAV]: -30,
|
|
321
|
-
[TAG_ASIDE]: -25,
|
|
322
|
-
[TAG_FORM]: -8,
|
|
323
|
-
[TAG_BUTTON]: -5,
|
|
324
|
-
[TAG_INPUT]: -5,
|
|
325
|
-
[TAG_TEXTAREA]: -5,
|
|
326
|
-
[TAG_SELECT]: -5,
|
|
327
|
-
[TAG_FIELDSET]: -5,
|
|
328
|
-
[TAG_IFRAME]: -3,
|
|
329
|
-
[TAG_EMBED]: -3,
|
|
330
|
-
[TAG_OBJECT]: -3,
|
|
331
|
-
[TAG_A]: -8,
|
|
332
|
-
[TAG_STRONG]: 1,
|
|
333
|
-
[TAG_B]: 1,
|
|
334
|
-
[TAG_EM]: 1,
|
|
335
|
-
[TAG_I]: 1,
|
|
336
|
-
[TAG_HR]: 0,
|
|
337
|
-
[TAG_BR]: 0,
|
|
338
|
-
[TAG_SPAN]: 0,
|
|
339
|
-
[TAG_SCRIPT]: -25,
|
|
340
|
-
[TAG_STYLE]: -25,
|
|
341
|
-
[TAG_DETAILS]: 2,
|
|
342
|
-
[TAG_SUMMARY]: 1,
|
|
343
|
-
[TAG_ADDRESS]: -3
|
|
344
|
-
};
|
|
345
|
-
/**
|
|
346
|
-
* Apply score adjustments based on class and ID names
|
|
347
|
-
*/
|
|
348
|
-
function scoreClassAndId(node) {
|
|
349
|
-
let scoreAdjustment = 0;
|
|
350
|
-
if (node.attributes?.class) {
|
|
351
|
-
const className = node.attributes.class;
|
|
352
|
-
if (/nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(className)) scoreAdjustment -= 35;
|
|
353
|
-
else if (REGEXPS.negative.test(className)) scoreAdjustment -= 15;
|
|
354
|
-
else if (REGEXPS.positive.test(className)) {
|
|
355
|
-
scoreAdjustment += 10;
|
|
356
|
-
if (/docs?|guide|tutorial|reference|manual|article/i.test(className)) scoreAdjustment += 5;
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
if (node.attributes?.id) {
|
|
360
|
-
const id = node.attributes.id;
|
|
361
|
-
if (/nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(id)) scoreAdjustment -= 35;
|
|
362
|
-
else if (REGEXPS.negative.test(id)) scoreAdjustment -= 15;
|
|
363
|
-
else if (REGEXPS.positive.test(id)) {
|
|
364
|
-
scoreAdjustment += 10;
|
|
365
|
-
if (/docs?|guide|tutorial|reference|manual|article/i.test(id)) scoreAdjustment += 5;
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
return scoreAdjustment;
|
|
369
|
-
}
|
|
370
|
-
/**
|
|
371
|
-
* Creates a plugin that implements readability.js style heuristics for content quality assessment
|
|
372
|
-
* Controls content inclusion/exclusion using buffer regions
|
|
373
|
-
*/
|
|
374
|
-
function readabilityPlugin() {
|
|
375
|
-
let inHead = false;
|
|
376
|
-
return createPlugin({
|
|
377
|
-
onNodeEnter(node, state) {
|
|
378
|
-
if (inHead) return;
|
|
379
|
-
if (!node.context) node.context = {};
|
|
380
|
-
if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
|
|
381
|
-
if (node.tagId === TAG_HEAD) {
|
|
382
|
-
createBufferRegion(node, state, true);
|
|
383
|
-
inHead = true;
|
|
384
|
-
return;
|
|
385
|
-
}
|
|
386
|
-
const tagScore = node.tagId !== void 0 ? TagScores[node.tagId] ?? 0 : 0;
|
|
387
|
-
const classAndIdScore = scoreClassAndId(node);
|
|
388
|
-
node.context.score = tagScore + classAndIdScore;
|
|
389
|
-
node.context.tagCount = 1;
|
|
390
|
-
node.context.linkTextLength = 0;
|
|
391
|
-
node.context.textLength = 0;
|
|
392
|
-
if (node.name && /nav|header|footer|aside|form|fieldset|button/i.test(node.name) || node.attributes?.class && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.class) || node.attributes?.id && /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i.test(node.attributes.id) || node.attributes?.style && /display:\s*none|visibility:\s*hidden/i.test(node.attributes.style) || node.attributes && Object.keys(node.attributes).some((attr) => attr.startsWith("aria-") && node.attributes[attr] === "true" && /hidden|invisible/i.test(attr))) createBufferRegion(node, state, false);
|
|
393
|
-
else if (node.parent && node.parent.context) node.context.score = (node.context.score || 0) + (node.parent.context.score || 0);
|
|
394
|
-
},
|
|
395
|
-
processTextNode(node) {
|
|
396
|
-
if (!node.parent || inHead) return void 0;
|
|
397
|
-
const textValue = node.value;
|
|
398
|
-
const len = textValue.length;
|
|
399
|
-
const commaCount = Math.min(3, (textValue.match(REGEXPS.commas) || []).length);
|
|
400
|
-
const isInsideLink = !!node.parent.depthMap?.[TAG_A];
|
|
401
|
-
let parent = node.parent;
|
|
402
|
-
while (parent) {
|
|
403
|
-
if (!parent.context) parent.context = {};
|
|
404
|
-
parent.context.score = (parent.context.score || 0) + commaCount;
|
|
405
|
-
parent.context.textLength = (parent.context.textLength || 0) + len;
|
|
406
|
-
if (isInsideLink) parent.context.linkTextLength = (parent.context.linkTextLength || 0) + len;
|
|
407
|
-
parent = parent.parent;
|
|
408
|
-
}
|
|
409
|
-
},
|
|
410
|
-
onNodeExit(node, state) {
|
|
411
|
-
if (!node.context) return;
|
|
412
|
-
if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
|
|
413
|
-
if (node.tagId === TAG_HEAD) {
|
|
414
|
-
inHead = false;
|
|
415
|
-
return;
|
|
416
|
-
}
|
|
417
|
-
if (inHead) return;
|
|
418
|
-
const textLength = node.context.textLength || 0;
|
|
419
|
-
if (textLength === 0 && node.tagId !== TAG_BODY && !node.childTextNodeIndex) {} else if (textLength > 100) node.context.score = (node.context.score || 0) + 3;
|
|
420
|
-
else if (textLength >= 50) node.context.score = (node.context.score || 0) + 2;
|
|
421
|
-
else if (textLength >= 25) node.context.score = (node.context.score || 0) + 1;
|
|
422
|
-
const linkTextLength = node.context.linkTextLength || 0;
|
|
423
|
-
if (textLength > 0) {
|
|
424
|
-
const linkDensity = linkTextLength / textLength;
|
|
425
|
-
if (linkDensity > .4) if (linkDensity > .6) {
|
|
426
|
-
node.context.score = (node.context.score || 0) * .02;
|
|
427
|
-
if (linkTextLength > 50) node.context.isHighLinkDensity = true;
|
|
428
|
-
} else node.context.score = (node.context.score || 0) * (1 - linkDensity * 2);
|
|
429
|
-
else if (linkDensity > .2) node.context.score = (node.context.score || 0) * (1 - linkDensity * 1);
|
|
430
|
-
}
|
|
431
|
-
if (linkTextLength > 0 && textLength > 0) {
|
|
432
|
-
const linkRatio = linkTextLength / textLength;
|
|
433
|
-
const hasDocumentationMarkers = node.attributes?.class && /docs?|guide|tutorial|reference|manual|article|content/i.test(node.attributes.class) || node.attributes?.id && /docs?|guide|tutorial|reference|manual|article|content/i.test(node.attributes.id) || node.name && /main|article|section/i.test(node.name);
|
|
434
|
-
if (linkRatio > .3 && linkTextLength > 30 && !hasDocumentationMarkers) node.context.score = (node.context.score || 0) - 10;
|
|
435
|
-
}
|
|
436
|
-
const finalScore = node.context.score || 0;
|
|
437
|
-
if (finalScore <= -12) createBufferRegion(node, state, false);
|
|
438
|
-
else if (node.context.isHighLinkDensity || linkTextLength > 50 && textLength > 0 && linkTextLength / textLength > .5) createBufferRegion(node, state, false);
|
|
439
|
-
if (node.tagHandler?.isInline) {
|
|
440
|
-
const parent = node.parent;
|
|
441
|
-
if (parent && parent.context) parent.context.score = (parent.context.score || 0) + finalScore;
|
|
442
|
-
}
|
|
443
|
-
}
|
|
444
|
-
});
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
//#endregion
|
|
448
|
-
//#region src/plugins/tailwind.ts
|
|
449
|
-
/**
|
|
450
|
-
* Mapping of Tailwind classes to Markdown formatting
|
|
451
|
-
*/
|
|
452
218
|
const TAILWIND_TO_MARKDOWN_MAP = {
|
|
453
219
|
"font-bold": {
|
|
454
220
|
prefix: "**",
|
|
@@ -488,9 +254,6 @@ const TAILWIND_TO_MARKDOWN_MAP = {
|
|
|
488
254
|
"fixed": { hidden: true },
|
|
489
255
|
"sticky": { hidden: true }
|
|
490
256
|
};
|
|
491
|
-
/**
|
|
492
|
-
* Extract base class name from a responsive breakpoint variant
|
|
493
|
-
*/
|
|
494
257
|
function extractBaseClass(className) {
|
|
495
258
|
for (const bp of [
|
|
496
259
|
"sm:",
|
|
@@ -507,9 +270,6 @@ function extractBaseClass(className) {
|
|
|
507
270
|
breakpoint: ""
|
|
508
271
|
};
|
|
509
272
|
}
|
|
510
|
-
/**
|
|
511
|
-
* Sort classes by breakpoint for mobile-first processing
|
|
512
|
-
*/
|
|
513
273
|
function sortByBreakpoint(classes) {
|
|
514
274
|
const breakpointOrder = {
|
|
515
275
|
"": 0,
|
|
@@ -525,9 +285,6 @@ function sortByBreakpoint(classes) {
|
|
|
525
285
|
return breakpointOrder[aBreakpoint] - breakpointOrder[bBreakpoint];
|
|
526
286
|
});
|
|
527
287
|
}
|
|
528
|
-
/**
|
|
529
|
-
* Group classes by their formatting type to handle overrides
|
|
530
|
-
*/
|
|
531
288
|
function groupByFormattingType(classes) {
|
|
532
289
|
const sorted = sortByBreakpoint(classes);
|
|
533
290
|
const groups = {
|
|
@@ -553,9 +310,6 @@ function groupByFormattingType(classes) {
|
|
|
553
310
|
}
|
|
554
311
|
return groups;
|
|
555
312
|
}
|
|
556
|
-
/**
|
|
557
|
-
* Fix redundant markdown delimiters without regex
|
|
558
|
-
*/
|
|
559
313
|
function fixRedundantDelimiters(content) {
|
|
560
314
|
content = content.replaceAll("****", "**");
|
|
561
315
|
content = content.replaceAll("~~~~", "~~");
|
|
@@ -565,9 +319,6 @@ function fixRedundantDelimiters(content) {
|
|
|
565
319
|
}
|
|
566
320
|
return content;
|
|
567
321
|
}
|
|
568
|
-
/**
|
|
569
|
-
* Normalizes a list of Tailwind classes by processing breakpoints and resolving conflicts
|
|
570
|
-
*/
|
|
571
322
|
function normalizeClasses(classes) {
|
|
572
323
|
const result = [];
|
|
573
324
|
const mobileClasses = classes.filter((cls) => !hasBreakpoint(cls));
|
|
@@ -576,16 +327,10 @@ function normalizeClasses(classes) {
|
|
|
576
327
|
result.push(...breakpointClasses);
|
|
577
328
|
return result;
|
|
578
329
|
}
|
|
579
|
-
/**
|
|
580
|
-
* Check if a class has a breakpoint prefix
|
|
581
|
-
*/
|
|
582
330
|
function hasBreakpoint(className) {
|
|
583
331
|
const { breakpoint } = extractBaseClass(className);
|
|
584
332
|
return breakpoint !== "";
|
|
585
333
|
}
|
|
586
|
-
/**
|
|
587
|
-
* Process Tailwind classes for an element with mobile-first approach
|
|
588
|
-
*/
|
|
589
334
|
function processTailwindClasses(classes) {
|
|
590
335
|
let prefix = "";
|
|
591
336
|
let suffix = "";
|
|
@@ -637,9 +382,6 @@ function processTailwindClasses(classes) {
|
|
|
637
382
|
hidden
|
|
638
383
|
};
|
|
639
384
|
}
|
|
640
|
-
/**
|
|
641
|
-
* Creates a plugin that adds Tailwind class processing
|
|
642
|
-
*/
|
|
643
385
|
function tailwindPlugin() {
|
|
644
386
|
return createPlugin({
|
|
645
387
|
processAttributes(node) {
|
|
@@ -680,6 +422,4 @@ function tailwindPlugin() {
|
|
|
680
422
|
}
|
|
681
423
|
});
|
|
682
424
|
}
|
|
683
|
-
|
|
684
|
-
//#endregion
|
|
685
|
-
export { filterPlugin as a, frontmatterPlugin as i, readabilityPlugin as n, isolateMainPlugin as r, tailwindPlugin as t };
|
|
425
|
+
export { filterPlugin as i, isolateMainPlugin as n, frontmatterPlugin as r, tailwindPlugin as t };
|
package/dist/_chunks/types.d.mts
CHANGED
|
@@ -22,7 +22,7 @@ interface Plugin {
|
|
|
22
22
|
onNodeEnter?: (node: ElementNode, state: MdreamRuntimeState) => string | undefined | void;
|
|
23
23
|
/**
|
|
24
24
|
* Hook that runs when exiting a node
|
|
25
|
-
* @param
|
|
25
|
+
* @param node - The node event
|
|
26
26
|
* @param state - The current runtime state
|
|
27
27
|
* @returns String to add to the output, or PluginHookResult with content
|
|
28
28
|
*/
|
|
@@ -118,17 +118,6 @@ interface Node {
|
|
|
118
118
|
parent?: ElementNode | null;
|
|
119
119
|
/** Custom data added by plugins */
|
|
120
120
|
context?: PluginContext;
|
|
121
|
-
/** Region ID for buffer region tracking */
|
|
122
|
-
regionId?: number;
|
|
123
|
-
}
|
|
124
|
-
/**
|
|
125
|
-
* Buffer region for tracking content inclusion/exclusion
|
|
126
|
-
*/
|
|
127
|
-
interface BufferRegion {
|
|
128
|
-
/** Unique identifier */
|
|
129
|
-
id: number;
|
|
130
|
-
/** Inclusion state */
|
|
131
|
-
include: boolean;
|
|
132
121
|
}
|
|
133
122
|
/**
|
|
134
123
|
* State interface for HTML parsing and processing
|
|
@@ -182,10 +171,8 @@ interface MdreamRuntimeState extends Partial<MdreamProcessingState> {
|
|
|
182
171
|
tableColumnAlignments?: string[];
|
|
183
172
|
/** Plugin instances array for efficient iteration */
|
|
184
173
|
plugins?: Plugin[];
|
|
185
|
-
/**
|
|
186
|
-
|
|
187
|
-
/** Content buffers for regions */
|
|
188
|
-
regionContentBuffers: Map<number, string[]>;
|
|
174
|
+
/** Content buffer for markdown output */
|
|
175
|
+
buffer: string[];
|
|
189
176
|
/** Performance cache for last content to avoid iteration */
|
|
190
177
|
lastContentCache?: string;
|
|
191
178
|
/** Reference to the last processed node */
|
|
@@ -260,11 +247,8 @@ interface MarkdownChunk {
|
|
|
260
247
|
content: string;
|
|
261
248
|
/** Metadata extracted during chunking */
|
|
262
249
|
metadata: {
|
|
263
|
-
/** Header hierarchy at this chunk position */
|
|
264
|
-
|
|
265
|
-
/** Code block language if chunk is/contains code */
|
|
266
|
-
code?: string;
|
|
267
|
-
/** Line number range in original document */
|
|
250
|
+
/** Header hierarchy at this chunk position */headers?: Record<string, string>; /** Code block language if chunk is/contains code */
|
|
251
|
+
code?: string; /** Line number range in original document */
|
|
268
252
|
loc?: {
|
|
269
253
|
lines: {
|
|
270
254
|
from: number;
|
|
@@ -317,4 +301,4 @@ interface SplitterOptions extends HTMLToMarkdownOptions {
|
|
|
317
301
|
keepSeparator?: boolean;
|
|
318
302
|
}
|
|
319
303
|
//#endregion
|
|
320
|
-
export {
|
|
304
|
+
export { TailwindContext as _, MarkdownChunk as a, extractionPlugin as b, Node as c, PluginContext as d, PluginCreationOptions as f, TagHandler as g, TEXT_NODE as h, HandlerContext as i, NodeEvent as l, SplitterOptions as m, ElementNode as n, MdreamProcessingState as o, ReadabilityContext as p, HTMLToMarkdownOptions as r, MdreamRuntimeState as s, ELEMENT_NODE as t, Plugin as u, TextNode as v, ExtractedElement as y };
|
package/dist/cli.mjs
CHANGED
|
@@ -1,16 +1,13 @@
|
|
|
1
1
|
import "./_chunks/markdown-processor.mjs";
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import "./
|
|
5
|
-
import { t as withMinimalPreset } from "./_chunks/minimal.mjs";
|
|
2
|
+
import { t as streamHtmlToMarkdown } from "./_chunks/stream.mjs";
|
|
3
|
+
import { generateLlmsTxtArtifacts } from "./llms-txt.mjs";
|
|
4
|
+
import { withMinimalPreset } from "./preset/minimal.mjs";
|
|
6
5
|
import { readFileSync } from "node:fs";
|
|
7
6
|
import { mkdir, writeFile } from "node:fs/promises";
|
|
8
7
|
import { Readable } from "node:stream";
|
|
9
8
|
import { fileURLToPath } from "node:url";
|
|
10
9
|
import { cac } from "cac";
|
|
11
10
|
import { dirname, join, resolve } from "pathe";
|
|
12
|
-
|
|
13
|
-
//#region src/cli.ts
|
|
14
11
|
async function streamingConvert(options = {}) {
|
|
15
12
|
const outputStream = process.stdout;
|
|
16
13
|
let conversionOptions = { origin: options.origin };
|
|
@@ -61,6 +58,4 @@ cli.command("llms <patterns...>", "Generate llms.txt artifacts from HTML files")
|
|
|
61
58
|
});
|
|
62
59
|
});
|
|
63
60
|
cli.help().version(version).parse();
|
|
64
|
-
|
|
65
|
-
//#endregion
|
|
66
|
-
export { };
|
|
61
|
+
export {};
|
package/dist/iife.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
const e={"&":`&`,"<":`<`,">":`>`,""":`"`,"'":`'`,"'":`'`," ":` `},t={html:0,head:1,details:2,summary:3,title:4,meta:5,br:6,h1:7,h2:8,h3:9,h4:10,h5:11,h6:12,hr:13,strong:14,b:15,em:16,i:17,del:18,sub:19,sup:20,ins:21,blockquote:22,code:23,ul:24,li:25,a:26,img:27,table:28,thead:29,tr:30,th:31,td:32,ol:33,pre:34,p:35,div:36,span:37,tbody:38,tfoot:39,form:40,nav:41,label:42,button:43,body:44,center:45,kbd:46,footer:47,path:48,svg:49,article:50,section:51,script:52,style:53,link:54,area:55,base:56,col:57,embed:58,input:59,keygen:60,param:61,source:62,track:63,wbr:64,select:65,textarea:66,option:67,fieldset:68,legend:69,audio:70,video:71,canvas:72,iframe:73,map:74,dialog:75,meter:76,progress:77,template:78,abbr:79,mark:80,q:81,samp:82,small:83,noscript:84,noframes:85,xmp:86,plaintext:87,aside:88,u:89,cite:90,dfn:91,var:92,time:93,bdo:94,ruby:95,rt:96,rp:97,dd:98,dt:99,dl:101,address:100,figure:102,object:103,main:104,header:105,figcaption:106,caption:107},n=[0,0],r=[2,2],i=[1,1],a=[1,0],o=[0,1];function s(e,t){if(!e)return e;if(e.startsWith(`//`))return`https:${e}`;if(e.startsWith(`#`))return e;if(t){if(e.startsWith(`/`)&&t)return`${t.endsWith(`/`)?t.slice(0,-1):t}${e}`;if(e.startsWith(`./`))return`${t}/${e.slice(2)}`;if(!e.startsWith(`http`))return`${t}/${e.startsWith(`/`)?e.slice(1):e}`}return e}function c(e){return e.depthMap[32]>0}function l(e){if(!e)return``;let t=e.split(` `).map(e=>e.split(`language-`)[1]).filter(Boolean);return t.length>0?t[0].trim():``}function u(e){return{enter:({node:t})=>t.depthMap[26]?`<h${e}>`:`${`#`.repeat(e)} `,exit:({node:t})=>{if(t.depthMap[26])return`</h${e}>`},collapsesInnerWhiteSpace:!0}}const d={enter:({node:e})=>e.depthMap[15]>1?``:`**`,exit:({node:e})=>e.depthMap[15]>1?``:`**`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},f={enter:({node:e})=>e.depthMap[17]>1?``:`_`,exit:({node:e})=>e.depthMap[17]>1?``:`_`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},p={1:{spacing:n,collapsesInnerWhiteSpace:!0},2:{enter:()=>`<details>`,exit:()=>`</details>
|
|
2
2
|
|
|
3
3
|
`},3:{enter:()=>`<summary>`,exit:()=>`</summary>
|
|
4
4
|
|
|
5
|
-
`},4:{collapsesInnerWhiteSpace:!0,isNonNesting:!0,spacing:i},52:{excludesTextNodes:!0,isNonNesting:!0},53:{isNonNesting:!0,excludesTextNodes:!0},5:{collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:i},6:{enter:({node:e})=>u(e)?`<br>`:void 0,isSelfClosing:!0,spacing:i,collapsesInnerWhiteSpace:!0,isInline:!0},7:f(1),8:f(2),9:f(3),10:f(4),11:f(5),12:f(6),13:{enter:()=>`---`,isSelfClosing:!0},14:p,15:p,16:m,17:m,18:{enter:()=>`~~`,exit:()=>`~~`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},19:{enter:()=>`<sub>`,exit:()=>`</sub>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},20:{enter:()=>`<sup>`,exit:()=>`</sup>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},21:{enter:()=>`<ins>`,exit:()=>`</ins>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},22:{enter:({node:e})=>{let t=e.depthMap[22]||1,n=`> `.repeat(t);return e.depthMap[25]>0&&(n=`\n${` `.repeat(e.depthMap[25])}${n}`),n},spacing:o},23:{enter:({node:e})=>(e.depthMap[34]||0)>0?`\`\`\`${d(e.attributes?.class)}\n`:"`",exit:({node:e})=>e.depthMap[34]>0?"\n```":"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},24:{enter:({node:e})=>u(e)?`<ul>`:void 0,exit:({node:e})=>u(e)?`</ul>`:void 0},25:{enter:({node:e})=>{if(u(e))return`<li>`;let t=(e.depthMap[24]||0)+(e.depthMap[33]||0)-1,n=e.parent?.tagId===33;return`${` `.repeat(Math.max(0,t))}${n?`${e.index+1}. `:`- `}`},exit:({node:e})=>u(e)?`</li>`:void 0,spacing:s},26:{enter:({node:e})=>{if(e.attributes?.href)return`[`},exit:({node:e,state:t})=>{if(!e.attributes?.href)return``;let n=l(e.attributes?.href||``,t.options?.origin),r=e.attributes?.title;return t.lastContentCache===r&&(r=``),r?`](${n} "${r}")`:`](${n})`},collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},27:{enter:({node:e,state:t})=>`})`,collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:i,isInline:!0},28:{enter:({node:e,state:t})=>{if(u(e))return`<table>`;e.depthMap[28]<=1&&(t.tableRenderedTable=!1),t.tableColumnAlignments=[]},exit:({node:e})=>u(e)?`</table>`:void 0},29:{enter:({node:e})=>{if(u(e))return`<thead>`},exit:({node:e})=>u(e)?`</thead>`:void 0,spacing:c,excludesTextNodes:!0},30:{enter:({node:e,state:t})=>u(e)?`<tr>`:(t.tableCurrentRowCells=0,`| `),exit:({node:e,state:t})=>{if(u(e)||e.depthMap[28]>1)return`</tr>`;if(!t.tableRenderedTable){t.tableRenderedTable=!0;let e=t.tableColumnAlignments;for(;e.length<t.tableCurrentRowCells;)e.push(``);return` |\n| ${e.map(e=>{switch(e){case`left`:return`:---`;case`center`:return`:---:`;case`right`:return`---:`;default:return`---`}}).join(` | `)} |`}return` |`},excludesTextNodes:!0,spacing:c},31:{enter:({node:e,state:t})=>{if(e.depthMap[28]>1)return`<th>`;let n=e.attributes?.align?.toLowerCase();return n?t.tableColumnAlignments.push(n):t.tableColumnAlignments.length<=t.tableCurrentRowCells&&t.tableColumnAlignments.push(``),e.index===0?``:` | `},exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</th>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:i},32:{enter:({node:e})=>e.depthMap[28]>1?`<td>`:e.index===0?``:` | `,exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</td>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:i},35:{},36:{},37:{collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},41:{},42:{collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},43:{collapsesInnerWhiteSpace:!0,isInline:!0},44:{spacing:i},45:{enter:({node:e})=>{if(e.depthMap[28]>1)return`<center>`},exit:({node:e})=>{if(e.depthMap[28]>1)return`</center>`},spacing:i},38:{spacing:i,excludesTextNodes:!0},39:{spacing:c,excludesTextNodes:!0},46:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},47:{spacing:i},40:{spacing:i},54:{isSelfClosing:!0,spacing:i,collapsesInnerWhiteSpace:!0,isInline:!0},55:{isSelfClosing:!0,spacing:i,isInline:!0},56:{isSelfClosing:!0,spacing:i,isInline:!0},57:{isSelfClosing:!0,spacing:i},58:{isSelfClosing:!0,spacing:i},59:{isSelfClosing:!0,spacing:i,isInline:!0},60:{isSelfClosing:!0,spacing:i,isInline:!0},61:{isSelfClosing:!0,spacing:i},62:{isSelfClosing:!0,spacing:i},63:{isSelfClosing:!0,spacing:i},64:{isSelfClosing:!0,spacing:i,isInline:!0},49:{spacing:i},65:{spacing:i},66:{isNonNesting:!0,spacing:i},67:{isNonNesting:!0,spacing:i},68:{spacing:i},69:{spacing:i},70:{spacing:i},71:{spacing:i},72:{spacing:i},73:{isNonNesting:!0,spacing:i},74:{spacing:i},75:{spacing:i},76:{spacing:i},77:{spacing:i},78:{spacing:i},79:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},80:{enter:()=>`<mark>`,exit:()=>`</mark>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},81:{enter:()=>`"`,exit:()=>`"`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},82:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},83:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},84:{excludesTextNodes:!0,spacing:i},85:{isNonNesting:!0,spacing:i},86:{isNonNesting:!0,spacing:i},87:{isNonNesting:!0,spacing:i},88:{spacing:i},89:{enter:()=>`<u>`,exit:()=>`</u>`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},90:{enter:()=>`*`,exit:()=>`*`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},91:{enter:()=>`**`,exit:()=>`**`,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},92:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},93:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},94:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},95:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},96:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},97:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:i,isInline:!0},100:{enter:()=>`<address>`,exit:()=>`</address>`,spacing:i,collapsesInnerWhiteSpace:!0},101:{spacing:i,enter:()=>`<dl>`,exit:()=>`</dl>`},99:{enter:()=>`<dt>`,exit:()=>`</dt>`,collapsesInnerWhiteSpace:!0,spacing:[0,1]},98:{enter:()=>`<dd>`,exit:()=>`</dd>`,spacing:[0,1]}};function g(e){let t=``,r=0;for(;r<e.length;){if(e[r]===`&`){let i=!1;for(let[a,o]of Object.entries(n))if(e.startsWith(a,r)){t+=o,r+=a.length,i=!0;break}if(i)continue;if(r+2<e.length&&e[r+1]===`#`){let n=r;r+=2;let i=e[r]===`x`||e[r]===`X`;i&&r++;let a=r;for(;r<e.length&&e[r]!==`;`;)r++;if(r<e.length&&e[r]===`;`){let n=e.substring(a,r),o=i?16:10;try{let e=Number.parseInt(n,o);if(!Number.isNaN(e)){t+=String.fromCodePoint(e),r++;continue}}catch{}}r=n}}t+=e[r],r++}return t}function _(e){let t=e,n=[t];for(;t.tagHandler?.isInline&&t.parent;)t=t.parent,n.push(t);return n}const v=Object.freeze({});function y(e){return new Uint8Array(e)}function b(e){return e===32||e===9||e===10||e===13}function x(e,t,n){return S(e,t,n)}function S(e,t,n){let i=``;t.depthMap??=new Uint8Array(108),t.depth??=0,t.lastCharWasWhitespace??=!0,t.justClosedTag??=!1,t.isFirstTextInElement??=!1,t.lastCharWasBackslash??=!1;let a=0,o=e.length;for(;a<o;){let s=e.charCodeAt(a);if(s!==60){if(s===38&&(t.hasEncodedHtmlEntity=!0),b(s)){let n=t.depthMap[34]>0;if(t.justClosedTag&&(t.justClosedTag=!1,t.lastCharWasWhitespace=!1),!n&&t.lastCharWasWhitespace){a++;continue}n?i+=e[a]:(s===32||!t.lastCharWasWhitespace)&&(i+=` `),t.lastCharWasWhitespace=!0,t.textBufferContainsWhitespace=!0,t.lastCharWasBackslash=!1}else t.textBufferContainsNonWhitespace=!0,t.lastCharWasWhitespace=!1,t.justClosedTag=!1,s===124&&t.depthMap[28]?i+=`\\|`:s===96&&(t.depthMap[23]||t.depthMap[34])?i+="\\`":s===91&&t.depthMap[26]?i+=`\\[`:s===93&&t.depthMap[26]?i+=`\\]`:s===62&&t.depthMap[22]?i+=`\\>`:i+=e[a],t.currentNode?.tagHandler?.isNonNesting&&(t.lastCharWasBackslash||(s===39&&!t.inDoubleQuote&&!t.inBacktick?t.inSingleQuote=!t.inSingleQuote:s===34&&!t.inSingleQuote&&!t.inBacktick?t.inDoubleQuote=!t.inDoubleQuote:s===96&&!t.inSingleQuote&&!t.inDoubleQuote&&(t.inBacktick=!t.inBacktick))),t.lastCharWasBackslash=s===92;a++;continue}if(a+1>=o){i+=e[a];break}let c=e.charCodeAt(a+1);if(c===33){i.length>0&&(C(i,t,n),i=``);let r=E(e,a);if(r.complete)a=r.newPosition;else{i+=r.remainingText;break}}else if(c===47){let r=t.inSingleQuote||t.inDoubleQuote||t.inBacktick;if(t.currentNode?.tagHandler?.isNonNesting&&r){i+=e[a],a++;continue}i.length>0&&(C(i,t,n),i=``);let o=w(e,a,t,n);if(o.complete)a=o.newPosition;else{i+=o.remainingText;break}}else{let s=a+1,c=s,l=-1;for(;s<o;){let t=e.charCodeAt(s);if(b(t)||t===47||t===62){l=s;break}s++}if(l===-1){i+=e.substring(a);break}let u=e.substring(c,l).toLowerCase();if(!u){a=l;break}let d=r[u]??-1;if(s=l,t.currentNode?.tagHandler?.isNonNesting&&d!==t.currentNode?.tagId){i+=e[a++];continue}i.length>0&&(C(i,t,n),i=``);let f=D(u,d,e,s,t,n);if(f.skip)i+=e[a++];else if(f.complete)a=f.newPosition,f.selfClosing||(t.isFirstTextInElement=!0);else{i+=f.remainingText;break}}}return i}function C(e,t,n){let r=t.textBufferContainsNonWhitespace,i=t.textBufferContainsWhitespace;if(t.textBufferContainsNonWhitespace=!1,t.textBufferContainsWhitespace=!1,!t.currentNode)return;let a=t.currentNode?.tagHandler?.excludesTextNodes,o=t.depthMap[34]>0;if(!o&&!r&&!t.currentNode.childTextNodeIndex)return;let s=e;if(s.length===0)return;let c=_(t.currentNode),l=c[c.length-1];if(i&&!l?.childTextNodeIndex){let e=0;for(;e<s.length&&(o?s.charCodeAt(e)===10||s.charCodeAt(e)===13:b(s.charCodeAt(e)));)e++;e>0&&(s=s.substring(e))}t.hasEncodedHtmlEntity&&=(s=g(String(s)),!1);let u={type:2,value:s,parent:t.currentNode,regionId:t.currentNode?.regionId,index:t.currentNode.currentWalkIndex++,depth:t.depth,containsWhitespace:i,excludedFromMarkdown:a};for(let e of c)e.childTextNodeIndex=(e.childTextNodeIndex||0)+1;n({type:0,node:u}),t.lastTextNode=u}function w(e,t,n,i){let a=t+2,o=a,s=e.length,c=!1;for(;a<s;){if(e.charCodeAt(a)===62){c=!0;break}a++}if(!c)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let l=r[e.substring(o,a).toLowerCase()]??-1;if(n.currentNode?.tagHandler?.isNonNesting&&l!==n.currentNode.tagId)return{complete:!1,newPosition:t,remainingText:e.substring(t)};let u=n.currentNode;if(u){let e=u.tagId!==l;for(;u&&e;)T(u,n,i),u=u.parent,e=u?.tagId!==l}return u&&T(u,n,i),n.justClosedTag=!0,{complete:!0,newPosition:a+1,remainingText:``}}function T(e,t,n){if(e){if(e.tagId===26&&!e.childTextNodeIndex){let t=e.attributes?.title||e.attributes?.[`aria-label`]||``;if(t){e.childTextNodeIndex=1,n({type:0,node:{type:2,value:t,parent:e,index:0,depth:e.depth+1}});for(let t of _(e))t.childTextNodeIndex=(t.childTextNodeIndex||0)+1}}e.tagId&&(t.depthMap[e.tagId]=Math.max(0,t.depthMap[e.tagId]-1)),e.tagHandler?.isNonNesting&&(t.inSingleQuote=!1,t.inDoubleQuote=!1,t.inBacktick=!1,t.lastCharWasBackslash=!1),t.depth--,n({type:1,node:e}),t.currentNode=t.currentNode.parent,t.hasEncodedHtmlEntity=!1,t.justClosedTag=!0}}function E(e,t){let n=t,r=e.length;if(n+3<r&&e.charCodeAt(n+2)===45&&e.charCodeAt(n+3)===45){for(n+=4;n<r-2;){if(e.charCodeAt(n)===45&&e.charCodeAt(n+1)===45&&e.charCodeAt(n+2)===62)return n+=3,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:t,remainingText:e.substring(t)}}else{for(n+=2;n<r;){if(e.charCodeAt(n)===62)return n++,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:n,remainingText:e.substring(t,n)}}}function D(e,t,n,r,i,a){i.currentNode?.tagHandler?.isNonNesting&&T(i.currentNode,i,a);let o=h[t],s=O(n,r,o);if(!s.complete)return{complete:!1,newPosition:r,remainingText:`<${e}${s.attrBuffer}`,selfClosing:!1};let c=i.depthMap[t];i.depthMap[t]=c+1,i.depth++,r=s.newPosition,i.currentNode&&(i.currentNode.currentWalkIndex=i.currentNode.currentWalkIndex||0);let l=i.currentNode?i.currentNode.currentWalkIndex++:0,u={type:1,name:e,attributes:s.attributes,parent:i.currentNode,depthMap:y(i.depthMap),depth:i.depth,index:l,regionId:i.currentNode?.regionId,tagId:t,tagHandler:o};i.lastTextNode=u,a({type:0,node:u});let d=u;return d.currentWalkIndex=0,i.currentNode=d,i.hasEncodedHtmlEntity=!1,o?.isNonNesting&&!s.selfClosing&&(i.inSingleQuote=!1,i.inDoubleQuote=!1,i.inBacktick=!1,i.lastCharWasBackslash=!1),s.selfClosing?(T(u,i,a),i.justClosedTag=!0):i.justClosedTag=!1,{complete:!0,newPosition:r,remainingText:``,selfClosing:s.selfClosing}}function O(e,t,n){let r=t,i=e.length,a=n?.isSelfClosing||!1,o=r,s=!1,c=0,l=0;for(;r<i;){let t=e.charCodeAt(r);if(s){t===c&&l!==92&&(s=!1),r++;continue}else if(t===34||t===39)s=!0,c=t;else if(t===47&&r+1<i&&e.charCodeAt(r+1)===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+2,attributes:k(t),selfClosing:!0,attrBuffer:t}}else if(t===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+1,attributes:k(t),selfClosing:a,attrBuffer:t}}r++,l=t}return{complete:!1,newPosition:r,attributes:v,selfClosing:!1,attrBuffer:e.substring(o,r)}}function k(e){if(!e)return v;let t={},n=e.length,r=0,i=0,a=0,o=0,s=0,c=0,l=``;for(;r<n;){let u=e.charCodeAt(r),d=b(u);switch(i){case 0:d||(i=1,a=r,o=0);break;case 1:(u===61||d)&&(o=r,l=e.substring(a,o).toLowerCase(),i=u===61?3:2);break;case 2:u===61?i=3:d||(t[l]=``,i=1,a=r,o=0);break;case 3:u===34||u===39?(c=u,i=4,s=r+1):d||(i=5,s=r);break;case 4:u===92&&r+1<n?r++:u===c&&(t[l]=e.substring(s,r),i=0);break;case 5:(d||u===62)&&(t[l]=e.substring(s,r),i=0);break}r++}if(i===4||i===5)l&&(t[l]=e.substring(s,r));else if(i===1||i===2||i===3){o||=r;let n=e.substring(a,o).toLowerCase();n&&(t[n]=``)}return t}function A(e,t,n,r){if(t?.length){for(let r of t){let t=r.beforeNodeProcess?.(e,n);if(typeof t==`object`&&t.skip)return!0}if(e.node.type===1){let r=e.node;if(e.type===0)for(let e of t)e.processAttributes&&e.processAttributes(r,n);let i=e.type===0?`onNodeEnter`:`onNodeExit`,a=[];for(let e of t)if(e[i]){let t=e[i](r,n);t&&a.push(t)}a.length>0&&(r.pluginOutput=(r.pluginOutput||[]).concat(a))}else if(e.node.type===2&&e.type===0){let r=e.node;for(let e of t)if(e.processTextNode){let t=e.processTextNode(r,n);if(t){if(t.skip)return!0;r.value=t.content}}}}return r(e),!1}function j(e,t,n){if(e===` `||e===`
|
|
5
|
+
`},4:{collapsesInnerWhiteSpace:!0,isNonNesting:!0,spacing:n},52:{excludesTextNodes:!0,isNonNesting:!0},53:{isNonNesting:!0,excludesTextNodes:!0},5:{collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:n},6:{enter:({node:e})=>c(e)?`<br>`:void 0,isSelfClosing:!0,spacing:n,collapsesInnerWhiteSpace:!0,isInline:!0},7:u(1),8:u(2),9:u(3),10:u(4),11:u(5),12:u(6),13:{enter:()=>`---`,isSelfClosing:!0},14:d,15:d,16:f,17:f,18:{enter:()=>`~~`,exit:()=>`~~`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},19:{enter:()=>`<sub>`,exit:()=>`</sub>`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},20:{enter:()=>`<sup>`,exit:()=>`</sup>`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},21:{enter:()=>`<ins>`,exit:()=>`</ins>`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},22:{enter:({node:e})=>{let t=e.depthMap[22]||1,n=`> `.repeat(t);return e.depthMap[25]>0&&(n=`\n${` `.repeat(e.depthMap[25])}${n}`),n},spacing:i},23:{enter:({node:e})=>(e.depthMap[34]||0)>0?`\`\`\`${l(e.attributes?.class)}\n`:"`",exit:({node:e})=>e.depthMap[34]>0?"\n```":"`",collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},24:{enter:({node:e})=>c(e)?`<ul>`:void 0,exit:({node:e})=>c(e)?`</ul>`:void 0},25:{enter:({node:e})=>{if(c(e))return`<li>`;let t=(e.depthMap[24]||0)+(e.depthMap[33]||0)-1,n=e.parent?.tagId===33;return`${` `.repeat(Math.max(0,t))}${n?`${e.index+1}. `:`- `}`},exit:({node:e})=>c(e)?`</li>`:void 0,spacing:a},26:{enter:({node:e})=>{if(e.attributes?.href)return`[`},exit:({node:e,state:t})=>{if(!e.attributes?.href)return``;let n=s(e.attributes?.href||``,t.options?.origin),r=e.attributes?.title;return t.lastContentCache===r&&(r=``),r?`](${n} "${r}")`:`](${n})`},collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},27:{enter:({node:e,state:t})=>`})`,collapsesInnerWhiteSpace:!0,isSelfClosing:!0,spacing:n,isInline:!0},28:{enter:({node:e,state:t})=>{if(c(e))return`<table>`;e.depthMap[28]<=1&&(t.tableRenderedTable=!1),t.tableColumnAlignments=[]},exit:({node:e})=>c(e)?`</table>`:void 0},29:{enter:({node:e})=>{if(c(e))return`<thead>`},exit:({node:e})=>c(e)?`</thead>`:void 0,spacing:o,excludesTextNodes:!0},30:{enter:({node:e,state:t})=>c(e)?`<tr>`:(t.tableCurrentRowCells=0,`| `),exit:({node:e,state:t})=>{if(c(e)||e.depthMap[28]>1)return`</tr>`;if(!t.tableRenderedTable){t.tableRenderedTable=!0;let e=t.tableColumnAlignments;for(;e.length<t.tableCurrentRowCells;)e.push(``);return` |\n| ${e.map(e=>{switch(e){case`left`:return`:---`;case`center`:return`:---:`;case`right`:return`---:`;default:return`---`}}).join(` | `)} |`}return` |`},excludesTextNodes:!0,spacing:o},31:{enter:({node:e,state:t})=>{if(e.depthMap[28]>1)return`<th>`;let n=e.attributes?.align?.toLowerCase();return n?t.tableColumnAlignments.push(n):t.tableColumnAlignments.length<=t.tableCurrentRowCells&&t.tableColumnAlignments.push(``),e.index===0?``:` | `},exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</th>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:n},32:{enter:({node:e})=>e.depthMap[28]>1?`<td>`:e.index===0?``:` | `,exit:({node:e,state:t})=>{if(e.depthMap[28]>1)return`</td>`;t.tableCurrentRowCells++},collapsesInnerWhiteSpace:!0,spacing:n},35:{},36:{},37:{collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},41:{},42:{collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},43:{collapsesInnerWhiteSpace:!0,isInline:!0},44:{spacing:n},45:{enter:({node:e})=>{if(e.depthMap[28]>1)return`<center>`},exit:({node:e})=>{if(e.depthMap[28]>1)return`</center>`},spacing:n},38:{spacing:n,excludesTextNodes:!0},39:{spacing:o,excludesTextNodes:!0},46:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},47:{spacing:n},40:{spacing:n},54:{isSelfClosing:!0,spacing:n,collapsesInnerWhiteSpace:!0,isInline:!0},55:{isSelfClosing:!0,spacing:n,isInline:!0},56:{isSelfClosing:!0,spacing:n,isInline:!0},57:{isSelfClosing:!0,spacing:n},58:{isSelfClosing:!0,spacing:n},59:{isSelfClosing:!0,spacing:n,isInline:!0},60:{isSelfClosing:!0,spacing:n,isInline:!0},61:{isSelfClosing:!0,spacing:n},62:{isSelfClosing:!0,spacing:n},63:{isSelfClosing:!0,spacing:n},64:{isSelfClosing:!0,spacing:n,isInline:!0},49:{spacing:n},65:{spacing:n},66:{isNonNesting:!0,spacing:n},67:{isNonNesting:!0,spacing:n},68:{spacing:n},69:{spacing:n},70:{spacing:n},71:{spacing:n},72:{spacing:n},73:{isNonNesting:!0,spacing:n},74:{spacing:n},75:{spacing:n},76:{spacing:n},77:{spacing:n},78:{spacing:n},79:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},80:{enter:()=>`<mark>`,exit:()=>`</mark>`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},81:{enter:()=>`"`,exit:()=>`"`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},82:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},83:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},84:{excludesTextNodes:!0,spacing:n},85:{isNonNesting:!0,spacing:n},86:{isNonNesting:!0,spacing:n},87:{isNonNesting:!0,spacing:n},88:{spacing:n},89:{enter:()=>`<u>`,exit:()=>`</u>`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},90:{enter:()=>`*`,exit:()=>`*`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},91:{enter:()=>`**`,exit:()=>`**`,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},92:{enter:()=>"`",exit:()=>"`",collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},93:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},94:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},95:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},96:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},97:{enter:()=>``,exit:()=>``,collapsesInnerWhiteSpace:!0,spacing:n,isInline:!0},100:{enter:()=>`<address>`,exit:()=>`</address>`,spacing:n,collapsesInnerWhiteSpace:!0},101:{spacing:n,enter:()=>`<dl>`,exit:()=>`</dl>`},99:{enter:()=>`<dt>`,exit:()=>`</dt>`,collapsesInnerWhiteSpace:!0,spacing:[0,1]},98:{enter:()=>`<dd>`,exit:()=>`</dd>`,spacing:[0,1]}};function m(t){let n=``,r=0;for(;r<t.length;){if(t[r]===`&`){let i=!1;for(let[a,o]of Object.entries(e))if(t.startsWith(a,r)){n+=o,r+=a.length,i=!0;break}if(i)continue;if(r+2<t.length&&t[r+1]===`#`){let e=r;r+=2;let i=t[r]===`x`||t[r]===`X`;i&&r++;let a=r;for(;r<t.length&&t[r]!==`;`;)r++;if(r<t.length&&t[r]===`;`){let e=t.substring(a,r),o=i?16:10;try{let t=Number.parseInt(e,o);if(!Number.isNaN(t)){n+=String.fromCodePoint(t),r++;continue}}catch{}}r=e}}n+=t[r],r++}return n}function h(e){let t=e,n=[t];for(;t.tagHandler?.isInline&&t.parent;)t=t.parent,n.push(t);return n}const g=Object.freeze({});function _(e){return new Uint8Array(e)}function v(e){return e===32||e===9||e===10||e===13}function y(e,t,n){return b(e,t,n)}function b(e,n,r){let i=``;n.depthMap??=new Uint8Array(108),n.depth??=0,n.lastCharWasWhitespace??=!0,n.justClosedTag??=!1,n.isFirstTextInElement??=!1,n.lastCharWasBackslash??=!1;let a=0,o=e.length;for(;a<o;){let s=e.charCodeAt(a);if(s!==60){if(s===38&&(n.hasEncodedHtmlEntity=!0),v(s)){let t=n.depthMap[34]>0;if(n.justClosedTag&&(n.justClosedTag=!1,n.lastCharWasWhitespace=!1),!t&&n.lastCharWasWhitespace){a++;continue}t?i+=e[a]:(s===32||!n.lastCharWasWhitespace)&&(i+=` `),n.lastCharWasWhitespace=!0,n.textBufferContainsWhitespace=!0,n.lastCharWasBackslash=!1}else n.textBufferContainsNonWhitespace=!0,n.lastCharWasWhitespace=!1,n.justClosedTag=!1,s===124&&n.depthMap[28]?i+=`\\|`:s===96&&(n.depthMap[23]||n.depthMap[34])?i+="\\`":s===91&&n.depthMap[26]?i+=`\\[`:s===93&&n.depthMap[26]?i+=`\\]`:s===62&&n.depthMap[22]?i+=`\\>`:i+=e[a],n.currentNode?.tagHandler?.isNonNesting&&(n.lastCharWasBackslash||(s===39&&!n.inDoubleQuote&&!n.inBacktick?n.inSingleQuote=!n.inSingleQuote:s===34&&!n.inSingleQuote&&!n.inBacktick?n.inDoubleQuote=!n.inDoubleQuote:s===96&&!n.inSingleQuote&&!n.inDoubleQuote&&(n.inBacktick=!n.inBacktick))),n.lastCharWasBackslash=s===92;a++;continue}if(a+1>=o){i+=e[a];break}let c=e.charCodeAt(a+1);if(c===33){i.length>0&&(x(i,n,r),i=``);let t=w(e,a);if(t.complete)a=t.newPosition;else{i+=t.remainingText;break}}else if(c===47){let t=n.inSingleQuote||n.inDoubleQuote||n.inBacktick;if(n.currentNode?.tagHandler?.isNonNesting&&t){i+=e[a],a++;continue}i.length>0&&(x(i,n,r),i=``);let o=S(e,a,n,r);if(o.complete)a=o.newPosition;else{i+=o.remainingText;break}}else{let s=a+1,c=s,l=-1;for(;s<o;){let t=e.charCodeAt(s);if(v(t)||t===47||t===62){l=s;break}s++}if(l===-1){i+=e.substring(a);break}let u=e.substring(c,l).toLowerCase();if(!u){a=l;break}let d=t[u]??-1;if(s=l,n.currentNode?.tagHandler?.isNonNesting&&d!==n.currentNode?.tagId){i+=e[a++];continue}i.length>0&&(x(i,n,r),i=``);let f=T(u,d,e,s,n,r);if(f.skip)i+=e[a++];else if(f.complete)a=f.newPosition,f.selfClosing||(n.isFirstTextInElement=!0);else{i+=f.remainingText;break}}}return i}function x(e,t,n){let r=t.textBufferContainsNonWhitespace,i=t.textBufferContainsWhitespace;if(t.textBufferContainsNonWhitespace=!1,t.textBufferContainsWhitespace=!1,!t.currentNode)return;let a=t.currentNode?.tagHandler?.excludesTextNodes,o=t.depthMap[34]>0;if(!o&&!r&&!t.currentNode.childTextNodeIndex)return;let s=e;if(s.length===0)return;let c=h(t.currentNode),l=c[c.length-1];if(i&&!l?.childTextNodeIndex){let e=0;for(;e<s.length&&(o?s.charCodeAt(e)===10||s.charCodeAt(e)===13:v(s.charCodeAt(e)));)e++;e>0&&(s=s.substring(e))}t.hasEncodedHtmlEntity&&=(s=m(String(s)),!1);let u={type:2,value:s,parent:t.currentNode,index:t.currentNode.currentWalkIndex++,depth:t.depth,containsWhitespace:i,excludedFromMarkdown:a};for(let e of c)e.childTextNodeIndex=(e.childTextNodeIndex||0)+1;n({type:0,node:u}),t.lastTextNode=u}function S(e,n,r,i){let a=n+2,o=a,s=e.length,c=!1;for(;a<s;){if(e.charCodeAt(a)===62){c=!0;break}a++}if(!c)return{complete:!1,newPosition:n,remainingText:e.substring(n)};let l=t[e.substring(o,a).toLowerCase()]??-1;if(r.currentNode?.tagHandler?.isNonNesting&&l!==r.currentNode.tagId)return{complete:!1,newPosition:n,remainingText:e.substring(n)};let u=r.currentNode;if(u){let e=u.tagId!==l;for(;u&&e;)C(u,r,i),u=u.parent,e=u?.tagId!==l}return u&&C(u,r,i),r.justClosedTag=!0,{complete:!0,newPosition:a+1,remainingText:``}}function C(e,t,n){if(e){if(e.tagId===26&&!e.childTextNodeIndex){let t=e.attributes?.title||e.attributes?.[`aria-label`]||``;if(t){e.childTextNodeIndex=1,n({type:0,node:{type:2,value:t,parent:e,index:0,depth:e.depth+1}});for(let t of h(e))t.childTextNodeIndex=(t.childTextNodeIndex||0)+1}}e.tagId&&(t.depthMap[e.tagId]=Math.max(0,t.depthMap[e.tagId]-1)),e.tagHandler?.isNonNesting&&(t.inSingleQuote=!1,t.inDoubleQuote=!1,t.inBacktick=!1,t.lastCharWasBackslash=!1),t.depth--,n({type:1,node:e}),t.currentNode=t.currentNode.parent,t.hasEncodedHtmlEntity=!1,t.justClosedTag=!0}}function w(e,t){let n=t,r=e.length;if(n+3<r&&e.charCodeAt(n+2)===45&&e.charCodeAt(n+3)===45){for(n+=4;n<r-2;){if(e.charCodeAt(n)===45&&e.charCodeAt(n+1)===45&&e.charCodeAt(n+2)===62)return n+=3,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:t,remainingText:e.substring(t)}}else{for(n+=2;n<r;){if(e.charCodeAt(n)===62)return n++,{complete:!0,newPosition:n,remainingText:``};n++}return{complete:!1,newPosition:n,remainingText:e.substring(t,n)}}}function T(e,t,n,r,i,a){i.currentNode?.tagHandler?.isNonNesting&&C(i.currentNode,i,a);let o=p[t],s=E(n,r,o);if(!s.complete)return{complete:!1,newPosition:r,remainingText:`<${e}${s.attrBuffer}`,selfClosing:!1};let c=i.depthMap[t];i.depthMap[t]=c+1,i.depth++,r=s.newPosition,i.currentNode&&(i.currentNode.currentWalkIndex=i.currentNode.currentWalkIndex||0);let l=i.currentNode?i.currentNode.currentWalkIndex++:0,u={type:1,name:e,attributes:s.attributes,parent:i.currentNode,depthMap:_(i.depthMap),depth:i.depth,index:l,tagId:t,tagHandler:o};i.lastTextNode=u,a({type:0,node:u});let d=u;return d.currentWalkIndex=0,i.currentNode=d,i.hasEncodedHtmlEntity=!1,o?.isNonNesting&&!s.selfClosing&&(i.inSingleQuote=!1,i.inDoubleQuote=!1,i.inBacktick=!1,i.lastCharWasBackslash=!1),s.selfClosing?(C(u,i,a),i.justClosedTag=!0):i.justClosedTag=!1,{complete:!0,newPosition:r,remainingText:``,selfClosing:s.selfClosing}}function E(e,t,n){let r=t,i=e.length,a=n?.isSelfClosing||!1,o=r,s=!1,c=0,l=0;for(;r<i;){let t=e.charCodeAt(r);if(s){t===c&&l!==92&&(s=!1),r++;continue}else if(t===34||t===39)s=!0,c=t;else if(t===47&&r+1<i&&e.charCodeAt(r+1)===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+2,attributes:D(t),selfClosing:!0,attrBuffer:t}}else if(t===62){let t=e.substring(o,r).trim();return{complete:!0,newPosition:r+1,attributes:D(t),selfClosing:a,attrBuffer:t}}r++,l=t}return{complete:!1,newPosition:r,attributes:g,selfClosing:!1,attrBuffer:e.substring(o,r)}}function D(e){if(!e)return g;let t={},n=e.length,r=0,i=0,a=0,o=0,s=0,c=0,l=``;for(;r<n;){let u=e.charCodeAt(r),d=v(u);switch(i){case 0:d||(i=1,a=r,o=0);break;case 1:(u===61||d)&&(o=r,l=e.substring(a,o).toLowerCase(),i=u===61?3:2);break;case 2:u===61?i=3:d||(t[l]=``,i=1,a=r,o=0);break;case 3:u===34||u===39?(c=u,i=4,s=r+1):d||(i=5,s=r);break;case 4:u===92&&r+1<n?r++:u===c&&(t[l]=e.substring(s,r),i=0);break;case 5:(d||u===62)&&(t[l]=e.substring(s,r),i=0);break}r++}if(i===4||i===5)l&&(t[l]=e.substring(s,r));else if(i===1||i===2||i===3){o||=r;let n=e.substring(a,o).toLowerCase();n&&(t[n]=``)}return t}function O(e,t,n,r){if(t?.length){for(let r of t){let t=r.beforeNodeProcess?.(e,n);if(typeof t==`object`&&t.skip)return!0}if(e.node.type===1){let r=e.node;if(e.type===0)for(let e of t)e.processAttributes&&e.processAttributes(r,n);let i=e.type===0?`onNodeEnter`:`onNodeExit`,a=[];for(let e of t)if(e[i]){let t=e[i](r,n);t&&a.push(t)}a.length>0&&(r.pluginOutput=(r.pluginOutput||[]).concat(a))}else if(e.node.type===2&&e.type===0){let r=e.node;for(let e of t)if(e.processTextNode){let t=e.processTextNode(r,n);if(t){if(t.skip)return!0;r.value=t.content}}}}return r(e),!1}function k(e,t,n){if(e===` `||e===`
|
|
6
6
|
`||e===` `||t===` `||t===`
|
|
7
|
-
`||t===` `)return!1;let r=new Set([`[`,`(`,`>`,`*`,`_`,"`"]),i=new Set([`]`,`)`,`<`,`.`,`,`,`!`,`?`,`:`,`;`,`*`,`_`,"`"]);return e===`|`&&t===`<`&&n&&n.depthMap[28]>0?!0:!(r.has(e)||i.has(t))}function
|
|
8
|
-
`&&e!==` `&&e!==`[`&&e!==`>`&&!t?.tagHandler?.isInline&&n.value[0]!==` `}function
|
|
9
|
-
`)return;
|
|
10
|
-
`&&
|
|
11
|
-
`&&
|
|
12
|
-
`.repeat(
|
|
7
|
+
`||t===` `)return!1;let r=new Set([`[`,`(`,`>`,`*`,`_`,"`"]),i=new Set([`]`,`)`,`<`,`.`,`,`,`!`,`?`,`:`,`;`,`*`,`_`,"`"]);return e===`|`&&t===`<`&&n&&n.depthMap[28]>0?!0:!(r.has(e)||i.has(t))}function A(e,t,n){return!!e&&e!==`
|
|
8
|
+
`&&e!==` `&&e!==`[`&&e!==`>`&&!t?.tagHandler?.isInline&&n.value[0]!==` `}function j(e){let t=e.tagId,i=e.depthMap;if(t!==25&&i[25]>0||t!==22&&i[22]>0)return n;let a=t!==void 0&&(t>=7&&t<=12||t===35||t===36),o=e.parent;for(;o;){if(o.tagHandler?.collapsesInnerWhiteSpace){if(a&&o.tagId===37){o=o.parent;continue}return n}o=o.parent}return e.tagHandler?.spacing?e.tagHandler?.spacing:r}function M(e={}){let t={options:e,buffer:[],depthMap:new Uint8Array(108)},n=0;function r(e){let{type:n,node:r}=e,i=t.lastNode;t.lastNode=e.node,t.depth=r.depth;let a=t.buffer,o=a[a.length-1],s=o?.charAt(o.length-1)||``,c;if(c=o?.length>1?o.charAt(o.length-2):a[a.length-2]?.charAt(a[a.length-2].length-1),r.type===2&&n===0){let e=r;if(e.value){if(e.excludedFromMarkdown||e.value===` `&&s===`
|
|
9
|
+
`)return;A(s,i,e)&&(e.value=` ${e.value}`),t.buffer.push(e.value),t.lastContentCache=e.value}t.lastTextNode=e;return}if(r.type!==1)return;let l={node:r,state:t},u=[],d=r;d.pluginOutput?.length&&(u.push(...d.pluginOutput),d.pluginOutput=[]);let f=t.lastContentCache,p=0;s===`
|
|
10
|
+
`&&p++,c===`
|
|
11
|
+
`&&p++;let m=n===0?`enter`:`exit`,h=r.tagHandler;if(!u.length&&h?.[m]){let e=h[m](l);e&&u.push(e)}let g=j(r)[n]||0,_=Math.max(0,g-p);if(_>0){if(!a.length){for(let e of u)e&&(t.buffer.push(e),t.lastContentCache=e);return}let e=`
|
|
12
|
+
`.repeat(_);s===` `&&a?.length&&(a[a.length-1]=a[a.length-1].substring(0,a[a.length-1].length-1)),n===0?u.unshift(e):u.push(e)}else if(f&&t.lastTextNode?.containsWhitespace&&r.parent&&`value`in t.lastTextNode&&typeof t.lastTextNode.value==`string`&&(!r.parent.depthMap[34]||r.parent.tagId===34)){let e=r.tagHandler?.isInline,i=r.tagHandler?.collapsesInnerWhiteSpace,o=r.tagHandler?.spacing&&Array.isArray(r.tagHandler.spacing);if((!e||n===1)&&!(!e&&!i&&g>0)&&!(i&&n===0)&&!(o&&n===0)){let e=f.length,t=f.trimEnd();e-t.length>0&&a?.length&&a[a.length-1]===f&&(a[a.length-1]=t)}t.lastTextNode=void 0}u[0]?.[0]&&n===0&&s&&k(s,u[0][0],t)&&(t.buffer.push(` `),t.lastContentCache=` `);for(let e of u)e&&(t.buffer.push(e),t.lastContentCache=e)}function i(e){y(e,{depthMap:t.depthMap,depth:0,plugins:t.options?.plugins||[]},e=>{O(e,t.options?.plugins,t,r)})}function a(){let e=t.buffer.join(``).trimStart();return t.buffer.length=0,e.trimEnd()}function o(){let e=t.buffer.join(``).trimStart(),r=e.slice(n);return n=e.length,r}return{processEvent:r,processHtml:i,getMarkdown:a,getMarkdownChunk:o,state:t}}function N(e,t={}){let n=M(t);return n.processHtml(e),n.getMarkdown()}const P={htmlToMarkdown:N};typeof window<`u`&&(window.mdream=P);
|
package/dist/index.d.mts
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import { _ as
|
|
1
|
+
import { _ as TailwindContext, a as MarkdownChunk, c as Node, d as PluginContext, f as PluginCreationOptions, g as TagHandler, h as TEXT_NODE, i as HandlerContext, l as NodeEvent, m as SplitterOptions, n as ElementNode, o as MdreamProcessingState, p as ReadabilityContext, r as HTMLToMarkdownOptions, s as MdreamRuntimeState, t as ELEMENT_NODE, u as Plugin, v as TextNode, y as ExtractedElement } from "./_chunks/types.mjs";
|
|
2
2
|
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
3
3
|
import { ReadableStream } from "node:stream/web";
|
|
4
4
|
|
|
5
5
|
//#region src/const.d.ts
|
|
6
|
-
|
|
7
6
|
declare const TagIdMap: {
|
|
8
7
|
readonly html: 0;
|
|
9
8
|
readonly head: 1;
|
|
@@ -119,10 +118,8 @@ declare const TagIdMap: {
|
|
|
119
118
|
interface MarkdownState {
|
|
120
119
|
/** Configuration options for conversion */
|
|
121
120
|
options?: HTMLToMarkdownOptions;
|
|
122
|
-
/**
|
|
123
|
-
|
|
124
|
-
/** Content buffers for regions */
|
|
125
|
-
regionContentBuffers: Map<number, string[]>;
|
|
121
|
+
/** Content buffer for markdown output */
|
|
122
|
+
buffer: string[];
|
|
126
123
|
/** Performance cache for last content to avoid iteration */
|
|
127
124
|
lastContentCache?: string;
|
|
128
125
|
/** Reference to the last processed node */
|
|
@@ -178,4 +175,4 @@ declare function streamHtmlToMarkdown(htmlStream: ReadableStream | null, options
|
|
|
178
175
|
//#region src/index.d.ts
|
|
179
176
|
declare function htmlToMarkdown(html: string, options?: HTMLToMarkdownOptions): string;
|
|
180
177
|
//#endregion
|
|
181
|
-
export {
|
|
178
|
+
export { ELEMENT_NODE, ElementNode, ExtractedElement, HTMLToMarkdownOptions, HandlerContext, MarkdownChunk, MarkdownProcessor, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, Plugin, PluginContext, PluginCreationOptions, ReadabilityContext, SplitterOptions, TEXT_NODE, TagHandler, TagIdMap, TailwindContext, TextNode, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { i as parseHtml, t as MarkdownProcessor } from "./_chunks/markdown-processor.mjs";
|
|
1
|
+
import { fn as TagIdMap } from "./_chunks/const.mjs";
|
|
2
|
+
import { i as parseHtml, n as createMarkdownProcessor, t as MarkdownProcessor } from "./_chunks/markdown-processor.mjs";
|
|
3
3
|
import { t as createPlugin } from "./_chunks/plugin.mjs";
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
import { t as streamHtmlToMarkdown } from "./_chunks/stream.mjs";
|
|
5
|
+
function htmlToMarkdown(html, options = {}) {
|
|
6
|
+
const processor = createMarkdownProcessor(options);
|
|
7
|
+
processor.processHtml(html);
|
|
8
|
+
return processor.getMarkdown();
|
|
9
|
+
}
|
|
10
|
+
export { MarkdownProcessor, TagIdMap, createPlugin, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
|