mdream 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,582 @@
1
+ import { ELEMENT_NODE, TAG_FOOTER, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEADER, TAG_MAIN, TEXT_NODE, createPlugin$1 as createPlugin } from "./plugin-DCJFRZej.mjs";
2
+
3
+ //#region src/libs/query-selector.ts
4
/**
 * Selector for a bare element name such as `div`, `p` or `h1`.
 */
class TagSelector {
  /**
   * @param {string} tagName - Element name this selector accepts.
   */
  constructor(tagName) {
    this.tagName = tagName;
  }

  /**
   * @param {{ name: string }} element - Node to test.
   * @returns {boolean} True when the node's `name` is strictly equal to the stored tag name.
   */
  matches(element) {
    const { name } = element;
    return name === this.tagName;
  }

  /**
   * @returns {string} The selector text, which is the tag name itself.
   */
  toString() {
    return this.tagName;
  }
}
18
/**
 * Selector for an element id, written as `#main` or `#content`.
 */
class IdSelector {
  id;

  /**
   * @param {string} selector - Selector text including the leading `#`.
   */
  constructor(selector) {
    // Everything after the first character (the '#') is the id.
    this.id = selector.substring(1);
  }

  /**
   * @param {{ attributes?: Record<string, string> }} element - Node to test.
   * @returns {boolean} True when the node's `id` attribute is strictly equal to the stored id.
   */
  matches(element) {
    const attributes = element.attributes;
    return attributes?.id === this.id;
  }

  /**
   * @returns {string} The selector text with its leading `#` restored.
   */
  toString() {
    return "#".concat(this.id);
  }
}
33
/**
 * Matches a class selector (e.g., '.container', '.header').
 *
 * The element's `class` attribute is treated as a whitespace-separated token
 * list: tokens may be separated by spaces, tabs, line feeds, form feeds or
 * carriage returns, not only by a single space character.
 */
class ClassSelector {
  className;

  /**
   * @param {string} selector - Selector text including the leading `.`.
   */
  constructor(selector) {
    this.className = selector.slice(1);
  }

  /**
   * @param {{ attributes?: Record<string, string> }} element - Node to test.
   * @returns {boolean} True when one of the element's class tokens equals the class name.
   */
  matches(element) {
    const classAttr = element.attributes?.class;
    // A missing, empty or non-string class attribute can never match.
    if (typeof classAttr !== "string" || classAttr === "") return false;
    // Split on any run of ASCII whitespace so multi-line class lists work too.
    const tokens = classAttr.split(/[ \t\n\f\r]+/).filter(Boolean);
    return tokens.includes(this.className);
  }

  /**
   * @returns {string} The selector text with its leading `.` restored.
   */
  toString() {
    return `.${this.className}`;
  }
}
50
/**
 * Matches an attribute selector (e.g., '[data-id]', '[href="https://example.com"]').
 *
 * Supported forms:
 *   [attr]        attribute is present
 *   [attr=v]      value equals v
 *   [attr^=v]     value starts with v
 *   [attr$=v]     value ends with v
 *   [attr*=v]     value contains v
 *   [attr~=v]     v is one of the whitespace-separated tokens in the value
 *   [attr|=v]     value is v or starts with "v-"
 */
class AttributeSelector {
  attrName;
  attrValue;
  operator;

  /**
   * @param {string} selector - Selector text including the surrounding brackets.
   */
  constructor(selector) {
    const match = selector.match(/\[([^\]=~|^$*]+)(?:([=~|^$*]+)["']?([^"'\]]+)["']?)?\]/);
    if (match) {
      this.attrName = match[1];
      this.operator = match[2];
      this.attrValue = match[3];
    } else {
      // Unparseable input: fall back to treating the bracket contents as a name.
      this.attrName = selector.slice(1, -1);
    }
  }

  /**
   * @param {{ attributes?: Record<string, string> }} element - Node to test.
   * @returns {boolean} True when the element satisfies the attribute condition.
   */
  matches(element) {
    const attributes = element.attributes;
    // Only own properties count; `in` would also accept inherited keys such as
    // "constructor" or "toString" on a plain attributes object.
    if (attributes == null || !Object.hasOwn(attributes, this.attrName)) return false;
    if (!this.operator || !this.attrValue) return true;
    const value = attributes[this.attrName];
    // The string operators below are meaningless for non-string values.
    if (typeof value !== "string") return false;
    switch (this.operator) {
      case "=":
        return value === this.attrValue;
      case "^=":
        return value.startsWith(this.attrValue);
      case "$=":
        return value.endsWith(this.attrValue);
      case "*=":
        return value.includes(this.attrValue);
      case "~=":
        // Token list: split on any run of ASCII whitespace, not just one space.
        return value.split(/[ \t\n\f\r]+/).includes(this.attrValue);
      case "|=":
        return value === this.attrValue || value.startsWith(`${this.attrValue}-`);
      default:
        return false;
    }
  }

  /**
   * @returns {string} The selector text; the value is emitted without quotes.
   */
  toString() {
    if (!this.operator || !this.attrValue) return `[${this.attrName}]`;
    return `[${this.attrName}${this.operator}${this.attrValue}]`;
  }
}
84
/**
 * Conjunction of several simple selectors applied to one element,
 * e.g. `div.container` or `h1#title`.
 */
class CompoundSelector {
  /**
   * @param {Array<{ matches(element: object): boolean, toString(): string }>} selectors
   *   Parts that must all accept an element for the compound to accept it.
   */
  constructor(selectors) {
    this.selectors = selectors;
  }

  /**
   * @param {object} element - Node to test.
   * @returns {boolean} True unless at least one part rejects the element.
   */
  matches(element) {
    const rejected = this.selectors.some((part) => !part.matches(element));
    return !rejected;
  }

  /**
   * @returns {string} The parts' selector texts concatenated with no separator.
   */
  toString() {
    const pieces = this.selectors.map((part) => part.toString());
    return pieces.join("");
  }
}
98
/**
 * Parses a simple or compound CSS selector into a matcher.
 *
 * The selector is split into tag, `#id`, `.class` and `[attribute]` parts.
 * Characters between `[` and `]` are never treated as part boundaries, so
 * attribute values may contain `.` or `#` (e.g. `a[href$=".pdf"]`).
 * Combinators (descendant, child, ...) are not interpreted.
 *
 * @param {string} selector - Selector text; surrounding whitespace is ignored.
 * @returns {TagSelector|IdSelector|ClassSelector|AttributeSelector|CompoundSelector}
 *   The single part when there is only one, otherwise a CompoundSelector.
 * @throws {Error} When the selector is empty after trimming.
 */
function parseSelector(selector) {
  const source = selector.trim();
  if (!source) throw new Error("Empty selector");

  // Picks the matcher class from the part's leading character.
  const toPart = (text) => {
    switch (text[0]) {
      case ".":
        return new ClassSelector(text);
      case "#":
        return new IdSelector(text);
      case "[":
        return new AttributeSelector(text);
      default:
        return new TagSelector(text);
    }
  };

  const selectorParts = [];
  let current = "";
  let inAttribute = false;
  for (const char of source) {
    // '.', '#' and '[' open a new part only outside an attribute bracket.
    const opensPart = !inAttribute && (char === "." || char === "#" || char === "[");
    if (opensPart && current) {
      selectorParts.push(toPart(current));
      current = char;
    } else {
      current += char;
    }
    if (char === "[") inAttribute = true;
    else if (char === "]") inAttribute = false;
  }
  if (current) selectorParts.push(toPart(current));

  if (selectorParts.length === 1) return selectorParts[0];
  return new CompoundSelector(selectorParts);
}
127
+
128
+ //#endregion
129
+ //#region src/plugins/extraction.ts
130
/**
 * Creates a plugin that collects the text content of elements matching CSS
 * selectors and reports each element once it has been fully processed.
 *
 * When several selectors match the same element, every matching callback is
 * invoked (in the order the selectors appear in `selectors`), each with its
 * own shallow copy of the element.
 *
 * @param {Record<string, (element: object, state: unknown) => void>} selectors
 *   Map from selector text to the callback to run for matching elements. The
 *   callback receives a shallow copy of the element with an added, trimmed
 *   `textContent` string, plus the state passed to `onNodeExit`.
 * @returns {*} Whatever `createPlugin` returns for the assembled hooks.
 */
function extractionPlugin(selectors) {
  const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
    matcher: parseSelector(selector),
    callback,
  }));
  // element -> { textContent, callbacks } for elements currently being processed
  const trackedElements = new Map();

  return createPlugin({
    onNodeEnter(element) {
      const callbacks = matcherCallbacks
        .filter(({ matcher }) => matcher.matches(element))
        .map(({ callback }) => callback);
      if (callbacks.length > 0) {
        trackedElements.set(element, { textContent: "", callbacks });
      }
    },
    processTextNode(textNode) {
      // Text belongs to every tracked ancestor, not only the direct parent.
      for (let ancestor = textNode.parent; ancestor; ancestor = ancestor.parent) {
        const tracked = trackedElements.get(ancestor);
        if (tracked) tracked.textContent += textNode.value;
      }
    },
    onNodeExit(element, state) {
      const tracked = trackedElements.get(element);
      if (!tracked) return;
      // Drop the entry first so a throwing callback cannot leave it behind.
      trackedElements.delete(element);
      const textContent = tracked.textContent.trim();
      for (const callback of tracked.callbacks) {
        callback({ ...element, textContent }, state);
      }
    },
  });
}
166
+
167
+ //#endregion
168
+ //#region src/plugins/filter.ts
169
/**
 * Plugin that filters nodes based on CSS selectors.
 * Allows including or excluding nodes based on selectors.
 *
 * Behaviour:
 * - A text node is skipped when any of its ancestors matches an exclude selector.
 * - An element is skipped when it, or any ancestor, matches an exclude selector.
 *   While exclude selectors are configured, elements whose inline `style`
 *   contains "absolute" or "fixed" are skipped as well.
 * - When include selectors are configured, an element is kept only if it
 *   matches one of them or (unless `processChildren` is `false`) one of its
 *   ancestors does.
 *
 * @example
 * // Include only heading elements and their children
 * filterPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
 *
 * @example
 * // Exclude navigation, sidebar, and footer
 * filterPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
 *
 * @param {object} [options]
 * @param {Array<string|number>} [options.include] - Selector strings, or values compared against `element.tagId`.
 * @param {Array<string|number>} [options.exclude] - Selector strings, or values compared against `element.tagId`.
 * @param {boolean} [options.processChildren=true] - Whether descendants of an included element are kept.
 * @returns {*} Whatever `createPlugin` returns for the assembled hook.
 */
function filterPlugin(options = {}) {
  // Strings are parsed as selectors; anything else is compared against tagId.
  const toMatcher = (selector) =>
    (typeof selector === "string"
      ? parseSelector(selector)
      : { matches: (element) => element.tagId === selector });
  const includeSelectors = options.include?.map((selector) => toMatcher(selector)) || [];
  const excludeSelectors = options.exclude?.map((selector) => toMatcher(selector)) || [];
  const processChildren = options.processChildren !== false;

  const isExcluded = (element) => excludeSelectors.some((selector) => selector.matches(element));
  const hasExcludedAncestor = (start) => {
    for (let ancestor = start; ancestor; ancestor = ancestor.parent) {
      if (isExcluded(ancestor)) return true;
    }
    return false;
  };

  return createPlugin({
    beforeNodeProcess(event) {
      const { node } = event;

      if (node.type === TEXT_NODE) {
        if (excludeSelectors.length && hasExcludedAncestor(node.parent)) return { skip: true };
        return;
      }
      if (node.type !== ELEMENT_NODE) return;

      const element = node;
      if (excludeSelectors.length) {
        // Elements may have no attributes object at all, hence the guard.
        const style = element.attributes?.style;
        if (typeof style === "string" && (style.includes("absolute") || style.includes("fixed"))) {
          return { skip: true };
        }
        if (isExcluded(element) || hasExcludedAncestor(element.parent)) return { skip: true };
      }

      if (includeSelectors.length) {
        let currentElement = element;
        while (currentElement) {
          if (includeSelectors.some((selector) => selector.matches(currentElement))) return;
          // Without processChildren only the element itself may match.
          if (!processChildren) break;
          currentElement = currentElement.parent;
        }
        return { skip: true };
      }
    },
  });
}
230
+
231
+ //#endregion
232
+ //#region src/plugins/isolate-main.ts
233
/**
 * Plugin that keeps only the page's main content and skips everything else.
 *
 * Decision order for each node:
 * 1. The first `<main>` element seen at depth 5 or less becomes the anchor;
 *    from then on only that element and its descendants are kept.
 * 2. While no `<main>` has been seen, content is kept from the first heading
 *    (h1-h6) that is not nested inside a `<header>` element.
 * 3. A `<footer>` at most 5 levels deeper than that heading ends the kept
 *    region; the footer itself and everything after it are skipped.
 * 4. Anything before the first qualifying heading is skipped.
 *
 * @example
 * ```html
 * <body>
 *   <nav>Navigation (excluded)</nav>
 *   <main>
 *     <h1>Main Title (included)</h1>
 *     <p>Main content (included)</p>
 *   </main>
 *   <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @example
 * ```html
 * <body>
 *   <nav>Navigation (excluded)</nav>
 *   <h1>Main Title (included)</h1>
 *   <p>Main content (included)</p>
 *   <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @returns {*} Whatever `createPlugin` returns for the assembled hook.
 */
function isolateMainPlugin() {
  const headingTagIds = new Set([TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6]);

  // Per-plugin-instance state, updated as nodes stream through.
  let mainElement = null;
  let firstHeaderElement = null;
  let afterFooter = false;

  // A fresh result object per decision, as callers receive their own copy.
  const skipResult = () => ({ skip: true });

  // True when `start` or one of its ancestors satisfies `test`.
  const chainHas = (start, test) => {
    for (let cursor = start; cursor; cursor = cursor.parent) {
      if (test(cursor)) return true;
    }
    return false;
  };
  const isUnderMain = (start) => chainHas(start, (candidate) => candidate === mainElement);

  const decideElement = (element) => {
    if (!mainElement && element.tagId === TAG_MAIN && element.depth <= 5) {
      mainElement = element;
      return undefined;
    }

    if (mainElement) {
      const inside = element === mainElement || isUnderMain(element.parent);
      return inside ? undefined : skipResult();
    }

    if (!firstHeaderElement && headingTagIds.has(element.tagId)) {
      // Headings inside <header> do not start the content region.
      const nestedInHeader = chainHas(element.parent, (candidate) => candidate.tagId === TAG_HEADER);
      if (!nestedInHeader) {
        firstHeaderElement = element;
        return undefined;
      }
    }

    if (firstHeaderElement && !afterFooter && element.tagId === TAG_FOOTER) {
      if (element.depth - firstHeaderElement.depth <= 5) {
        afterFooter = true;
        return skipResult();
      }
    }

    if (!firstHeaderElement || afterFooter) return skipResult();
    return undefined;
  };

  const decideText = (textNode) => {
    if (mainElement) {
      return isUnderMain(textNode.parent) ? undefined : skipResult();
    }
    if (!firstHeaderElement || afterFooter) return skipResult();
    return undefined;
  };

  return createPlugin({
    beforeNodeProcess({ node }) {
      if (node.type === ELEMENT_NODE) return decideElement(node);
      if (node.type === TEXT_NODE) return decideText(node);
      return undefined;
    },
  });
}
338
+
339
+ //#endregion
340
+ //#region src/plugins/tailwind.ts
341
/**
 * Lookup from a Tailwind utility class to its Markdown treatment: either a
 * `prefix`/`suffix` pair wrapped around the text, or a `hidden` flag for
 * content that should not be emitted.
 */
const TAILWIND_TO_MARKDOWN_MAP = (() => {
  // Each entry gets its own object so entries never share identity.
  const wrapWith = (delimiter) => ({ prefix: delimiter, suffix: delimiter });
  const hide = () => ({ hidden: true });

  return {
    "font-bold": wrapWith("**"),
    "font-semibold": wrapWith("**"),
    "font-black": wrapWith("**"),
    "font-extrabold": wrapWith("**"),
    "font-medium": wrapWith("**"),
    "font-italic": wrapWith("*"),
    "italic": wrapWith("*"),
    "line-through": wrapWith("~~"),
    "hidden": hide(),
    "invisible": hide(),
    "absolute": hide(),
    "fixed": hide(),
    "sticky": hide(),
  };
})();
383
/**
 * Splits a class name into its responsive breakpoint prefix and the remainder.
 *
 * @param {string} className - Class such as `md:font-bold` or `italic`.
 * @returns {{ baseClass: string, breakpoint: string }} `breakpoint` is one of
 *   `sm:`, `md:`, `lg:`, `xl:`, `2xl:`, or `""` when the class has none of
 *   them; `baseClass` is the class name with that prefix removed.
 */
function extractBaseClass(className) {
  const knownPrefixes = ["sm:", "md:", "lg:", "xl:", "2xl:"];
  const breakpoint = knownPrefixes.find((candidate) => className.startsWith(candidate)) ?? "";
  return {
    baseClass: className.substring(breakpoint.length),
    breakpoint,
  };
}
403
/**
 * Returns a copy of `classes` ordered mobile-first: unprefixed classes come
 * first, followed by `sm:`, `md:`, `lg:`, `xl:` and `2xl:` classes. The input
 * array is left untouched.
 *
 * @param {string[]} classes - Class names to order.
 * @returns {string[]} New array sorted by breakpoint rank.
 */
function sortByBreakpoint(classes) {
  // Position in this list is the breakpoint's rank.
  const ascending = ["", "sm:", "md:", "lg:", "xl:", "2xl:"];
  const rankOf = (cls) => ascending.indexOf(extractBaseClass(cls).breakpoint);

  const ordered = Array.from(classes);
  ordered.sort((left, right) => rankOf(left) - rankOf(right));
  return ordered;
}
421
/**
 * Group classes by their formatting type to handle overrides.
 *
 * Classes are first ordered mobile-first, then placed into exactly one group:
 * - `emphasis`: base class contains "italic"
 * - `weight`: base class is a Tailwind font-weight utility (`font-thin` ...
 *   `font-black`) or the bare name `bold`
 * - `decoration`: base class contains "line-through" or "underline"
 * - `display`: base class is `hidden` or contains "invisible"
 * - `position`: base class is `absolute`, `fixed` or `sticky`
 * - `other`: everything else, including font-family utilities such as
 *   `font-sans`, which must not be mistaken for a weight
 *
 * @param {string[]} classes - Class names, optionally with a breakpoint prefix.
 * @returns {{ emphasis: string[], weight: string[], decoration: string[], display: string[], position: string[], other: string[] }}
 *   The original class names (prefix included) distributed over the groups.
 */
function groupByFormattingType(classes) {
  // Only real font-weight utilities belong in the weight group; matching any
  // "font-" class would let font-family classes shadow the actual weight.
  const fontWeightClasses = new Set([
    "font-thin",
    "font-extralight",
    "font-light",
    "font-normal",
    "font-medium",
    "font-semibold",
    "font-bold",
    "font-extrabold",
    "font-black",
  ]);
  const positionClasses = new Set(["absolute", "fixed", "sticky"]);

  const groups = {
    emphasis: [],
    weight: [],
    decoration: [],
    display: [],
    position: [],
    other: [],
  };
  for (const cls of sortByBreakpoint(classes)) {
    const { baseClass } = extractBaseClass(cls);
    if (baseClass.includes("italic")) groups.emphasis.push(cls);
    else if (fontWeightClasses.has(baseClass) || baseClass === "bold") groups.weight.push(cls);
    else if (baseClass.includes("line-through") || baseClass.includes("underline")) groups.decoration.push(cls);
    else if (baseClass === "hidden" || baseClass.includes("invisible")) groups.display.push(cls);
    else if (positionClasses.has(baseClass)) groups.position.push(cls);
    else groups.other.push(cls);
  }
  return groups;
}
449
/**
 * Tidies doubled-up Markdown delimiters using plain string operations only.
 *
 * Every `****` becomes `**` and every `~~~~` becomes `~~` (single pass, left
 * to right). Afterwards, if `***` occurs three or more times, the second
 * occurrence is replaced by a single space.
 *
 * @param {string} content - Markdown text to clean up.
 * @returns {string} The cleaned text.
 */
function fixRedundantDelimiters(content) {
  const collapsed = content.replaceAll("****", "**").replaceAll("~~~~", "~~");

  const segments = collapsed.split("***");
  // Fewer than four segments means fewer than three "***" markers: nothing to merge.
  if (segments.length < 4) return collapsed;

  const [head, second, third, ...tail] = segments;
  return `${head}***${second} ${third}***${tail.join("***")}`;
}
461
/**
 * Reorders Tailwind classes so that unprefixed (mobile) classes come before
 * breakpoint-prefixed ones. Relative order inside each of the two partitions
 * is kept, and a new array is returned.
 *
 * @param {string[]} classes - Class names to reorder.
 * @returns {string[]} Mobile classes followed by breakpoint classes.
 */
function normalizeClasses(classes) {
  const mobile = [];
  const responsive = [];
  for (const cls of classes) {
    const bucket = hasBreakpoint(cls) ? responsive : mobile;
    bucket.push(cls);
  }
  return [...mobile, ...responsive];
}
472
/**
 * Tells whether a class name carries a responsive breakpoint prefix, as
 * reported by `extractBaseClass`.
 *
 * @param {string} className - Class name to inspect.
 * @returns {boolean} True when the extracted breakpoint is not the empty string.
 */
function hasBreakpoint(className) {
  const parsed = extractBaseClass(className);
  return parsed.breakpoint !== "";
}
479
/**
 * Derives the Markdown wrapping and visibility for one element's Tailwind
 * classes, mobile-first.
 *
 * The classes are normalized and grouped; then, in the order weight,
 * emphasis, decoration, the first class of each non-empty group is looked up
 * in `TAILWIND_TO_MARKDOWN_MAP`. Its prefix is appended to the running prefix
 * and its suffix is prepended to the running suffix, so delimiters nest
 * symmetrically. The element is hidden when any display or position class
 * maps to a `hidden` entry.
 *
 * @param {string[]} classes - Class names taken from the element.
 * @returns {{ prefix: string, suffix: string, hidden: boolean }}
 */
function processTailwindClasses(classes) {
  const grouped = groupByFormattingType(normalizeClasses(classes));
  const mappingFor = (cls) => TAILWIND_TO_MARKDOWN_MAP[extractBaseClass(cls).baseClass];

  let prefix = "";
  let suffix = "";
  for (const group of [grouped.weight, grouped.emphasis, grouped.decoration]) {
    if (group.length === 0) continue;
    // Only the group's first class is consulted.
    const mapping = mappingFor(group[0]);
    if (!mapping) continue;
    if (mapping.prefix) prefix += mapping.prefix;
    if (mapping.suffix) suffix = mapping.suffix + suffix;
  }

  const hidden = [...grouped.display, ...grouped.position].some((cls) => {
    const mapping = mappingFor(cls);
    return Boolean(mapping && mapping.hidden);
  });

  return { prefix, suffix, hidden };
}
534
/**
 * Creates a plugin that adds Tailwind class processing.
 *
 * Hooks:
 * - `processAttributes` tokenizes the element's `class` attribute on any run
 *   of ASCII whitespace (space, tab, LF, FF, CR) and stores the resulting
 *   `{ prefix, suffix, hidden }` on `node.context.tailwind`.
 * - `processTextNode` wraps text in the direct parent's prefix/suffix, or
 *   drops it when the parent is hidden.
 * - `beforeNodeProcess` skips elements flagged as hidden.
 *
 * @returns {*} Whatever `createPlugin` returns for the assembled hooks.
 */
function tailwindPlugin() {
  return createPlugin({
    processAttributes(node) {
      const classAttr = node.attributes?.class;
      if (!classAttr) return;
      // Class lists may be separated by tabs or newlines, not only spaces.
      const classes = String(classAttr).split(/[ \t\n\f\r]+/).filter(Boolean);
      const { prefix, suffix, hidden } = processTailwindClasses(classes);
      node.context = node.context || {};
      node.context.tailwind = { prefix, suffix, hidden };
    },
    processTextNode(node) {
      const parentNode = node.parent;
      if (!parentNode || parentNode.type !== ELEMENT_NODE) return undefined;
      const tailwindData = parentNode.context?.tailwind;
      if (tailwindData?.hidden) return { content: "", skip: true };

      const prefix = tailwindData?.prefix || "";
      const suffix = tailwindData?.suffix || "";
      let content = node.value;
      if (prefix || suffix) {
        // Wrapping can double up delimiters already present in the text.
        content = fixRedundantDelimiters(prefix + content + suffix);
      }
      return { content, skip: false };
    },
    beforeNodeProcess({ node }) {
      if (node.type !== ELEMENT_NODE) return undefined;
      if (node.context?.tailwind?.hidden) return { skip: true };
      return undefined;
    },
  });
}
580
+
581
+ //#endregion
582
+ export { extractionPlugin, filterPlugin, isolateMainPlugin, tailwindPlugin };