mdream 0.2.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,844 @@
1
+ import { ELEMENT_NODE, TAG_A, TAG_ADDRESS, TAG_ARTICLE, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CAPTION, TAG_CODE, TAG_DD, TAG_DETAILS, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HR, TAG_HTML, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_LI, TAG_MAIN, TAG_META, TAG_NAV, TAG_OBJECT, TAG_OL, TAG_P, TAG_PRE, TAG_SCRIPT, TAG_SECTION, TAG_SELECT, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUMMARY, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TITLE, TAG_TR, TAG_UL, TAG_VIDEO, TEXT_NODE, collectNodeContent, createBufferRegion, createPlugin } from "./plugin-Bqz9GKOA.mjs";
2
+
3
+ //#region src/libs/query-selector.ts
4
/**
 * Builds a matcher for a bare tag selector such as 'div', 'p' or 'h1'.
 *
 * @param {string} tagName - Tag name to compare against `element.name`.
 * @returns {{matches: (element: object) => boolean, toString: () => string}}
 *   Matcher whose `toString` yields the tag name unchanged.
 */
function createTagSelector(tagName) {
  const matches = (element) => element.name === tagName;
  const toString = () => tagName;
  return { matches, toString };
}
13
/**
 * Builds a matcher for an ID selector such as '#main' or '#content'.
 *
 * @param {string} selector - Selector text including the leading '#'.
 * @returns {{matches: (element: object) => boolean, toString: () => string}}
 *   Matcher comparing `element.attributes.id` with the selector's id.
 */
function createIdSelector(selector) {
  // Drop the leading '#'.
  const expectedId = selector.substring(1);
  return {
    matches(element) {
      const attrs = element.attributes;
      if (attrs == null) return false;
      return attrs.id === expectedId;
    },
    toString() {
      return `#${expectedId}`;
    }
  };
}
23
/**
 * Creates a class selector matcher (e.g., '.container', '.header').
 *
 * The element's `class` attribute is tokenised on any run of whitespace
 * (spaces, tabs, line breaks), so multi-line class attributes match too.
 *
 * @param {string} selector - Selector text including the leading '.'.
 * @returns {{matches: (element: object) => boolean, toString: () => string}}
 *   Matcher that is true when the class list contains the class name.
 */
function createClassSelector(selector) {
  const className = selector.slice(1);
  return {
    matches: (element) => {
      const classAttr = element.attributes?.class;
      if (!classAttr) return false;
      // String() guards against a non-string attribute value.
      return String(classAttr).split(/\s+/).filter(Boolean).includes(className);
    },
    toString: () => `.${className}`
  };
}
37
/**
 * Creates an attribute selector matcher
 * (e.g., '[data-id]', '[href="https://example.com"]').
 *
 * Supported operators: `=`, `^=`, `$=`, `*=`, `~=` and `|=`. Values may be
 * double-quoted, single-quoted or unquoted, and may be empty (`[alt=""]`).
 * A selector that cannot be parsed falls back to treating everything between
 * the outer brackets as the attribute name.
 *
 * @param {string} selector - Selector text including the surrounding brackets.
 * @returns {{matches: (element: object) => boolean, toString: () => string}}
 *   Matcher for the attribute test.
 */
function createAttributeSelector(selector) {
  // Groups: 1 = name, 2 = operator, 3 = "value", 4 = 'value', 5 = bare value.
  const pattern = /^\[\s*([^\]=~|^$*\s]+)\s*(?:([~|^$*]?=)\s*(?:"([^"]*)"|'([^']*)'|([^\]"']*))\s*)?\]$/;
  const match = selector.match(pattern);
  const attrName = match ? match[1] : selector.slice(1, -1);
  const operator = match?.[2];
  const attrValue = operator === undefined ? undefined : match[3] ?? match[4] ?? match[5].trim();
  const hasOwn = Object.prototype.hasOwnProperty;
  return {
    matches: (element) => {
      const attrs = element.attributes;
      // Own-property check: `in` would also match inherited keys such as "constructor".
      if (!attrs || !hasOwn.call(attrs, attrName)) return false;
      if (operator === undefined) return true;
      const value = String(attrs[attrName]);
      switch (operator) {
        case "=": return value === attrValue;
        // Per CSS Selectors, substring operators with an empty value match nothing.
        case "^=": return attrValue !== "" && value.startsWith(attrValue);
        case "$=": return attrValue !== "" && value.endsWith(attrValue);
        case "*=": return attrValue !== "" && value.includes(attrValue);
        case "~=": return attrValue !== "" && value.split(/\s+/).filter(Boolean).includes(attrValue);
        case "|=": return value === attrValue || value.startsWith(`${attrValue}-`);
        default: return false;
      }
    },
    toString: () => {
      if (operator === undefined) return `[${attrName}]`;
      if (attrValue === "") return `[${attrName}${operator}""]`;
      return `[${attrName}${operator}${attrValue}]`;
    }
  };
}
66
/**
 * Combines several simple matchers into one (e.g., 'div.container', 'h1#title').
 *
 * @param {Array<{matches: Function, toString: Function}>} selectors - Matchers
 *   that must all accept an element for the compound matcher to accept it.
 * @returns {{matches: (element: object) => boolean, toString: () => string}}
 *   Matcher whose `toString` concatenates the parts in order.
 */
function createCompoundSelector(selectors) {
  return {
    matches(element) {
      for (const part of selectors) {
        if (!part.matches(element)) return false;
      }
      return true;
    },
    toString() {
      const pieces = selectors.map((part) => part.toString());
      return pieces.join("");
    }
  };
}
75
/**
 * Parses a simple or compound CSS selector into a matcher.
 *
 * The selector is split into tag, `.class`, `#id` and `[attribute]` parts.
 * Characters inside an attribute's brackets, including `.`, `#` and `[` in
 * its value, never start a new part, so `[href="https://example.com/#top"]`
 * stays one attribute selector. Combinators are not interpreted.
 *
 * @param {string} selector - Selector text; surrounding whitespace is ignored.
 * @returns {{matches: Function, toString: Function}} A single matcher when the
 *   selector has one part, otherwise a compound matcher over all parts.
 * @throws {Error} If the selector is empty after trimming.
 */
function parseSelector(selector) {
  const text = selector.trim();
  if (!text) throw new Error("Empty selector");
  // The first character of a part decides which matcher factory handles it.
  const toMatcher = (part) => {
    switch (part[0]) {
      case ".": return createClassSelector(part);
      case "#": return createIdSelector(part);
      case "[": return createAttributeSelector(part);
      default: return createTagSelector(part);
    }
  };
  const selectorParts = [];
  let current = "";
  let inAttribute = false;
  let quote = "";
  for (const char of text) {
    if (inAttribute) {
      // Inside [...]: copy verbatim; only an unquoted ']' ends the attribute.
      current += char;
      if (quote) {
        if (char === quote) quote = "";
      } else if (char === "\"" || char === "'") quote = char;
      else if (char === "]") inAttribute = false;
      continue;
    }
    if ((char === "." || char === "#" || char === "[") && current) {
      selectorParts.push(toMatcher(current));
      current = "";
    }
    current += char;
    if (char === "[") inAttribute = true;
  }
  if (current) selectorParts.push(toMatcher(current));
  if (selectorParts.length === 1) return selectorParts[0];
  return createCompoundSelector(selectorParts);
}
104
+
105
+ //#endregion
106
+ //#region src/plugins/extraction.ts
107
/**
 * Creates a plugin that reports elements matching CSS selectors to callbacks.
 *
 * Text from every text node beneath a matching element is accumulated. When
 * the element exits, each callback registered for a selector the element
 * matched is invoked with a shallow copy of the element, extended with the
 * trimmed `textContent`, plus the exit `state`. An element matched by several
 * selectors triggers all of their callbacks, in the order given in `selectors`.
 *
 * @param {Record<string, Function>} selectors - Map from selector text to
 *   callback `(extractedElement, state) => void`.
 * @returns {*} Whatever `createPlugin` returns for the hook set.
 */
function extractionPlugin(selectors) {
  const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
    matcher: parseSelector(selector),
    callback
  }));
  // element -> { textContent, callbacks } for elements currently open.
  const trackedElements = new Map();
  return createPlugin({
    onNodeEnter(element) {
      const callbacks = matcherCallbacks
        .filter(({ matcher }) => matcher.matches(element))
        .map(({ callback }) => callback);
      if (callbacks.length > 0) trackedElements.set(element, {
        textContent: "",
        callbacks
      });
    },
    processTextNode(textNode) {
      // Credit the text to every tracked ancestor, not only the nearest one.
      for (let ancestor = textNode.parent; ancestor; ancestor = ancestor.parent) {
        const tracked = trackedElements.get(ancestor);
        if (tracked) tracked.textContent += textNode.value;
      }
      return void 0;
    },
    onNodeExit(element, state) {
      const tracked = trackedElements.get(element);
      if (!tracked) return;
      // Untrack first so a throwing callback cannot leave a stale entry behind.
      trackedElements.delete(element);
      const extractedElement = {
        ...element,
        textContent: tracked.textContent.trim()
      };
      for (const callback of tracked.callbacks) callback(extractedElement, state);
    }
  });
}
144
+
145
+ //#endregion
146
+ //#region src/plugins/filter.ts
147
/**
 * Plugin that filters nodes based on CSS selectors.
 * Allows including or excluding nodes based on selectors.
 *
 * Each `include` / `exclude` entry is either a selector string or a
 * non-string value that is compared with `element.tagId`.
 *
 * Behaviour of the `beforeNodeProcess` hook:
 * - Text nodes are skipped when any ancestor matches an exclude selector.
 * - When exclude selectors are configured, elements whose inline `style`
 *   contains "absolute" or "fixed" are skipped, as are elements that match an
 *   exclude selector or have an ancestor that does.
 * - When include selectors are configured, an element is kept only if it
 *   matches one, or (unless `processChildren` is `false`) one of its
 *   ancestors does.
 *
 * @example
 * // Include only heading elements and their children
 * filterPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
 *
 * @example
 * // Exclude navigation, sidebar, and footer
 * filterPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
 *
 * @param {object} [options]
 * @param {Array} [options.include] - Selectors or tag ids to keep.
 * @param {Array} [options.exclude] - Selectors or tag ids to drop.
 * @param {boolean} [options.processChildren=true] - Whether descendants of an
 *   included element are kept as well.
 * @returns {*} Whatever `createPlugin` returns for the hook.
 */
function filterPlugin(options = {}) {
  const toMatcher = (selector) => {
    if (typeof selector === "string") return parseSelector(selector);
    return { matches: (element) => element.tagId === selector };
  };
  const includeSelectors = options.include?.map((selector) => toMatcher(selector)) || [];
  const excludeSelectors = options.exclude?.map((selector) => toMatcher(selector)) || [];
  const processChildren = options.processChildren !== false;
  const isExcluded = (element) => excludeSelectors.some((selector) => selector.matches(element));
  const hasExcludedAncestor = (node) => {
    for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
      if (isExcluded(ancestor)) return true;
    }
    return false;
  };
  return createPlugin({ beforeNodeProcess(event) {
    const { node } = event;
    if (node.type === TEXT_NODE) {
      if (excludeSelectors.length && hasExcludedAncestor(node)) return { skip: true };
      return;
    }
    if (node.type !== ELEMENT_NODE) return;
    const element = node;
    if (excludeSelectors.length) {
      // `attributes` may be absent; the positioning heuristic must not throw then.
      const style = element.attributes?.style;
      if (typeof style === "string" && (style.includes("absolute") || style.includes("fixed"))) return { skip: true };
      if (isExcluded(element) || hasExcludedAncestor(element)) return { skip: true };
    }
    if (includeSelectors.length) {
      let currentElement = element;
      while (currentElement) {
        const candidate = currentElement;
        if (includeSelectors.some((selector) => selector.matches(candidate))) return;
        if (!processChildren) break;
        currentElement = currentElement.parent;
      }
      return { skip: true };
    }
  } });
}
208
+
209
+ //#endregion
210
+ //#region src/plugins/frontmatter.ts
211
/**
 * A plugin that manages frontmatter generation from HTML head elements.
 * Extracts metadata from meta tags and the title and generates YAML frontmatter.
 *
 * While inside `<head>`, the text of `<title>` becomes `title`, and each
 * `<meta>` whose `property` or `name` is in the allowed field set is stored
 * under `meta`. When `<head>` exits, the frontmatter is handed to
 * `collectNodeContent`; nothing is emitted when there is nothing to write.
 *
 * The default value formatter wraps a value in double quotes when it contains
 * a newline, ':', '#', a space or a double quote. Inside the quotes it escapes
 * backslashes, double quotes and newlines, keeping the value on one line.
 *
 * @param {object} [options]
 * @param {Record<string, *>} [options.additionalFields] - Extra top-level
 *   fields; only string values are written out.
 * @param {string[]} [options.metaFields] - Extra meta names/properties to keep.
 * @param {(name: string, value: string) => string} [options.formatValue] -
 *   Replaces the default value formatter.
 * @returns {*} Whatever `createPlugin` returns for the hook set.
 */
function frontmatterPlugin(options = {}) {
  const additionalFields = options.additionalFields || {};
  const metaFields = new Set([
    "description",
    "keywords",
    "author",
    "date",
    "og:title",
    "og:description",
    "twitter:title",
    "twitter:description",
    ...options.metaFields || []
  ]);
  const frontmatter = {
    ...additionalFields,
    meta: {}
  };
  let inHead = false;
  const defaultFormatValue = (name, value) => {
    // Escapes only make sense inside quotes, so decide on quoting first.
    if (!/[\n:# "]/.test(value)) return value;
    const escaped = value
      .replace(/\\/g, "\\\\")
      .replace(/"/g, "\\\"")
      .replace(/\n/g, "\\n");
    return `"${escaped}"`;
  };
  const formatValue = options.formatValue || defaultFormatValue;
  return createPlugin({
    onNodeEnter(node) {
      if (node.tagId === TAG_HEAD) {
        inHead = true;
        return;
      }
      if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_TITLE) return;
      if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_META) {
        const { name, property, content } = node.attributes || {};
        const metaName = property || name;
        // Keys containing ':' (e.g. og:title) are quoted so they stay valid YAML keys.
        if (metaName && content && metaFields.has(metaName)) frontmatter.meta[metaName.includes(":") ? `"${metaName}"` : metaName] = formatValue(metaName, content);
        return void 0;
      }
    },
    onNodeExit(node, state) {
      if (node.type === ELEMENT_NODE && node.tagId === TAG_HEAD) {
        inHead = false;
        const frontmatterContent = generateFrontmatter();
        if (frontmatterContent) collectNodeContent({
          type: 1,
          regionId: 0
        }, frontmatterContent, state);
      }
      return void 0;
    },
    processTextNode(node) {
      if (!inHead) return;
      const parent = node.parent;
      if (parent && parent.tagId === TAG_TITLE && node.value) {
        frontmatter.title = formatValue("title", node.value.trim());
        // The title is emitted via frontmatter, not as body text.
        return {
          content: "",
          skip: true
        };
      }
    }
  });
  /**
   * Generate YAML frontmatter string from collected metadata.
   * Returns "" when no line would be written.
   */
  function generateFrontmatter() {
    const yamlLines = [];
    // title first, then description, then everything else alphabetically.
    const entries = Object.entries(frontmatter).sort(([a], [b]) => {
      if (a === "title") return -1;
      if (b === "title") return 1;
      if (a === "description") return -1;
      if (b === "description") return 1;
      return a.localeCompare(b);
    });
    for (const [key, value] of entries) {
      if (key === "meta") {
        const metaEntries = Object.entries(value || {})
          .sort(([a], [b]) => a.localeCompare(b))
          .map(([metaKey, metaValue]) => ` ${metaKey}: ${metaValue}`);
        if (metaEntries.length > 0) yamlLines.push("meta:", ...metaEntries);
      } else if (typeof value === "string") yamlLines.push(`${key}: ${value}`);
    }
    if (yamlLines.length === 0) return "";
    return `---\n${yamlLines.join("\n")}\n---\n\n`;
  }
}
300
+
301
+ //#endregion
302
+ //#region src/plugins/isolate-main.ts
303
/**
 * Plugin that keeps only the main content of a page.
 *
 * Decision order applied to every node in `beforeNodeProcess`:
 * 1. The first `<main>` element seen at depth 5 or less becomes the content
 *    root; from then on only that element and its descendants are kept.
 * 2. Without a `<main>`, content starts at the first heading (h1-h6) that is
 *    not nested inside a `<header>` element.
 * 3. The first `<footer>` found after that heading, no more than 5 levels
 *    deeper than it, ends the content; it and everything after are skipped.
 * 4. Before the start marker only `<head>` and its descendants are kept.
 *
 * @example
 * ```html
 * <body>
 *   <nav>Navigation (excluded)</nav>
 *   <main>
 *     <h1>Main Title (included)</h1>
 *     <p>Main content (included)</p>
 *   </main>
 *   <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @example
 * ```html
 * <body>
 *   <nav>Navigation (excluded)</nav>
 *   <h1>Main Title (included)</h1>
 *   <p>Main content (included)</p>
 *   <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @returns {*} Whatever `createPlugin` returns for the hook.
 */
function isolateMainPlugin() {
  const headingTagIds = new Set([
    TAG_H1,
    TAG_H2,
    TAG_H3,
    TAG_H4,
    TAG_H5,
    TAG_H6
  ]);
  let mainElement = null;
  let firstHeading = null;
  let pastFooter = false;

  const skip = () => ({ skip: true });

  // True when `container` is `start` itself or one of its ancestors.
  const isWithin = (start, container) => {
    for (let cursor = start; cursor; cursor = cursor.parent) {
      if (cursor === container) return true;
    }
    return false;
  };

  // True when `start` or one of its ancestors carries the given tag id.
  const isUnderTag = (start, tagId) => {
    for (let cursor = start; cursor; cursor = cursor.parent) {
      if (cursor.tagId === tagId) return true;
    }
    return false;
  };

  const handleElement = (element) => {
    if (!mainElement && element.tagId === TAG_MAIN && element.depth <= 5) {
      mainElement = element;
      return;
    }
    if (mainElement) return isWithin(element, mainElement) ? void 0 : skip();

    const isHeading = element.tagId !== void 0 && headingTagIds.has(element.tagId);
    if (!firstHeading && isHeading && !isUnderTag(element.parent, TAG_HEADER)) {
      firstHeading = element;
      return;
    }
    const isClosingFooter = firstHeading && !pastFooter && element.tagId === TAG_FOOTER;
    if (isClosingFooter && element.depth - firstHeading.depth <= 5) {
      pastFooter = true;
      return skip();
    }
    if (!firstHeading) return isUnderTag(element, TAG_HEAD) ? void 0 : skip();
    if (pastFooter) return skip();
  };

  const handleText = (textNode) => {
    if (mainElement) return isWithin(textNode.parent, mainElement) ? void 0 : skip();
    if (!firstHeading || pastFooter) return isUnderTag(textNode.parent, TAG_HEAD) ? void 0 : skip();
  };

  return createPlugin({ beforeNodeProcess(event) {
    const { node } = event;
    if (node.type === ELEMENT_NODE) return handleElement(node);
    if (node.type === TEXT_NODE) return handleText(node);
  } });
}
423
+
424
+ //#endregion
425
+ //#region src/plugins/readability.ts
426
/**
 * Patterns used by the readability heuristics.
 * Keyword alternations are assembled from word lists; each resulting
 * expression has the same source text and flags as its literal form.
 */
const REGEXPS = (() => {
  // Joins alternatives with '|' into a case-insensitive expression.
  const anyOf = (alternatives) => new RegExp(alternatives.join("|"), "i");
  return {
    // Class/id fragments that suggest real content.
    positive: anyOf([
      "article", "body", "content", "entry", "main", "page", "post", "text",
      "blog", "story", "recipe", "ingredient", "instruction", "description",
      "docs?", "guide", "tutorial", "reference", "manual"
    ]),
    // Class/id fragments that suggest boilerplate.
    negative: anyOf([
      "ad", "banner", "combx", "comment", "disqus", "extra", "foot", "header",
      "menu", "meta", "nav", "promo", "related", "scroll", "share", "sidebar",
      "sponsor", "social", "tags", "widget", "sitemap", "copyright", "login",
      "register", "subscribe", "newsletter", "signup", "category", "author",
      "date", "publish", "cta", "button", "apply", "trial", "likes", "views",
      "metrics", "stats", "breadcrumb", "pagination", "filter", "sort", "search"
    ]),
    commas: /,/g,
    periodAtEnd: /\.( |$)/,
    hidden: /hidden|display:\s*none|visibility:\s*hidden/i,
    advertisement: anyOf(["^ad-", "^ad$", "advertisement", "sponsor", "promo", "banner"]),
    comments: anyOf(["comment", "disqus", "replies"])
  };
})();
435
/**
 * Base readability score per tag id. Positive scores favour keeping an
 * element, negative scores count against it; tags not listed are scored 0
 * by the lookup in the readability plugin.
 */
const TagScores = Object.fromEntries([
  // Content containers and text blocks
  [TAG_ARTICLE, 15],
  [TAG_SECTION, 8],
  [TAG_MAIN, 15],
  [TAG_P, 5],
  [TAG_DIV, 2],
  [TAG_BLOCKQUOTE, 5],
  [TAG_PRE, 8],
  [TAG_CODE, 6],
  // Media
  [TAG_IMG, 3],
  [TAG_FIGURE, 4],
  [TAG_FIGCAPTION, 3],
  [TAG_VIDEO, 3],
  [TAG_AUDIO, 3],
  [TAG_SVG, 1],
  // Tables
  [TAG_TABLE, 0],
  [TAG_CAPTION, 2],
  [TAG_THEAD, 0],
  [TAG_TBODY, 0],
  [TAG_TFOOT, 0],
  [TAG_TR, -1],
  [TAG_TH, -2],
  [TAG_TD, 0],
  // Lists
  [TAG_UL, -8],
  [TAG_OL, -5],
  [TAG_LI, -6],
  [TAG_DL, 0],
  [TAG_DT, 0],
  [TAG_DD, 0],
  // Headings
  [TAG_H1, 1],
  [TAG_H2, 1],
  [TAG_H3, 1],
  [TAG_H4, 0],
  [TAG_H5, 0],
  [TAG_H6, 0],
  // Page chrome
  [TAG_HEADER, -15],
  [TAG_FOOTER, -25],
  [TAG_NAV, -30],
  [TAG_ASIDE, -25],
  // Forms and embeds
  [TAG_FORM, -8],
  [TAG_BUTTON, -5],
  [TAG_INPUT, -5],
  [TAG_TEXTAREA, -5],
  [TAG_SELECT, -5],
  [TAG_FIELDSET, -5],
  [TAG_IFRAME, -3],
  [TAG_EMBED, -3],
  [TAG_OBJECT, -3],
  // Inline elements
  [TAG_A, -8],
  [TAG_STRONG, 1],
  [TAG_B, 1],
  [TAG_EM, 1],
  [TAG_I, 1],
  [TAG_HR, 0],
  [TAG_BR, 0],
  [TAG_SPAN, 0],
  // Non-content and miscellaneous
  [TAG_SCRIPT, -25],
  [TAG_STYLE, -25],
  [TAG_DETAILS, 2],
  [TAG_SUMMARY, 1],
  [TAG_ADDRESS, -3]
]);
497
/**
 * Computes the score adjustment implied by an element's class and id names.
 *
 * Each of the two attributes is scored independently and the results are
 * added: -35 for a strong boilerplate keyword, otherwise -15 for a
 * `REGEXPS.negative` hit, otherwise +10 for a `REGEXPS.positive` hit plus a
 * further +5 when the name also looks documentation-like.
 *
 * @param {object} node - Element whose `attributes.class` / `attributes.id`
 *   are inspected; either may be missing.
 * @returns {number} Sum of both adjustments (0 when neither attribute is set).
 */
function scoreClassAndId(node) {
  const strongNegative = /nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;
  const documentationLike = /docs?|guide|tutorial|reference|manual|article/i;

  const scoreName = (name) => {
    if (!name) return 0;
    if (strongNegative.test(name)) return -35;
    if (REGEXPS.negative.test(name)) return -15;
    if (!REGEXPS.positive.test(name)) return 0;
    return documentationLike.test(name) ? 15 : 10;
  };

  const attrs = node.attributes;
  return scoreName(attrs?.class) + scoreName(attrs?.id);
}
522
/**
 * Creates a plugin implementing readability.js-style heuristics for judging
 * content quality. Inclusion and exclusion are signalled by calling
 * `createBufferRegion(node, state, flag)`: `true` for `<head>`, `false` for
 * nodes judged to be boilerplate.
 *
 * Per-node bookkeeping lives on `node.context`: `score`, `tagCount`,
 * `textLength`, `linkTextLength` and `isHighLinkDensity`.
 *
 * @returns {*} Whatever `createPlugin` returns for the hook set.
 */
function readabilityPlugin() {
  const NEGATIVE_TAG_NAME = /nav|header|footer|aside|form|fieldset|button/i;
  const NEGATIVE_CLASS_OR_ID = /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;
  const HIDDEN_STYLE = /display:\s*none|visibility:\s*hidden/i;
  const HIDDEN_ARIA_NAME = /hidden|invisible/i;
  const DOCUMENTATION_MARKER = /docs?|guide|tutorial|reference|manual|article|content/i;
  const DOCUMENTATION_TAG_NAME = /main|article|section/i;

  let insideHead = false;

  // Tag name, class, id, inline style or a true aria-* flag marks boilerplate.
  const isStronglyNegative = (node) => {
    const attrs = node.attributes;
    if (node.name && NEGATIVE_TAG_NAME.test(node.name)) return true;
    if (attrs?.class && NEGATIVE_CLASS_OR_ID.test(attrs.class)) return true;
    if (attrs?.id && NEGATIVE_CLASS_OR_ID.test(attrs.id)) return true;
    if (attrs?.style && HIDDEN_STYLE.test(attrs.style)) return true;
    if (!attrs) return false;
    return Object.keys(attrs).some((attr) => attr.startsWith("aria-") && attrs[attr] === "true" && HIDDEN_ARIA_NAME.test(attr));
  };

  // Documentation-like nodes are spared the link-heavy penalty.
  const hasDocumentationMarkers = (node) => {
    const attrs = node.attributes;
    if (attrs?.class && DOCUMENTATION_MARKER.test(attrs.class)) return true;
    if (attrs?.id && DOCUMENTATION_MARKER.test(attrs.id)) return true;
    return Boolean(node.name && DOCUMENTATION_TAG_NAME.test(node.name));
  };

  const currentScore = (node) => node.context.score || 0;

  return createPlugin({
    onNodeEnter(node, state) {
      if (insideHead) return;
      if (!node.context) node.context = {};
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
      if (node.tagId === TAG_HEAD) {
        createBufferRegion(node, state, true);
        insideHead = true;
        return;
      }
      const tagScore = node.tagId === void 0 ? 0 : TagScores[node.tagId] ?? 0;
      const context = node.context;
      context.score = tagScore + scoreClassAndId(node);
      context.tagCount = 1;
      context.linkTextLength = 0;
      context.textLength = 0;
      if (isStronglyNegative(node)) {
        createBufferRegion(node, state, false);
        return;
      }
      // Otherwise the node starts from its parent's running score.
      const parentContext = node.parent && node.parent.context;
      if (parentContext) context.score = currentScore(node) + (parentContext.score || 0);
    },
    processTextNode(node) {
      if (!node.parent || insideHead) return void 0;
      const text = node.value;
      const length = text.length;
      // At most three commas per text node count towards the score.
      const commaBonus = Math.min(3, text.match(REGEXPS.commas)?.length ?? 0);
      const countsAsLinkText = Boolean(node.parent.depthMap?.[TAG_A]);
      for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
        if (!ancestor.context) ancestor.context = {};
        const context = ancestor.context;
        context.score = (context.score || 0) + commaBonus;
        context.textLength = (context.textLength || 0) + length;
        if (countsAsLinkText) context.linkTextLength = (context.linkTextLength || 0) + length;
      }
      return void 0;
    },
    onNodeExit(node, state) {
      if (!node.context) return;
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
      if (node.tagId === TAG_HEAD) {
        insideHead = false;
        return;
      }
      if (insideHead) return;

      const context = node.context;
      const textLength = context.textLength || 0;
      const linkTextLength = context.linkTextLength || 0;

      // Longer text earns a larger bonus; under 25 characters earns none.
      let lengthBonus = 0;
      if (textLength > 100) lengthBonus = 3;
      else if (textLength >= 50) lengthBonus = 2;
      else if (textLength >= 25) lengthBonus = 1;
      if (lengthBonus !== 0) context.score = currentScore(node) + lengthBonus;

      if (textLength > 0) {
        const linkDensity = linkTextLength / textLength;
        if (linkDensity > .6) {
          context.score = currentScore(node) * .02;
          if (linkTextLength > 50) context.isHighLinkDensity = true;
        } else if (linkDensity > .4) context.score = currentScore(node) * (1 - linkDensity * 2);
        else if (linkDensity > .2) context.score = currentScore(node) * (1 - linkDensity * 1);

        const isLinkHeavy = linkDensity > .3 && linkTextLength > 30;
        if (linkTextLength > 0 && isLinkHeavy && !hasDocumentationMarkers(node)) context.score = currentScore(node) - 10;
      }

      const finalScore = currentScore(node);
      const mostlyLinks = linkTextLength > 50 && textLength > 0 && linkTextLength / textLength > .5;
      if (finalScore <= -12 || context.isHighLinkDensity || mostlyLinks) createBufferRegion(node, state, false);

      // Inline nodes hand their final score to the parent.
      if (node.tagHandler?.isInline) {
        const parentContext = node.parent && node.parent.context;
        if (parentContext) parentContext.score = (parentContext.score || 0) + finalScore;
      }
    }
  });
}
600
+
601
+ //#endregion
602
+ //#region src/plugins/tailwind.ts
603
/**
 * Lookup from Tailwind utility class to its Markdown treatment: either a
 * `prefix`/`suffix` delimiter pair wrapped around the text, or `hidden: true`
 * for classes whose content is dropped.
 */
const TAILWIND_TO_MARKDOWN_MAP = Object.fromEntries([
  // Weights rendered as bold.
  ...["font-bold", "font-semibold", "font-black", "font-extrabold", "font-medium"].map((cls) => [cls, {
    prefix: "**",
    suffix: "**"
  }]),
  // Italics.
  ...["font-italic", "italic"].map((cls) => [cls, {
    prefix: "*",
    suffix: "*"
  }]),
  // Strikethrough.
  ["line-through", {
    prefix: "~~",
    suffix: "~~"
  }],
  // Display and positioning classes treated as hidden.
  ...["hidden", "invisible", "absolute", "fixed", "sticky"].map((cls) => [cls, { hidden: true }])
]);
645
/**
 * Splits a responsive variant such as 'md:font-bold' into its breakpoint
 * prefix and the underlying class.
 *
 * @param {string} className - A single class token.
 * @returns {{baseClass: string, breakpoint: string}} `breakpoint` is one of
 *   'sm:', 'md:', 'lg:', 'xl:', '2xl:' or '' when no such prefix is present.
 */
function extractBaseClass(className) {
  const knownPrefixes = ["sm:", "md:", "lg:", "xl:", "2xl:"];
  const breakpoint = knownPrefixes.find((prefix) => className.startsWith(prefix)) ?? "";
  return {
    baseClass: className.slice(breakpoint.length),
    breakpoint
  };
}
665
/**
 * Returns a copy of the classes ordered mobile-first: unprefixed classes,
 * then 'sm:', 'md:', 'lg:', 'xl:' and '2xl:' variants. The input is not
 * modified.
 *
 * @param {Iterable<string>} classes - Class tokens to order.
 * @returns {string[]} New array sorted by breakpoint rank.
 */
function sortByBreakpoint(classes) {
  const rankByPrefix = new Map([
    ["", 0],
    ["sm:", 1],
    ["md:", 2],
    ["lg:", 3],
    ["xl:", 4],
    ["2xl:", 5]
  ]);
  const rankOf = (cls) => rankByPrefix.get(extractBaseClass(cls).breakpoint);
  const ordered = Array.from(classes);
  ordered.sort((left, right) => rankOf(left) - rankOf(right));
  return ordered;
}
683
/**
 * Group classes by their formatting type to handle overrides.
 *
 * Classes are first ordered mobile-first, then placed in exactly one group.
 * Only Tailwind font-weight utilities (and a bare 'bold') count as `weight`.
 * Other `font-*` utilities such as 'font-sans' or 'font-mono' go to `other`,
 * so they cannot occupy the first weight slot ahead of a real weight class.
 *
 * @param {Iterable<string>} classes - Class tokens of one element.
 * @returns {{emphasis: string[], weight: string[], decoration: string[],
 *   display: string[], position: string[], other: string[]}} Original tokens
 *   (breakpoint prefix included) per group, in mobile-first order.
 */
function groupByFormattingType(classes) {
  const fontWeights = new Set([
    "font-thin",
    "font-extralight",
    "font-light",
    "font-normal",
    "font-medium",
    "font-semibold",
    "font-bold",
    "font-extrabold",
    "font-black"
  ]);
  const positions = new Set(["absolute", "fixed", "sticky"]);
  const sorted = sortByBreakpoint(classes);
  const groups = {
    emphasis: [],
    weight: [],
    decoration: [],
    display: [],
    position: [],
    other: []
  };
  for (const cls of sorted) {
    const { baseClass } = extractBaseClass(cls);
    // Checked before weight so that 'font-italic' is treated as emphasis.
    if (baseClass.includes("italic")) groups.emphasis.push(cls);
    else if (fontWeights.has(baseClass) || baseClass === "bold") groups.weight.push(cls);
    else if (baseClass.includes("line-through") || baseClass.includes("underline")) groups.decoration.push(cls);
    else if (baseClass === "hidden" || baseClass.includes("invisible")) groups.display.push(cls);
    else if (positions.has(baseClass)) groups.position.push(cls);
    else groups.other.push(cls);
  }
  return groups;
}
711
/**
 * Collapses doubled Markdown delimiters using plain string operations.
 *
 * '****' becomes '**' and '~~~~' becomes '~~'. If the result then contains
 * three or more '***' delimiters, the second one is replaced by a single
 * space and the remaining ones are kept.
 *
 * @param {string} content - Markdown fragment.
 * @returns {string} The cleaned fragment.
 */
function fixRedundantDelimiters(content) {
  const collapsed = content.replaceAll("****", "**").replaceAll("~~~~", "~~");
  const segments = collapsed.split("***");
  // Four or more segments means at least three '***' delimiters.
  if (segments.length <= 3) return collapsed;
  const [lead, first, second, ...tail] = segments;
  return `${lead}***${first} ${second}***${tail.join("***")}`;
}
723
/**
 * Reorders Tailwind classes so that every class without a breakpoint prefix
 * comes before every breakpoint-prefixed class; relative order inside each
 * of the two groups is kept.
 *
 * @param {string[]} classes - Class tokens of one element.
 * @returns {string[]} New array: unprefixed classes, then prefixed ones.
 */
function normalizeClasses(classes) {
  const unprefixed = [];
  const prefixed = [];
  for (const cls of classes) {
    if (hasBreakpoint(cls)) prefixed.push(cls);
    else unprefixed.push(cls);
  }
  return unprefixed.concat(prefixed);
}
734
/**
 * Tells whether a class carries a breakpoint prefix recognised by
 * `extractBaseClass`.
 *
 * @param {string} className - A single class token.
 * @returns {boolean} True when the extracted breakpoint is non-empty.
 */
function hasBreakpoint(className) {
  return extractBaseClass(className).breakpoint !== "";
}
741
/**
 * Translates an element's Tailwind classes into Markdown delimiters and a
 * hidden flag, mobile-first.
 *
 * For weight, emphasis and decoration (in that order) only the first class of
 * the group is looked up in `TAILWIND_TO_MARKDOWN_MAP`. Its prefix is appended
 * to the running prefix and its suffix is prepended to the running suffix, so
 * the delimiters nest. The element is hidden when any display or position
 * class maps to `hidden`.
 *
 * @param {string[]} classes - Class tokens of one element.
 * @returns {{prefix: string, suffix: string, hidden: boolean}}
 */
function processTailwindClasses(classes) {
  const grouped = groupByFormattingType(normalizeClasses(classes));
  const lookup = (cls) => TAILWIND_TO_MARKDOWN_MAP[extractBaseClass(cls).baseClass];

  let prefix = "";
  let suffix = "";
  for (const group of [grouped.weight, grouped.emphasis, grouped.decoration]) {
    if (group.length === 0) continue;
    const mapping = lookup(group[0]);
    if (!mapping) continue;
    if (mapping.prefix) prefix += mapping.prefix;
    if (mapping.suffix) suffix = mapping.suffix + suffix;
  }

  const hidden = [...grouped.display, ...grouped.position].some((cls) => {
    const mapping = lookup(cls);
    return Boolean(mapping && mapping.hidden);
  });

  return {
    prefix,
    suffix,
    hidden
  };
}
796
/**
 * Creates a plugin that adds Tailwind class processing.
 *
 * - `processAttributes` tokenises the element's `class` attribute on any run
 *   of whitespace (so multi-line class attributes work) and stores the
 *   resulting `{ prefix, suffix, hidden }` on `node.context.tailwind`.
 * - `processTextNode` drops text whose parent element is hidden and otherwise
 *   wraps it in the parent's prefix and suffix.
 * - `beforeNodeProcess` skips elements already marked hidden.
 *
 * @returns {*} Whatever `createPlugin` returns for the hook set.
 */
function tailwindPlugin() {
  return createPlugin({
    processAttributes(node) {
      const classAttr = node.attributes?.class;
      if (!classAttr) return;
      // Class tokens may be separated by spaces, tabs or line breaks.
      const classes = String(classAttr).split(/\s+/).filter(Boolean);
      const { prefix, suffix, hidden } = processTailwindClasses(classes);
      node.context = node.context || {};
      node.context.tailwind = {
        prefix,
        suffix,
        hidden
      };
    },
    processTextNode(node) {
      const parentNode = node.parent;
      if (!parentNode || parentNode.type !== ELEMENT_NODE) return void 0;
      const tailwindData = parentNode.context?.tailwind;
      if (tailwindData?.hidden) return {
        content: "",
        skip: true
      };
      let content = node.value;
      const prefix = tailwindData?.prefix || "";
      const suffix = tailwindData?.suffix || "";
      if (prefix || suffix) {
        // Wrapping may put delimiters next to ones already present in the text.
        content = fixRedundantDelimiters(prefix + content + suffix);
      }
      return {
        content,
        skip: false
      };
    },
    beforeNodeProcess({ node }) {
      if (node.type === ELEMENT_NODE && node.context?.tailwind?.hidden) return { skip: true };
    }
  });
}
842
+
843
+ //#endregion
844
+ export { extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, readabilityPlugin, tailwindPlugin };