mdream 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,291 @@
1
+ import { T as TAG_BODY, a as TAG_HTML, b as TAG_HEAD, c as createBufferRegion, d as TAG_A, e as TAG_ADDRESS, f as TAG_SUMMARY, g as TAG_DETAILS, h as TAG_STYLE, i as TAG_SCRIPT, j as TAG_SPAN, k as TAG_BR, l as TAG_HR, m as TAG_I, n as TAG_EM, o as TAG_B, p as TAG_STRONG, q as TAG_OBJECT, r as TAG_EMBED, s as TAG_IFRAME, t as TAG_FIELDSET, u as TAG_SELECT, v as TAG_TEXTAREA, w as TAG_INPUT, x as TAG_BUTTON, y as TAG_FORM, z as TAG_ASIDE, A as TAG_NAV, B as TAG_FOOTER, C as TAG_HEADER, D as TAG_H6, E as TAG_H5, F as TAG_H4, G as TAG_H3, H as TAG_H2, I as TAG_H1, J as TAG_DD, K as TAG_DT, L as TAG_DL, M as TAG_LI, N as TAG_OL, O as TAG_UL, P as TAG_TD, Q as TAG_TH, R as TAG_TR, S as TAG_TFOOT, U as TAG_TBODY, V as TAG_THEAD, W as TAG_CAPTION, X as TAG_TABLE, Y as TAG_SVG, Z as TAG_AUDIO, _ as TAG_VIDEO, $ as TAG_FIGCAPTION, a0 as TAG_FIGURE, a1 as TAG_IMG, a2 as TAG_CODE, a3 as TAG_PRE, a4 as TAG_BLOCKQUOTE, a5 as TAG_DIV, a6 as TAG_P, a7 as TAG_MAIN, a8 as TAG_SECTION, a9 as TAG_ARTICLE } from './mdream.-hdaPj9a.mjs';
2
+ import { c as createPlugin } from './mdream.cpEmpxyh.mjs';
3
+
4
/** Case-insensitive substrings that suggest high-quality content. */
const CONTENT_HINT_PATTERN = /article|body|content|entry|main|page|post|text|blog|story|recipe|ingredient|instruction|description|docs?|guide|tutorial|reference|manual/i;

/** Case-insensitive substrings that suggest low-quality content. */
const BOILERPLATE_HINT_PATTERN = /ad|banner|combx|comment|disqus|extra|foot|header|menu|meta|nav|promo|related|scroll|share|sidebar|sponsor|social|tags|widget|sitemap|copyright|login|register|subscribe|newsletter|signup|category|author|date|publish|cta|button|apply|trial|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;

/**
 * Matches every comma. The `g` flag lets `String.prototype.match` return all
 * occurrences so callers can count them as a complexity signal.
 */
const COMMA_PATTERN = /,/g;

/**
 * Shared patterns used by the readability scoring below.
 * All three are unanchored, so they match anywhere inside the tested string.
 */
const REGEXPS = {
  positive: CONTENT_HINT_PATTERN,
  negative: BOILERPLATE_HINT_PATTERN,
  commas: COMMA_PATTERN,
};
11
/**
 * Base readability score per tag id. Positive values mark tags that usually
 * hold content, negative values mark tags that usually hold navigation,
 * forms or other boilerplate. Tags missing from the table are looked up with
 * a `?? 0` fallback by the plugin, i.e. they are treated as neutral.
 */
const TagScores = Object.fromEntries([
  // Main structural elements
  [TAG_ARTICLE, 15], // Explicit content container, highest confidence
  [TAG_SECTION, 8], // Designated content section
  [TAG_MAIN, 15], // Main content indicator
  [TAG_P, 5], // Direct paragraph content
  [TAG_DIV, 2], // Generic container, slightly positive
  [TAG_BLOCKQUOTE, 5], // Quoted content, usually important

  // Code and pre-formatted content
  [TAG_PRE, 8], // Preformatted text/code, high value for documentation
  [TAG_CODE, 6], // Code content, high value for documentation

  // Media elements
  [TAG_IMG, 3], // Images are typically content
  [TAG_FIGURE, 4], // Figure with caption, content-focused
  [TAG_FIGCAPTION, 3], // Description for a figure
  [TAG_VIDEO, 3], // Video content
  [TAG_AUDIO, 3], // Audio content
  [TAG_SVG, 1], // Vector graphic, slight positive

  // Table elements
  [TAG_TABLE, 0], // Could be data or layout, neutral
  [TAG_CAPTION, 2], // Table caption
  [TAG_THEAD, 0], // Table structure, neutral
  [TAG_TBODY, 0], // Table structure, neutral
  [TAG_TFOOT, 0], // Table structure, neutral
  [TAG_TR, -1], // Table row, slight negative
  [TAG_TH, -2], // Table header, more negative than cells
  [TAG_TD, 0], // Table cell, neutral

  // List elements
  [TAG_UL, -8], // Higher penalty as lists are often navigation
  [TAG_OL, -5], // Ordered lists still often navigation
  [TAG_LI, -6], // Higher penalty for list items to avoid nav lists
  [TAG_DL, 0], // Definition lists, neutral
  [TAG_DT, 0], // Definition lists, neutral
  [TAG_DD, 0], // Definition lists, neutral

  // Heading elements
  [TAG_H1, 1], // Top-level heading (may be site title)
  [TAG_H2, 1], // Section headers, slightly positive
  [TAG_H3, 1], // Section headers, slightly positive
  [TAG_H4, 0], // Minor headers, neutral
  [TAG_H5, 0], // Minor headers, neutral
  [TAG_H6, 0], // Minor headers, neutral

  // Navigation and structural elements (negative)
  [TAG_HEADER, -15], // Page header, often not content but may contain article headers
  [TAG_FOOTER, -25], // Footer, rarely content
  [TAG_NAV, -30], // Navigation, not content
  [TAG_ASIDE, -25], // Sidebar, usually not main content

  // Form elements (negative)
  [TAG_FORM, -8], // User input, not content
  [TAG_BUTTON, -5], // Interactive element, not content
  [TAG_INPUT, -5], // Form field, not content
  [TAG_TEXTAREA, -5], // Text input, not content
  [TAG_SELECT, -5], // Drop-down, not content
  [TAG_FIELDSET, -5], // Form field group, not content

  // Embedded content (mostly negative)
  [TAG_IFRAME, -3], // Embedded content, often ads
  [TAG_EMBED, -3], // Embedded content, often ads
  [TAG_OBJECT, -3], // Embedded content, often ads

  // Links
  [TAG_A, -8], // Higher penalty to avoid navigation-heavy areas

  // Text formatting
  [TAG_STRONG, 1], // Emphasized text, slightly positive
  [TAG_B, 1], // Emphasized text, slightly positive
  [TAG_EM, 1], // Emphasized text, slightly positive
  [TAG_I, 1], // Emphasized text, slightly positive

  // Miscellaneous elements
  [TAG_HR, 0], // Divider, neutral
  [TAG_BR, 0], // Line break, neutral
  [TAG_SPAN, 0], // Inline container, neutral
  [TAG_SCRIPT, -25], // Script, never content
  [TAG_STYLE, -25], // Style, never content

  // Expandable content
  [TAG_DETAILS, 2], // Expandable content
  [TAG_SUMMARY, 1], // Header for expandable content

  // Additional tags not explicitly in scoring.md
  [TAG_ADDRESS, -3], // Similar to footer, rarely content
]);
147
/** Attribute substrings that earn the heaviest penalty (checked first). */
const STRONG_NEGATIVE_HINT = /nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;

/** Attribute substrings that earn an extra bonus on top of a positive match. */
const DOCUMENTATION_HINT = /docs?|guide|tutorial|reference|manual|article/i;

/**
 * Scores one attribute value (a `class` or an `id` string).
 *
 * The checks form a cascade and the first match wins:
 *   1. strong negative hint          -> -35
 *   2. `REGEXPS.negative`            -> -15
 *   3. `REGEXPS.positive`            -> +10, or +15 when it also carries a
 *                                       documentation hint
 *   4. nothing matched               ->   0
 *
 * @param {string} value - Non-empty attribute value to inspect.
 * @returns {number} Score adjustment contributed by this value.
 */
function scoreAttributeValue(value) {
  if (STRONG_NEGATIVE_HINT.test(value)) {
    return -35;
  }
  if (REGEXPS.negative.test(value)) {
    return -15;
  }
  if (!REGEXPS.positive.test(value)) {
    return 0;
  }
  return DOCUMENTATION_HINT.test(value) ? 15 : 10;
}

/**
 * Computes the score adjustment derived from a node's `class` and `id`
 * attributes. Each attribute is scored independently with the same cascade
 * and the two results are summed; missing or empty attributes contribute 0.
 *
 * @param {{ attributes?: { class?: string, id?: string } }} node - Element node.
 * @returns {number} Sum of the `class` and `id` adjustments.
 */
function scoreClassAndId(node) {
  let scoreAdjustment = 0;
  if (node.attributes?.class) {
    scoreAdjustment += scoreAttributeValue(node.attributes.class);
  }
  if (node.attributes?.id) {
    scoreAdjustment += scoreAttributeValue(node.attributes.id);
  }
  return scoreAdjustment;
}
177
/** Tag names that are rejected as soon as the element is entered. */
const REJECTED_TAG_NAME = /nav|header|footer|aside|form|fieldset|button/i;

/** `class` / `id` substrings that are rejected as soon as the element is entered. */
const REJECTED_ATTRIBUTE_VALUE = /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;

/** Inline styles that hide an element. */
const HIDDEN_INLINE_STYLE = /display:\s*none|visibility:\s*hidden/i;

/** Tested against the NAME of an `aria-*` attribute whose value is "true". */
const HIDDEN_ARIA_ATTRIBUTE = /hidden|invisible/i;

/** `class` / `id` substrings that exempt a node from the link-ratio penalty. */
const DOCUMENTATION_ATTRIBUTE_VALUE = /docs?|guide|tutorial|reference|manual|article|content/i;

/** Tag names that exempt a node from the link-ratio penalty. */
const DOCUMENTATION_TAG_NAME = /main|article|section/i;

/**
 * Tells whether a node matches one of the "strong negative" signals: a
 * rejected tag name, a rejected `class`/`id`, a hiding inline style, or an
 * `aria-*hidden*` / `aria-*invisible*` attribute set to "true".
 *
 * @param {object} node - Element node with optional `name` and `attributes`.
 * @returns {boolean} True when any signal matches.
 */
function isStronglyNegative(node) {
  const { name, attributes } = node;
  if (name && REJECTED_TAG_NAME.test(name)) {
    return true;
  }
  if (!attributes) {
    return false;
  }
  if (attributes.class && REJECTED_ATTRIBUTE_VALUE.test(attributes.class)) {
    return true;
  }
  if (attributes.id && REJECTED_ATTRIBUTE_VALUE.test(attributes.id)) {
    return true;
  }
  if (attributes.style && HIDDEN_INLINE_STYLE.test(attributes.style)) {
    return true;
  }
  return Object.keys(attributes).some(
    (attr) => attr.startsWith("aria-") && attributes[attr] === "true" && HIDDEN_ARIA_ATTRIBUTE.test(attr),
  );
}

/**
 * Tells whether a node looks like a documentation/content container, judged
 * by its `class`, its `id`, or its tag name.
 *
 * @param {object} node - Element node with optional `name` and `attributes`.
 * @returns {boolean} True when any marker matches.
 */
function hasDocumentationMarkers(node) {
  const { name, attributes } = node;
  if (attributes?.class && DOCUMENTATION_ATTRIBUTE_VALUE.test(attributes.class)) {
    return true;
  }
  if (attributes?.id && DOCUMENTATION_ATTRIBUTE_VALUE.test(attributes.id)) {
    return true;
  }
  return Boolean(name && DOCUMENTATION_TAG_NAME.test(name));
}

/**
 * Builds the readability plugin: it scores every element from its tag, its
 * `class`/`id`, its text length, comma count and link density, and calls
 * `createBufferRegion(node, state, false)` for nodes judged to be boilerplate.
 * NOTE(review): the third argument of `createBufferRegion` is presumably an
 * include/exclude flag — confirm against its definition.
 *
 * Per-node bookkeeping lives in `node.context` (`score`, `tagCount`,
 * `textLength`, `linkTextLength`, `isHighLinkDensity`).
 *
 * @returns {*} Whatever `createPlugin` returns for the three hooks below.
 */
function readabilityPlugin() {
  // True between entering and exiting <head>; nothing in between is scored.
  let inHead = false;

  return createPlugin({
    /**
     * Seeds the node's score and rejects strongly negative nodes right away.
     */
    onNodeEnter(node, state) {
      if (inHead) {
        return;
      }
      if (!node.context) {
        node.context = {};
      }
      // <html> and <body> get a context but are never scored themselves.
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) {
        return;
      }
      if (node.tagId === TAG_HEAD) {
        createBufferRegion(node, state, true);
        inHead = true;
        return;
      }

      node.context.score = (TagScores[node.tagId] ?? 0) + scoreClassAndId(node);
      node.context.tagCount = 1;
      node.context.linkTextLength = 0;
      node.context.textLength = 0;

      if (isStronglyNegative(node)) {
        // Rejected nodes do not inherit the parent's score.
        createBufferRegion(node, state, false);
        return;
      }
      if (node.parent && node.parent.context) {
        node.context.score += node.parent.context.score || 0;
      }
    },

    /**
     * Adds the text node's length and (capped) comma count to every ancestor.
     */
    processTextNode(node) {
      if (!node.parent || inHead) {
        return;
      }
      const textValue = node.value;
      const len = textValue.length;
      // At most 3 points per text node, one per comma.
      const commaCount = Math.min(3, (textValue.match(REGEXPS.commas) || []).length);
      const isInsideLink = !!node.parent.depthMap?.[TAG_A];

      let ancestor = node.parent;
      while (ancestor) {
        if (!ancestor.context) {
          ancestor.context = {};
        }
        const ctx = ancestor.context;
        ctx.score = (ctx.score || 0) + commaCount;
        ctx.textLength = (ctx.textLength || 0) + len;
        if (isInsideLink) {
          ctx.linkTextLength = (ctx.linkTextLength || 0) + len;
        }
        ancestor = ancestor.parent;
      }
    },

    /**
     * Finalises the node's score and rejects low-scoring or link-heavy nodes.
     */
    onNodeExit(node, state) {
      if (!node.context) {
        return;
      }
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) {
        return;
      }
      if (node.tagId === TAG_HEAD) {
        inHead = false;
        return;
      }
      if (inHead) {
        return;
      }

      const ctx = node.context;
      const textLength = ctx.textLength || 0;
      const linkTextLength = ctx.linkTextLength || 0;

      // Text-length bonus. Nodes with fewer than 25 characters (including
      // empty ones) get nothing, so no separate "empty node" branch is needed.
      if (textLength > 100) {
        ctx.score += 3;
      } else if (textLength >= 50) {
        ctx.score += 2;
      } else if (textLength >= 25) {
        ctx.score += 1;
      }

      if (textLength > 0) {
        const linkDensity = linkTextLength / textLength;
        if (linkDensity > 0.6) {
          ctx.score *= 0.02;
          if (linkTextLength > 50) {
            ctx.isHighLinkDensity = true;
          }
        } else if (linkDensity > 0.4) {
          // The factor is negative above 0.5, which flips the score's sign.
          ctx.score *= 1 - linkDensity * 2;
        } else if (linkDensity > 0.2) {
          ctx.score *= 1 - linkDensity;
        }

        // Flat penalty for link-heavy nodes that do not look like documentation.
        if (linkDensity > 0.3 && linkTextLength > 30 && !hasDocumentationMarkers(node)) {
          ctx.score -= 10;
        }
      }

      const finalScore = ctx.score;
      const isLinkHeavy = ctx.isHighLinkDensity
        || (linkTextLength > 50 && textLength > 0 && linkTextLength / textLength > 0.5);
      if (finalScore <= -12 || isLinkHeavy) {
        createBufferRegion(node, state, false);
      }

      if (node.tagHandler?.isInline) {
        const parent = node.parent;
        if (parent && parent.context) {
          // Net effect: the parent's score becomes this inline child's final
          // score (up to float rounding). The base is read once with a
          // fallback so a parent that has a context but no score yet (e.g.
          // <body> before any text) does not end up with NaN.
          const parentScore = parent.context.score || 0;
          parent.context.score = parentScore + (finalScore - parentScore);
        }
      }
    },
  });
}
290
+
291
+ export { readabilityPlugin as r };