mdream 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +9 -0
- package/README.md +185 -0
- package/bin/mdream.mjs +2 -0
- package/dist/cli.d.mts +2 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.mjs +25 -0
- package/dist/index.d.mts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.mjs +13 -0
- package/dist/plugins.d.mts +88 -0
- package/dist/plugins.d.ts +88 -0
- package/dist/plugins.mjs +4 -0
- package/dist/preset/minimal.d.mts +11 -0
- package/dist/preset/minimal.d.ts +11 -0
- package/dist/preset/minimal.mjs +39 -0
- package/dist/shared/mdream.-hdaPj9a.mjs +280 -0
- package/dist/shared/mdream.5zaIXVJz.mjs +508 -0
- package/dist/shared/mdream.C8ruysN5.mjs +291 -0
- package/dist/shared/mdream.DUeWbUFG.mjs +1432 -0
- package/dist/shared/mdream.a2AvjJLp.d.mts +218 -0
- package/dist/shared/mdream.a2AvjJLp.d.ts +218 -0
- package/dist/shared/mdream.cpEmpxyh.mjs +105 -0
- package/package.json +62 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { T as TAG_BODY, a as TAG_HTML, b as TAG_HEAD, c as createBufferRegion, d as TAG_A, e as TAG_ADDRESS, f as TAG_SUMMARY, g as TAG_DETAILS, h as TAG_STYLE, i as TAG_SCRIPT, j as TAG_SPAN, k as TAG_BR, l as TAG_HR, m as TAG_I, n as TAG_EM, o as TAG_B, p as TAG_STRONG, q as TAG_OBJECT, r as TAG_EMBED, s as TAG_IFRAME, t as TAG_FIELDSET, u as TAG_SELECT, v as TAG_TEXTAREA, w as TAG_INPUT, x as TAG_BUTTON, y as TAG_FORM, z as TAG_ASIDE, A as TAG_NAV, B as TAG_FOOTER, C as TAG_HEADER, D as TAG_H6, E as TAG_H5, F as TAG_H4, G as TAG_H3, H as TAG_H2, I as TAG_H1, J as TAG_DD, K as TAG_DT, L as TAG_DL, M as TAG_LI, N as TAG_OL, O as TAG_UL, P as TAG_TD, Q as TAG_TH, R as TAG_TR, S as TAG_TFOOT, U as TAG_TBODY, V as TAG_THEAD, W as TAG_CAPTION, X as TAG_TABLE, Y as TAG_SVG, Z as TAG_AUDIO, _ as TAG_VIDEO, $ as TAG_FIGCAPTION, a0 as TAG_FIGURE, a1 as TAG_IMG, a2 as TAG_CODE, a3 as TAG_PRE, a4 as TAG_BLOCKQUOTE, a5 as TAG_DIV, a6 as TAG_P, a7 as TAG_MAIN, a8 as TAG_SECTION, a9 as TAG_ARTICLE } from './mdream.-hdaPj9a.mjs';
|
|
2
|
+
import { c as createPlugin } from './mdream.cpEmpxyh.mjs';
|
|
3
|
+
|
|
4
|
+
/**
 * Shared patterns used to score `class` / `id` attribute values and text.
 *
 * - `positive`: substrings that suggest the element holds primary content.
 * - `negative`: substrings that suggest boilerplate (navigation, ads, meta).
 * - `commas`: global matcher used to count commas in a text node.
 */
const REGEXPS = {
  // Positive patterns that suggest high-quality content
  positive: /article|body|content|entry|main|page|post|text|blog|story|recipe|ingredient|instruction|description|docs?|guide|tutorial|reference|manual/i,
  // Negative patterns that suggest low-quality content.
  // The ad token is anchored to non-letter boundaries: a bare `ad` substring
  // also matches ordinary names such as "readme", "heading", "loading",
  // "thread" or "padding" and would wrongly mark real content as boilerplate.
  // Common ad-specific names (ads, advert*, adsense, adbox, adslot, adunit)
  // are still recognised.
  negative: /(?:^|[^a-z])ad(?:s|vert[a-z]*|sense|box|slot|unit)?(?:[^a-z]|$)|banner|combx|comment|disqus|extra|foot|header|menu|meta|nav|promo|related|scroll|share|sidebar|sponsor|social|tags|widget|sitemap|copyright|login|register|subscribe|newsletter|signup|category|author|date|publish|cta|button|apply|trial|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i,
  // Used for counting commas to determine complexity
  commas: /,/g,
};
|
|
11
|
+
/**
 * Base score contributed by an element's tag, keyed by tag id.
 * Positive values favour the element as content, negative values count
 * against it, and 0 is neutral. Tags that are not listed are treated as 0
 * by the lookup in the plugin (`TagScores[node.tagId] ?? 0`).
 */
const TagScores = {
  // --- Main structural elements ---
  [TAG_ARTICLE]: 15, // explicit content container, highest confidence
  [TAG_SECTION]: 8, // designated content section
  [TAG_MAIN]: 15, // main content indicator
  [TAG_P]: 5, // direct paragraph content
  [TAG_DIV]: 2, // generic container, slightly positive
  [TAG_BLOCKQUOTE]: 5, // quoted content, usually important

  // --- Code and pre-formatted content ---
  [TAG_PRE]: 8, // preformatted text/code, high value for documentation
  [TAG_CODE]: 6, // code content, high value for documentation

  // --- Media elements ---
  [TAG_IMG]: 3, // images are typically content
  [TAG_FIGURE]: 4, // figure with caption, content-focused
  [TAG_FIGCAPTION]: 3, // description for a figure
  [TAG_VIDEO]: 3, // video content
  [TAG_AUDIO]: 3, // audio content
  [TAG_SVG]: 1, // vector graphic, slight positive

  // --- Table elements ---
  [TAG_TABLE]: 0, // could be data or layout, neutral
  [TAG_CAPTION]: 2, // table caption
  [TAG_THEAD]: 0, // table structure, neutral
  [TAG_TBODY]: 0, // table structure, neutral
  [TAG_TFOOT]: 0, // table structure, neutral
  [TAG_TR]: -1, // table row, slight negative
  [TAG_TH]: -2, // table header, more negative than cells
  [TAG_TD]: 0, // table cell, neutral

  // --- List elements ---
  [TAG_UL]: -8, // higher penalty as lists are often navigation
  [TAG_OL]: -5, // ordered lists are still often navigation
  [TAG_LI]: -6, // higher penalty for list items to avoid nav lists
  [TAG_DL]: 0, // definition lists, neutral
  [TAG_DT]: 0, // definition lists, neutral
  [TAG_DD]: 0, // definition lists, neutral

  // --- Heading elements ---
  [TAG_H1]: 1, // top-level heading (may be site title)
  [TAG_H2]: 1, // section headers, slightly positive
  [TAG_H3]: 1, // section headers, slightly positive
  [TAG_H4]: 0, // minor headers, neutral
  [TAG_H5]: 0, // minor headers, neutral
  [TAG_H6]: 0, // minor headers, neutral

  // --- Navigation and structural elements (negative) ---
  [TAG_HEADER]: -15, // page header, often not content but may contain article headers
  [TAG_FOOTER]: -25, // footer, rarely content
  [TAG_NAV]: -30, // navigation, not content
  [TAG_ASIDE]: -25, // sidebar, usually not main content

  // --- Form elements (negative) ---
  [TAG_FORM]: -8, // user input, not content
  [TAG_BUTTON]: -5, // interactive element, not content
  [TAG_INPUT]: -5, // form field, not content
  [TAG_TEXTAREA]: -5, // text input, not content
  [TAG_SELECT]: -5, // drop-down, not content
  [TAG_FIELDSET]: -5, // form field group, not content

  // --- Embedded content (mostly negative) ---
  [TAG_IFRAME]: -3, // embedded content, often ads
  [TAG_EMBED]: -3, // embedded content, often ads
  [TAG_OBJECT]: -3, // embedded content, often ads

  // --- Links ---
  [TAG_A]: -8, // higher penalty to avoid navigation-heavy areas

  // --- Text formatting ---
  [TAG_STRONG]: 1, // emphasized text, slightly positive
  [TAG_B]: 1, // emphasized text, slightly positive
  [TAG_EM]: 1, // emphasized text, slightly positive
  [TAG_I]: 1, // emphasized text, slightly positive

  // --- Miscellaneous elements ---
  [TAG_HR]: 0, // divider, neutral
  [TAG_BR]: 0, // line break, neutral
  [TAG_SPAN]: 0, // inline container, neutral
  [TAG_SCRIPT]: -25, // script, never content
  [TAG_STYLE]: -25, // style, never content

  // --- Expandable content ---
  [TAG_DETAILS]: 2, // expandable content
  [TAG_SUMMARY]: 1, // header for expandable content

  // --- Additional tags not explicitly in scoring.md ---
  [TAG_ADDRESS]: -3, // similar to footer, rarely content
};
|
|
147
|
+
// Attribute values matching this pattern receive the heaviest penalty.
const STRONG_NEGATIVE_ATTRIBUTE_PATTERN = /nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;
// Positive attribute values that also match this pattern receive an extra bonus.
const DOCUMENTATION_ATTRIBUTE_PATTERN = /docs?|guide|tutorial|reference|manual|article/i;

/**
 * Scores a single `class` or `id` attribute value.
 *
 * The checks are ordered and mutually exclusive: a strong negative match
 * wins over a general negative match, which wins over a positive match.
 *
 * @param {string | undefined} value - Attribute value; empty/missing scores 0.
 * @returns {number} -35, -15, 0, 10 or 15.
 */
function scoreAttributeValue(value) {
  if (!value) {
    return 0;
  }
  if (STRONG_NEGATIVE_ATTRIBUTE_PATTERN.test(value)) {
    return -35;
  }
  if (REGEXPS.negative.test(value)) {
    return -15;
  }
  if (REGEXPS.positive.test(value)) {
    // Documentation-style names get +5 on top of the +10 positive bonus.
    return DOCUMENTATION_ATTRIBUTE_PATTERN.test(value) ? 15 : 10;
  }
  return 0;
}

/**
 * Computes the score adjustment derived from a node's `class` and `id`
 * attributes. Each attribute is scored independently and the two results
 * are summed.
 *
 * @param {{ attributes?: { class?: string, id?: string } }} node - Element node.
 * @returns {number} Combined adjustment (0 when neither attribute is present).
 */
function scoreClassAndId(node) {
  const attributes = node.attributes;
  return scoreAttributeValue(attributes?.class) + scoreAttributeValue(attributes?.id);
}
|
|
177
|
+
// Tag names that cause a node to be excluded as soon as it is entered.
const EXCLUDED_TAG_NAME_PATTERN = /nav|header|footer|aside|form|fieldset|button/i;
// `class` / `id` values that cause a node to be excluded as soon as it is entered.
const EXCLUDED_CLASS_ID_PATTERN = /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;
// Inline styles that hide the element.
const HIDDEN_STYLE_PATTERN = /display:\s*none|visibility:\s*hidden/i;
// `aria-*` attribute names that indicate hidden content when set to "true".
const HIDDEN_ARIA_ATTRIBUTE_PATTERN = /hidden|invisible/i;
// `class` / `id` values that exempt a node from the link-ratio penalty.
const DOCUMENTATION_MARKER_PATTERN = /docs?|guide|tutorial|reference|manual|article|content/i;
// Tag names that exempt a node from the link-ratio penalty.
const CONTENT_TAG_NAME_PATTERN = /main|article|section/i;

/**
 * Returns true when the node's tag name, class, id, inline style or
 * `aria-*="true"` attributes mark it as navigation, boilerplate or hidden.
 */
function isStronglyNegativeNode(node) {
  const { name, attributes } = node;
  if (name && EXCLUDED_TAG_NAME_PATTERN.test(name)) {
    return true;
  }
  if (!attributes) {
    return false;
  }
  if (attributes.class && EXCLUDED_CLASS_ID_PATTERN.test(attributes.class)) {
    return true;
  }
  if (attributes.id && EXCLUDED_CLASS_ID_PATTERN.test(attributes.id)) {
    return true;
  }
  if (attributes.style && HIDDEN_STYLE_PATTERN.test(attributes.style)) {
    return true;
  }
  return Object.keys(attributes).some(
    (attr) => attr.startsWith("aria-") && attributes[attr] === "true" && HIDDEN_ARIA_ATTRIBUTE_PATTERN.test(attr),
  );
}

/**
 * Returns true when the node's class, id or tag name marks it as a
 * documentation/content container.
 */
function hasDocumentationMarkers(node) {
  const { name, attributes } = node;
  return Boolean(
    (attributes?.class && DOCUMENTATION_MARKER_PATTERN.test(attributes.class))
    || (attributes?.id && DOCUMENTATION_MARKER_PATTERN.test(attributes.id))
    || (name && CONTENT_TAG_NAME_PATTERN.test(name)),
  );
}

/**
 * Builds the readability plugin.
 *
 * The plugin keeps a per-node `context` ({ score, tagCount, textLength,
 * linkTextLength, isHighLinkDensity }) and calls `createBufferRegion` for
 * nodes it decides on: `<head>` is passed `true`, strongly negative or
 * low-scoring / link-heavy nodes are passed `false`.
 * NOTE(review): the third argument presumably means "include this region";
 * confirm against `createBufferRegion`.
 *
 * @returns {*} Whatever `createPlugin` returns for the three hooks below.
 */
function readabilityPlugin() {
  // True while the walker is inside <head>; nothing in there is scored.
  let inHead = false;
  return createPlugin({
    /**
     * Assigns the initial score (tag score + class/id adjustment) and either
     * excludes the node immediately or inherits the parent's running score.
     */
    onNodeEnter(node, state) {
      if (inHead) {
        return;
      }
      if (!node.context) {
        node.context = {};
      }
      // <html> and <body> only carry a context so descendants can read it.
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) {
        return;
      }
      if (node.tagId === TAG_HEAD) {
        createBufferRegion(node, state, true);
        inHead = true;
        return;
      }
      const context = node.context;
      context.score = (TagScores[node.tagId] ?? 0) + scoreClassAndId(node);
      context.tagCount = 1;
      context.linkTextLength = 0;
      context.textLength = 0;
      if (isStronglyNegativeNode(node)) {
        // Excluded nodes do not inherit the parent's score.
        createBufferRegion(node, state, false);
        return;
      }
      const parentContext = node.parent?.context;
      if (parentContext) {
        context.score += parentContext.score || 0;
      }
    },
    /**
     * Adds the text node's length, link-text length and a comma bonus
     * (capped at 3) to every ancestor's context.
     */
    processTextNode(node) {
      if (!node.parent || inHead) {
        return;
      }
      const text = node.value;
      const length = text.length;
      const commaCount = Math.min(3, (text.match(REGEXPS.commas) || []).length);
      const isInsideLink = !!node.parent.depthMap?.[TAG_A];
      for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
        if (!ancestor.context) {
          ancestor.context = {};
        }
        const context = ancestor.context;
        context.score = (context.score || 0) + commaCount;
        context.textLength = (context.textLength || 0) + length;
        if (isInsideLink) {
          context.linkTextLength = (context.linkTextLength || 0) + length;
        }
      }
    },
    /**
     * Finalises the node's score from its accumulated text and link density,
     * excludes it when the result is too low or too link-heavy, and pushes an
     * inline node's final score up to its parent.
     */
    onNodeExit(node, state) {
      if (!node.context) {
        return;
      }
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) {
        return;
      }
      if (node.tagId === TAG_HEAD) {
        inHead = false;
        return;
      }
      if (inHead) {
        return;
      }
      const context = node.context;
      const textLength = context.textLength || 0;
      const linkTextLength = context.linkTextLength || 0;

      // Bonus for the amount of text below this node.
      if (textLength > 100) {
        context.score += 3;
      } else if (textLength >= 50) {
        context.score += 2;
      } else if (textLength >= 25) {
        context.score += 1;
      }

      if (textLength > 0) {
        const linkDensity = linkTextLength / textLength;
        // Scale the score down as the share of link text grows.
        if (linkDensity > 0.6) {
          context.score *= 0.02;
          if (linkTextLength > 50) {
            context.isHighLinkDensity = true;
          }
        } else if (linkDensity > 0.4) {
          context.score *= 1 - linkDensity * 2;
        } else if (linkDensity > 0.2) {
          context.score *= 1 - linkDensity;
        }
        // Flat penalty for link-heavy nodes that are not marked as documentation.
        if (linkDensity > 0.3 && linkTextLength > 30 && !hasDocumentationMarkers(node)) {
          context.score -= 10;
        }
      }

      const finalScore = context.score;
      const isLinkHeavy = context.isHighLinkDensity
        || (linkTextLength > 50 && textLength > 0 && linkTextLength / textLength > 0.5);
      if (finalScore <= -12 || isLinkHeavy) {
        createBufferRegion(node, state, false);
      }

      if (node.tagHandler?.isInline) {
        const parentContext = node.parent?.context;
        if (parentContext) {
          // The inline child's final score replaces the parent's running score.
          // Assigning directly avoids NaN when the parent has no score yet
          // (e.g. an inline element directly under <body>).
          parentContext.score = finalScore;
        }
      }
    },
  });
}
|
|
290
|
+
|
|
291
|
+
export { readabilityPlugin as r };
|