mdream 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/minimal-Ru8PBNVI.mjs +40 -0
- package/dist/_chunks/{plugin-DCJFRZej.mjs → plugin-Bqz9GKOA.mjs} +1 -1
- package/dist/_chunks/plugin-D45YAMmt.d.mts +12 -0
- package/dist/_chunks/plugins-D305pIpW.mjs +844 -0
- package/dist/_chunks/{stream-BeojJNLt.mjs → stream-IeCVDuTy.mjs} +53 -35
- package/dist/_chunks/{types-BHoibuoP.d.mts → types-D9VKEbix.d.mts} +29 -8
- package/dist/cli.mjs +14 -8
- package/dist/index.d.mts +75 -3
- package/dist/index.mjs +2 -2
- package/dist/plugins.d.mts +2 -2
- package/dist/plugins.mjs +2 -3
- package/dist/preset/minimal.d.mts +1 -1
- package/dist/preset/minimal.mjs +3 -39
- package/package.json +3 -17
- package/README.md +0 -252
- package/dist/_chunks/index-VTwTBxk0.d.mts +0 -58
- package/dist/_chunks/plugins-DGakgpSl.mjs +0 -582
- package/dist/_chunks/readability-BfCjcbbx.mjs +0 -271
|
@@ -1,271 +0,0 @@
|
|
|
1
|
-
import { ELEMENT_NODE, TAG_A, TAG_ADDRESS, TAG_ARTICLE, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CAPTION, TAG_CODE, TAG_DD, TAG_DETAILS, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FIGCAPTION, TAG_FIGURE, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HEADER, TAG_HR, TAG_HTML, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_LI, TAG_MAIN, TAG_META, TAG_NAV, TAG_OBJECT, TAG_OL, TAG_P, TAG_PRE, TAG_SCRIPT, TAG_SECTION, TAG_SELECT, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUMMARY, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TITLE, TAG_TR, TAG_UL, TAG_VIDEO, collectNodeContent, createBufferRegion, createPlugin$1 as createPlugin } from "./plugin-DCJFRZej.mjs";
|
|
2
|
-
|
|
3
|
-
//#region src/plugins/frontmatter.ts
|
|
4
|
-
/**
 * A plugin that manages frontmatter generation from HTML head elements.
 * Extracts metadata from meta tags and the title and generates YAML frontmatter.
 *
 * While the `<head>` element is open, whitelisted `<meta>` tags and the text of
 * `<title>` are collected. When `<head>` closes, the collected values are
 * serialized and handed to `collectNodeContent`. Nothing is emitted when no
 * title, meta value or additional field was collected.
 *
 * @param {object} [options]
 * @param {Record<string, unknown>} [options.additionalFields] - Extra top-level
 *   entries written as `key: value` exactly as given (no quoting is applied).
 * @param {string[]} [options.metaFields] - Extra meta names/properties to collect
 *   in addition to the built-in list.
 * @param {(name: string, value: string) => string} [options.formatValue] -
 *   Replaces the default YAML value formatter.
 * @returns {*} Whatever `createPlugin` returns for the hook object.
 */
function frontmatterPlugin(options = {}) {
  const additionalFields = options.additionalFields || {};
  const metaFields = new Set([
    "description",
    "keywords",
    "author",
    "date",
    "og:title",
    "og:description",
    "twitter:title",
    "twitter:description",
    ...(options.metaFields || [])
  ]);
  const frontmatter = {
    ...additionalFields,
    meta: {}
  };
  let inHead = false;
  const formatValue = options.formatValue || defaultFormatValue;

  /**
   * Default formatter: returns the value as a plain scalar when that is safe,
   * otherwise as a double-quoted scalar with `\`, `"` and newlines escaped.
   */
  function defaultFormatValue(name, value) {
    // A quote or backslash also forces quoting: escaping them inside a plain
    // (unquoted) scalar would leave a literal backslash in the parsed value.
    if (!/[\n:# "\\]/.test(value)) return value;
    const escaped = value
      .replace(/\\/g, "\\\\") // must run first so later escapes are not doubled
      .replace(/"/g, "\\\"")
      .replace(/\n/g, "\\n");
    return `"${escaped}"`;
  }

  /**
   * Generate YAML frontmatter string from collected metadata.
   * Returns an empty string when there is nothing to write.
   */
  function generateFrontmatter() {
    const yamlLines = [];
    // `title` first, then `description`, then everything else alphabetically.
    const entries = Object.entries(frontmatter).sort(([a], [b]) => {
      if (a === "title") return -1;
      if (b === "title") return 1;
      if (a === "description") return -1;
      if (b === "description") return 1;
      return a.localeCompare(b);
    });
    for (const [key, value] of entries) {
      if (key !== "meta") {
        yamlLines.push(`${key}: ${value}`);
        continue;
      }
      const metaLines = Object.entries(value)
        .sort(([a], [b]) => a.localeCompare(b))
        .map(([metaKey, metaValue]) => ` ${metaKey}: ${metaValue}`);
      // An empty `meta` map is omitted entirely rather than written as `meta:`.
      if (metaLines.length > 0) yamlLines.push("meta:", ...metaLines);
    }
    if (yamlLines.length === 0) return "";
    return `---\n${yamlLines.join("\n")}\n---\n\n`;
  }

  return createPlugin({
    onNodeEnter(node) {
      if (node.tagId === TAG_HEAD) {
        inHead = true;
        return;
      }
      // The title's value is captured from its text node in processTextNode.
      if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_TITLE) return;
      if (inHead && node.type === ELEMENT_NODE && node.tagId === TAG_META) {
        const { name, property, content } = node.attributes || {};
        const metaName = property || name;
        if (metaName && content && metaFields.has(metaName)) {
          // Keys containing ":" (e.g. og:title) are quoted so they stay valid YAML keys.
          const yamlKey = metaName.includes(":") ? `"${metaName}"` : metaName;
          frontmatter.meta[yamlKey] = formatValue(metaName, content);
        }
        return void 0;
      }
    },
    onNodeExit(node, state) {
      if (node.type === ELEMENT_NODE && node.tagId === TAG_HEAD) {
        inHead = false;
        const frontmatterContent = generateFrontmatter();
        if (frontmatterContent) {
          collectNodeContent({
            type: 1,
            regionId: 0
          }, frontmatterContent, state);
        }
      }
      return void 0;
    },
    processTextNode(node) {
      if (!inHead) return;
      const parent = node.parent;
      if (parent && parent.tagId === TAG_TITLE && node.value) {
        frontmatter.title = formatValue("title", node.value.trim());
        // The title text lives in the frontmatter only; keep it out of the body output.
        return {
          content: "",
          skip: true
        };
      }
    }
  });
}
|
|
93
|
-
|
|
94
|
-
//#endregion
|
|
95
|
-
//#region src/plugins/readability.ts
|
|
96
|
-
/**
 * Shared patterns for the readability heuristics.
 *
 * `positive` / `negative` are tested against class and id attribute values;
 * `commas` is used to count commas in text nodes. The remaining entries are
 * not referenced in the visible part of this file.
 */
const REGEXPS = {
  positive: /article|body|content|entry|main|page|post|text|blog|story|recipe|ingredient|instruction|description|docs?|guide|tutorial|reference|manual/i,
  // "ad"/"ads" must be a delimited token (start/end or next to a non-alphanumeric
  // character); a bare `ad` would also hit names such as "readme", "heading",
  // "thread" or "loading". `advert` keeps advert/advertisement/advertising covered.
  negative: /(?:^|[^a-z0-9])ads?(?![a-z0-9])|advert|banner|combx|comment|disqus|extra|foot|header|menu|meta|nav|promo|related|scroll|share|sidebar|sponsor|social|tags|widget|sitemap|copyright|login|register|subscribe|newsletter|signup|category|author|date|publish|cta|button|apply|trial|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i,
  commas: /,/g,
  periodAtEnd: /\.( |$)/,
  hidden: /hidden|display:\s*none|visibility:\s*hidden/i,
  advertisement: /^ad-|^ad$|advertisement|sponsor|promo|banner/i,
  comments: /comment|disqus|replies/i
};
|
|
105
|
-
/**
 * Base score for an element, looked up by its tag id.
 * Positive values raise a node's score, negative values lower it; tags that
 * are absent from the table are treated as 0 by the lookup in readabilityPlugin.
 */
const TagScores = Object.fromEntries([
  // Sectioning and text containers
  [TAG_ARTICLE, 15],
  [TAG_SECTION, 8],
  [TAG_MAIN, 15],
  [TAG_P, 5],
  [TAG_DIV, 2],
  [TAG_BLOCKQUOTE, 5],
  [TAG_PRE, 8],
  [TAG_CODE, 6],
  // Media
  [TAG_IMG, 3],
  [TAG_FIGURE, 4],
  [TAG_FIGCAPTION, 3],
  [TAG_VIDEO, 3],
  [TAG_AUDIO, 3],
  [TAG_SVG, 1],
  // Tables
  [TAG_TABLE, 0],
  [TAG_CAPTION, 2],
  [TAG_THEAD, 0],
  [TAG_TBODY, 0],
  [TAG_TFOOT, 0],
  [TAG_TR, -1],
  [TAG_TH, -2],
  [TAG_TD, 0],
  // Lists
  [TAG_UL, -8],
  [TAG_OL, -5],
  [TAG_LI, -6],
  [TAG_DL, 0],
  [TAG_DT, 0],
  [TAG_DD, 0],
  // Headings
  [TAG_H1, 1],
  [TAG_H2, 1],
  [TAG_H3, 1],
  [TAG_H4, 0],
  [TAG_H5, 0],
  [TAG_H6, 0],
  // Page chrome
  [TAG_HEADER, -15],
  [TAG_FOOTER, -25],
  [TAG_NAV, -30],
  [TAG_ASIDE, -25],
  // Forms and controls
  [TAG_FORM, -8],
  [TAG_BUTTON, -5],
  [TAG_INPUT, -5],
  [TAG_TEXTAREA, -5],
  [TAG_SELECT, -5],
  [TAG_FIELDSET, -5],
  // Embedded content
  [TAG_IFRAME, -3],
  [TAG_EMBED, -3],
  [TAG_OBJECT, -3],
  // Inline elements
  [TAG_A, -8],
  [TAG_STRONG, 1],
  [TAG_B, 1],
  [TAG_EM, 1],
  [TAG_I, 1],
  [TAG_HR, 0],
  [TAG_BR, 0],
  [TAG_SPAN, 0],
  // Non-content and miscellaneous
  [TAG_SCRIPT, -25],
  [TAG_STYLE, -25],
  [TAG_DETAILS, 2],
  [TAG_SUMMARY, 1],
  [TAG_ADDRESS, -3]
]);
|
|
167
|
-
/** Class/id fragments that earn the heaviest penalty (-35). */
const HEAVY_PENALTY_PATTERN = /nav|menu|header|footer|sidebar|ad-|advertisement|banner|promo|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;

/** Class/id fragments that earn an extra +5 on top of a positive match. */
const DOCUMENTATION_BONUS_PATTERN = /docs?|guide|tutorial|reference|manual|article/i;

/**
 * Score a single class or id attribute value.
 * The first matching tier wins: heavy penalty (-35), then REGEXPS.negative (-15),
 * then REGEXPS.positive (+10, or +15 with a documentation keyword).
 * Missing or empty values score 0.
 */
function scoreAttributeValue(value) {
  if (!value) return 0;
  if (HEAVY_PENALTY_PATTERN.test(value)) return -35;
  if (REGEXPS.negative.test(value)) return -15;
  if (REGEXPS.positive.test(value)) return DOCUMENTATION_BONUS_PATTERN.test(value) ? 15 : 10;
  return 0;
}

/**
 * Apply score adjustments based on class and ID names.
 *
 * @param {{attributes?: {class?: string, id?: string}}} node - Element whose
 *   `class` and `id` attributes are inspected; both are scored independently.
 * @returns {number} Sum of the class adjustment and the id adjustment.
 */
function scoreClassAndId(node) {
  return scoreAttributeValue(node.attributes?.class) + scoreAttributeValue(node.attributes?.id);
}
|
|
192
|
-
/** Tag names that are excluded outright. */
const EXCLUDED_TAG_NAME_PATTERN = /nav|header|footer|aside|form|fieldset|button/i;

/** Class/id fragments that exclude a node outright (same pattern for both attributes). */
const EXCLUDED_CLASS_OR_ID_PATTERN = /nav|menu|header|footer|sidebar|hidden|copyright|ad-|advertisement|banner|promo|related|comment|login|register|subscribe|newsletter|category|meta|tag|cta|button|apply|trial|engagement|sharing|likes|views|metrics|stats|breadcrumb|pagination|filter|sort|search/i;

/** Inline styles that hide an element. */
const HIDDEN_STYLE_PATTERN = /display:\s*none|visibility:\s*hidden/i;

/** aria-* attribute names that signal a hidden element when set to "true". */
const HIDDEN_ARIA_NAME_PATTERN = /hidden|invisible/i;

/** Class/id fragments that exempt a node from the link-heavy penalty. */
const DOCUMENTATION_MARKER_PATTERN = /docs?|guide|tutorial|reference|manual|article|content/i;

/** Tag names that exempt a node from the link-heavy penalty. */
const DOCUMENTATION_TAG_NAME_PATTERN = /main|article|section/i;

/**
 * True when the node's tag name, class, id, inline style or aria-* attributes
 * mark it as navigation/boilerplate or as hidden.
 */
function isStronglyNegativeNode(node) {
  const attributes = node.attributes;
  if (node.name && EXCLUDED_TAG_NAME_PATTERN.test(node.name)) return true;
  if (attributes?.class && EXCLUDED_CLASS_OR_ID_PATTERN.test(attributes.class)) return true;
  if (attributes?.id && EXCLUDED_CLASS_OR_ID_PATTERN.test(attributes.id)) return true;
  if (attributes?.style && HIDDEN_STYLE_PATTERN.test(attributes.style)) return true;
  if (!attributes) return false;
  return Object.keys(attributes).some((attr) => attr.startsWith("aria-") && attributes[attr] === "true" && HIDDEN_ARIA_NAME_PATTERN.test(attr));
}

/**
 * True when the node's class, id or tag name marks it as documentation/article content.
 */
function hasDocumentationMarkers(node) {
  const attributes = node.attributes;
  if (attributes?.class && DOCUMENTATION_MARKER_PATTERN.test(attributes.class)) return true;
  if (attributes?.id && DOCUMENTATION_MARKER_PATTERN.test(attributes.id)) return true;
  return Boolean(node.name && DOCUMENTATION_TAG_NAME_PATTERN.test(node.name));
}

/**
 * Creates a plugin that implements readability.js style heuristics for content quality assessment.
 * Controls content inclusion/exclusion using buffer regions.
 *
 * Each element gets a score in `node.context` (tag score + class/id score +
 * inherited parent score). Text nodes add comma counts and text/link lengths to
 * every ancestor. On exit the score is adjusted for text length and link
 * density, and `createBufferRegion(node, state, false)` is called for nodes
 * that score -12 or lower or that are dominated by link text.
 *
 * @returns {*} Whatever `createPlugin` returns for the hook object.
 */
function readabilityPlugin() {
  let inHead = false;
  return createPlugin({
    onNodeEnter(node, state) {
      if (inHead) return;
      if (!node.context) node.context = {};
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
      if (node.tagId === TAG_HEAD) {
        createBufferRegion(node, state, true);
        inHead = true;
        return;
      }
      const tagScore = TagScores[node.tagId] ?? 0;
      const classAndIdScore = scoreClassAndId(node);
      node.context.score = tagScore + classAndIdScore;
      node.context.tagCount = 1;
      node.context.linkTextLength = 0;
      node.context.textLength = 0;
      if (isStronglyNegativeNode(node)) {
        createBufferRegion(node, state, false);
      } else if (node.parent && node.parent.context) {
        // Only nodes that were not excluded up front inherit their parent's score.
        node.context.score += node.parent.context.score || 0;
      }
    },
    processTextNode(node) {
      if (!node.parent || inHead) return;
      const textValue = node.value;
      const len = textValue.length;
      // Comma bonus is capped at 3 per text node.
      const commaCount = Math.min(3, (textValue.match(REGEXPS.commas) || []).length);
      const isInsideLink = !!node.parent.depthMap?.[TAG_A];
      // Credit every ancestor, creating contexts where none exist yet.
      for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
        if (!ancestor.context) ancestor.context = {};
        const context = ancestor.context;
        context.score = (context.score || 0) + commaCount;
        context.textLength = (context.textLength || 0) + len;
        if (isInsideLink) context.linkTextLength = (context.linkTextLength || 0) + len;
      }
    },
    onNodeExit(node, state) {
      if (!node.context) return;
      if (node.tagId === TAG_BODY || node.tagId === TAG_HTML) return;
      if (node.tagId === TAG_HEAD) {
        inHead = false;
        return;
      }
      if (inHead) return;
      const context = node.context;
      const textLength = context.textLength || 0;
      const linkTextLength = context.linkTextLength || 0;

      // Text-length bonus; nodes with less than 25 characters get none.
      if (textLength > 100) context.score += 3;
      else if (textLength >= 50) context.score += 2;
      else if (textLength >= 25) context.score += 1;

      if (textLength > 0) {
        const linkDensity = linkTextLength / textLength;
        if (linkDensity > .6) {
          context.score = context.score * .02;
          if (linkTextLength > 50) context.isHighLinkDensity = true;
        } else if (linkDensity > .4) {
          // NOTE(review): for densities in (0.5, 0.6] this factor is negative,
          // so it flips the sign of the score — confirm whether that is intended.
          context.score *= 1 - linkDensity * 2;
        } else if (linkDensity > .2) {
          context.score *= 1 - linkDensity * 1;
        }
      }

      if (linkTextLength > 0 && textLength > 0) {
        const linkRatio = linkTextLength / textLength;
        if (linkRatio > .3 && linkTextLength > 30 && !hasDocumentationMarkers(node)) context.score -= 10;
      }

      const finalScore = context.score;
      const isLinkDominated = context.isHighLinkDensity || linkTextLength > 50 && textLength > 0 && linkTextLength / textLength > .5;
      if (finalScore <= -12) createBufferRegion(node, state, false);
      else if (isLinkDominated) createBufferRegion(node, state, false);

      if (node.tagHandler?.isInline) {
        const parent = node.parent;
        if (parent && parent.context) {
          // Body/html contexts are created without a score; treat a missing
          // score as 0 so the update cannot turn the parent's score into NaN.
          const parentScore = parent.context.score || 0;
          parent.context.score = parentScore + (finalScore - parentScore);
        }
      }
    }
  });
}
|
|
269
|
-
|
|
270
|
-
//#endregion
|
|
271
|
-
export { frontmatterPlugin, readabilityPlugin };
|