@mdream/js 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +9 -0
- package/README.md +135 -0
- package/bin/mdream.mjs +2 -0
- package/dist/_chunks/const.mjs +137 -0
- package/dist/_chunks/index.d.mts +14 -0
- package/dist/_chunks/minimal.d.mts +10 -0
- package/dist/_chunks/parse.mjs +1201 -0
- package/dist/_chunks/plugins.mjs +791 -0
- package/dist/_chunks/resolve-plugins.mjs +302 -0
- package/dist/_chunks/src.mjs +344 -0
- package/dist/_chunks/types.d.mts +390 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +27 -0
- package/dist/index.d.mts +4 -0
- package/dist/index.mjs +7 -0
- package/dist/llms-txt.d.mts +89 -0
- package/dist/llms-txt.mjs +347 -0
- package/dist/negotiate.d.mts +26 -0
- package/dist/negotiate.mjs +92 -0
- package/dist/parse.d.mts +57 -0
- package/dist/parse.mjs +3 -0
- package/dist/plugins.d.mts +93 -0
- package/dist/plugins.mjs +3 -0
- package/dist/preset/minimal.d.mts +2 -0
- package/dist/preset/minimal.mjs +34 -0
- package/dist/splitter.d.mts +21 -0
- package/dist/splitter.mjs +215 -0
- package/package.json +93 -0
|
@@ -0,0 +1,791 @@
|
|
|
1
|
+
import { _ as TagIdMap } from "./const.mjs";
|
|
2
|
+
//#region src/pluggable/plugin.ts
|
|
3
|
+
/**
 * Typed identity helper used to declare a plugin.
 *
 * Every TransformPlugin hook is optional, so nothing is validated or copied
 * here: the function hands back the exact object it was given.
 *
 * @param plugin - Plugin definition (an object of optional hooks).
 * @returns The same `plugin` reference.
 */
function createPlugin(plugin) {
	return plugin;
}
|
|
10
|
+
//#endregion
|
|
11
|
+
//#region src/libs/query-selector.ts
|
|
12
|
+
/**
 * Builds a matcher for a bare tag selector such as 'div', 'p' or 'h1'.
 *
 * @param tagName - Name compared (strictly) against `element.name`.
 * @returns Matcher exposing `matches(element)` and `toString()`.
 */
function createTagSelector(tagName) {
	const matches = (element) => tagName === element.name;
	const toString = () => tagName;
	return { matches, toString };
}
|
|
21
|
+
/**
 * Builds a matcher for an ID selector such as '#main' or '#content'.
 *
 * @param selector - Selector text including the leading '#'.
 * @returns Matcher that is true when `element.attributes.id` equals the ID;
 *   elements without an `attributes` object never match.
 */
function createIdSelector(selector) {
	// Drop the leading '#'.
	const id = selector.substring(1);
	return {
		matches: (element) => {
			const attributes = element.attributes;
			return attributes != null && attributes.id === id;
		},
		toString: () => `#${id}`
	};
}
|
|
31
|
+
/**
 * Creates a class selector matcher (e.g., '.container', '.header').
 *
 * The element's `class` attribute is tokenised on any run of ASCII whitespace
 * (space, tab, LF, FF, CR), so class lists that are wrapped over several lines
 * or separated by tabs match as well.
 *
 * @param selector - Selector text including the leading '.'.
 * @returns Matcher that is true when the class list contains the class name;
 *   elements without a (non-empty) `class` attribute never match.
 */
function createClassSelector(selector) {
	const className = selector.slice(1);
	// Non-global regex, so it carries no state between calls.
	const classSeparator = /[ \t\n\f\r]+/;
	return {
		matches: (element) => {
			const classAttr = element.attributes?.class;
			if (!classAttr) return false;
			// filter(Boolean) drops the empty tokens produced by leading/trailing whitespace.
			return classAttr.split(classSeparator).filter(Boolean).includes(className);
		},
		toString: () => `.${className}`
	};
}
|
|
44
|
+
/**
 * Parses attribute selectors like [attr], [attr=value], [attr^="value"].
 * Uses a manual parser to avoid polynomial regex backtracking (CodeQL ReDoS).
 *
 * @param selector - Selector text expected to start with '['.
 * @returns `null` when the text does not start with '[' or has no ']';
 *   `{ attr }` when there is no operator; otherwise `{ attr, op, value }`
 *   with one pair of matching surrounding quotes removed from `value`.
 */
function parseAttributeSelector(selector) {
	if (selector.charCodeAt(0) !== 91) return null; // '['
	const end = selector.indexOf("]", 1);
	if (end === -1) return null;
	const inner = selector.slice(1, end);
	// Locate the first operator character: = ~ | ^ $ *
	let opIdx = -1;
	for (let i = 0; i < inner.length; i++) {
		const c = inner.charCodeAt(i);
		if (c === 61 || c === 126 || c === 124 || c === 94 || c === 36 || c === 42) {
			opIdx = i;
			break;
		}
	}
	if (opIdx === -1) return { attr: inner };
	const attr = inner.slice(0, opIdx);
	// Two-character operators (^=, $=, *=, ~=, |=) end with '='.
	let opEnd = opIdx + 1;
	if (opEnd < inner.length && inner.charCodeAt(opEnd) === 61) opEnd++;
	const op = inner.slice(opIdx, opEnd);
	let value = inner.slice(opEnd);
	const quote = value.charCodeAt(0);
	// Require two characters so a lone quote is not treated as an empty quoted value.
	if (value.length >= 2 && (quote === 34 || quote === 39) && value.charCodeAt(value.length - 1) === quote) value = value.slice(1, -1);
	return {
		attr,
		op,
		value
	};
}
/**
 * Creates an attribute selector matcher (e.g., '[data-id]', '[href="https://example.com"]').
 *
 * Matching rules:
 * - `[attr]` matches when the element has its own `attr` attribute
 *   (inherited object members such as `constructor` are ignored).
 * - `[attr=""]` compares against the empty string; it is not a presence test.
 * - `^=`, `$=`, `*=` and `~=` with an empty value never match.
 * - `~=` splits the attribute value on any run of ASCII whitespace.
 * - An operator without '=' (e.g. `[a~b]`) never matches.
 *
 * @param selector - Selector text including the brackets.
 * @returns Matcher exposing `matches(element)` and `toString()`.
 */
function createAttributeSelector(selector) {
	const parsed = parseAttributeSelector(selector);
	const attrName = parsed ? parsed.attr : selector.slice(1, -1);
	const operator = parsed?.op;
	const attrValue = parsed?.value;
	// Distinguish "no value given" from "empty value given".
	const hasValueTest = operator !== void 0 && attrValue !== void 0;
	const tokenSeparator = /[ \t\n\f\r]+/;
	return {
		matches: (element) => {
			const attributes = element.attributes;
			if (!attributes || !Object.hasOwn(attributes, attrName)) return false;
			if (!hasValueTest) return true;
			const value = attributes[attrName];
			if (value === void 0) return false;
			switch (operator) {
				case "=": return value === attrValue;
				case "^=": return attrValue !== "" && value.startsWith(attrValue);
				case "$=": return attrValue !== "" && value.endsWith(attrValue);
				case "*=": return attrValue !== "" && value.includes(attrValue);
				case "~=": return attrValue !== "" && value.split(tokenSeparator).includes(attrValue);
				case "|=": return value === attrValue || value.startsWith(`${attrValue}-`);
				default: return false;
			}
		},
		toString: () => {
			if (!hasValueTest) return `[${attrName}]`;
			// Keep the historical unquoted rendering; only the empty value needs quotes to stay readable.
			return attrValue === "" ? `[${attrName}${operator}""]` : `[${attrName}${operator}${attrValue}]`;
		}
	};
}
|
|
104
|
+
/**
 * Combines several simple selectors into one matcher (e.g., 'div.container', 'h1#title').
 *
 * @param selectors - Array of matchers, each exposing `matches(element)` and `toString()`.
 * @returns Matcher that is true only when every part matches; its `toString()`
 *   is the concatenation of the parts' string forms.
 */
function createCompoundSelector(selectors) {
	// "every part matches" written as "no part fails".
	const matches = (element) => !selectors.some((part) => !part.matches(element));
	const toString = () => selectors.reduce((text, part) => text + part.toString(), "");
	return { matches, toString };
}
|
|
113
|
+
/**
 * Parses a simple or compound CSS selector into a matcher.
 *
 * The text is split into parts at each '.', '#' or '[' that occurs OUTSIDE an
 * attribute bracket. Characters inside `[...]` are never treated as part
 * boundaries, so values such as `[href="https://example.com/#top"]` stay in
 * one attribute selector. Combinators (descendant, child, ...) are not
 * interpreted by this function.
 *
 * @param selector - Selector text; surrounding whitespace is ignored.
 * @returns The single part's matcher, or a compound matcher when there are several parts.
 * @throws {Error} "Empty selector" when the text is empty after trimming.
 */
function parseSelector(selector) {
	const text = selector.trim();
	if (!text) throw new Error("Empty selector");
	const selectorParts = [];
	// Turns one collected token into the matcher implied by its first character.
	const pushPart = (token) => {
		if (!token) return;
		const lead = token[0];
		if (lead === ".") selectorParts.push(createClassSelector(token));
		else if (lead === "#") selectorParts.push(createIdSelector(token));
		else if (lead === "[") selectorParts.push(createAttributeSelector(token));
		else selectorParts.push(createTagSelector(token));
	};
	let current = "";
	let inAttribute = false;
	for (let i = 0; i < text.length; i++) {
		const char = text[i];
		const startsPart = !inAttribute && (char === "." || char === "#" || char === "[");
		if (startsPart && current) {
			pushPart(current);
			current = char;
		} else current += char;
		// Updated after the boundary check so the opening '[' itself can start a part.
		if (char === "[") inAttribute = true;
		else if (char === "]") inAttribute = false;
	}
	pushPart(current);
	if (selectorParts.length === 1) return selectorParts[0];
	return createCompoundSelector(selectorParts);
}
|
|
144
|
+
//#endregion
|
|
145
|
+
//#region src/plugins/extraction.ts
|
|
146
|
+
/**
 * Builds a plugin that calls user callbacks for elements matching CSS selectors.
 *
 * For every element matched on enter, the text of all descendant text nodes is
 * accumulated; on exit each matching callback receives a copy of the element
 * with a trimmed `textContent` property, plus the processing state.
 * When several selectors match the same element, every one of their callbacks
 * is invoked (in the order the selectors appear in `selectors`).
 *
 * @deprecated Use `plugins.extraction` config for declarative extraction that works with both JS and Rust engines.
 * @param selectors - Object mapping selector strings to `(element, state) => void` callbacks.
 * @returns Plugin with `onNodeEnter`, `processTextNode` and `onNodeExit` hooks.
 */
function extractionPlugin(selectors) {
	const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({
		matcher: parseSelector(selector),
		callback
	}));
	// element -> { textContent, callbacks[] } for elements currently open.
	const trackedElements = /* @__PURE__ */ new Map();
	return createPlugin({
		onNodeEnter(element) {
			let callbacks;
			for (const { matcher, callback } of matcherCallbacks) {
				if (!matcher.matches(element)) continue;
				(callbacks ??= []).push(callback);
			}
			if (callbacks) trackedElements.set(element, {
				textContent: "",
				callbacks
			});
		},
		processTextNode(textNode) {
			// Credit the text to every tracked ancestor, not just the nearest one.
			let currentParent = textNode.parent;
			while (currentParent) {
				const tracked = trackedElements.get(currentParent);
				if (tracked) tracked.textContent += textNode.value;
				currentParent = currentParent.parent;
			}
		},
		onNodeExit(element, state) {
			const tracked = trackedElements.get(element);
			if (!tracked) return;
			// Untrack first so a throwing callback cannot leave a stale entry behind.
			trackedElements.delete(element);
			const textContent = tracked.textContent.trim();
			// Each callback gets its own copy so one cannot observe another's mutations.
			for (const callback of tracked.callbacks) callback({
				...element,
				textContent
			}, state);
		}
	});
}
|
|
185
|
+
/**
 * Collector behind the declarative `plugins.extraction` config.
 *
 * While the document is processed, matching elements are recorded as plain
 * result objects (`selector`, `tagName`, `textContent`, `attributes`). The
 * configured callbacks are not run during processing; they run when
 * `callHandlers()` is invoked, to match Rust engine behavior.
 *
 * @param config - Object mapping selector strings to callbacks receiving a result object.
 * @returns `{ plugin, getResults, callHandlers }`; `getResults()` returns the live results array.
 */
function extractionCollectorPlugin(config) {
	const matchers = Object.entries(config).map(([selector, callback]) => ({
		selector,
		matcher: parseSelector(selector),
		callback
	}));
	const results = [];
	const trackedElements = /* @__PURE__ */ new Map();
	const plugin = createPlugin({
		onNodeEnter(element) {
			for (const { selector, matcher, callback } of matchers) {
				if (!matcher.matches(element)) continue;
				// One entry per element: a later matching selector replaces an earlier one.
				trackedElements.set(element, {
					textContent: "",
					selector,
					callback
				});
			}
		},
		processTextNode(textNode) {
			// Append the text to every tracked ancestor.
			for (let ancestor = textNode.parent; ancestor; ancestor = ancestor.parent) {
				const entry = trackedElements.get(ancestor);
				if (entry) entry.textContent += textNode.value;
			}
		},
		onNodeExit(element) {
			const entry = trackedElements.get(element);
			if (!entry) return;
			results.push({
				selector: entry.selector,
				tagName: element.name,
				textContent: entry.textContent.trim(),
				attributes: { ...element.attributes }
			});
			trackedElements.delete(element);
		}
	});
	/** Invokes, for each recorded result, the callback registered for its selector. */
	function callHandlers() {
		for (const el of results) matchers.find((candidate) => candidate.selector === el.selector)?.callback(el);
	}
	return {
		plugin,
		getResults: () => results,
		callHandlers
	};
}
|
|
246
|
+
//#endregion
|
|
247
|
+
//#region src/plugins/filter.ts
|
|
248
|
+
/**
 * Turns a selector (numeric tag ID or string) into a matcher.
 *
 * - A number is compared directly against `element.tagId`.
 * - A string found in `TagIdMap` is resolved to its tag ID once, here, so
 *   matching is a numeric comparison rather than a per-element string compare.
 * - Any other string (e.g. '.class', '#id') is handed to `parseSelector`.
 *
 * @param selector - Tag ID number or selector string.
 * @returns Matcher exposing `matches(element)` and `toString()`.
 */
function compileSelector(selector) {
	const tagIdMatcher = (id, label) => ({
		matches: (element) => element.tagId === id,
		toString: () => label
	});
	if (typeof selector === "number") return tagIdMatcher(selector, String(selector));
	const tagId = TagIdMap[selector];
	return tagId === void 0 ? parseSelector(selector) : tagIdMatcher(tagId, selector);
}
|
|
265
|
+
/**
 * Plugin that filters nodes based on CSS selectors.
 * Allows including or excluding nodes based on selectors.
 *
 * Behavior of the `beforeNodeProcess` hook (returns `{ skip: true }` to drop a node):
 * - Text nodes (`type === 2`) are skipped when any ancestor matches an exclude selector.
 * - Elements (`type === 1`) are skipped when they, or any ancestor, match an
 *   exclude selector. While exclude selectors are configured, elements whose
 *   inline style declares `position: absolute` or `position: fixed` are skipped too.
 *   Only a real `position` declaration counts; e.g. `table-layout: fixed` does not.
 * - With include selectors, an element is kept only if it matches one, or,
 *   unless `processChildren` is `false`, one of its ancestors does.
 *
 * @example
 * // Include only heading elements and their children
 * withQuerySelectorPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] })
 *
 * @example
 * // Exclude navigation, sidebar, and footer
 * withQuerySelectorPlugin({ exclude: ['nav', '#sidebar', '.footer'] })
 *
 * @param options - `{ include?, exclude?, processChildren? }`; selectors may be strings or tag ID numbers.
 * @returns Plugin with a `beforeNodeProcess` hook.
 */
function filterPlugin(options = {}) {
	const includeSelectors = options.include?.map((selector) => compileSelector(selector)) || [];
	const excludeSelectors = options.exclude?.map((selector) => compileSelector(selector)) || [];
	const processChildren = options.processChildren !== false;
	// Matches a `position` declaration at the start of the style or after a ';'.
	const outOfFlowPosition = /(?:^|;)\s*position\s*:\s*(?:absolute|fixed)\b/i;
	const isExcluded = (element) => excludeSelectors.some((selector) => selector.matches(element));
	const hasExcludedAncestor = (node) => {
		for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) if (isExcluded(ancestor)) return true;
		return false;
	};
	return createPlugin({ beforeNodeProcess(event) {
		const { node } = event;
		if (node.type === 2) {
			if (excludeSelectors.length && hasExcludedAncestor(node)) return { skip: true };
			return;
		}
		if (node.type !== 1) return;
		const element = node;
		if (excludeSelectors.length) {
			// `attributes` may be absent; the rest of this file guards it the same way.
			const style = element.attributes?.style;
			if (typeof style === "string" && outOfFlowPosition.test(style)) return { skip: true };
			if (isExcluded(element) || hasExcludedAncestor(element)) return { skip: true };
		}
		if (includeSelectors.length) {
			let currentElement = element;
			while (currentElement) {
				if (includeSelectors.some((selector) => selector.matches(currentElement))) return;
				// Without processChildren only the element itself may satisfy the include list.
				if (!processChildren) break;
				currentElement = currentElement.parent;
			}
			return { skip: true };
		}
	} });
}
|
|
315
|
+
//#endregion
|
|
316
|
+
//#region src/plugins/frontmatter.ts
|
|
317
|
+
const BACKSLASH_RE = /\\/g;
const DOUBLE_QUOTE_RE = /"/g;
const CARRIAGE_RETURN_RE = /\r/g;
const NEWLINE_RE = /\n/g;
/** Escape sequences produced by `formatValue` inside double-quoted scalars. */
const YAML_ESCAPE_SEQUENCE_RE = /\\(["\\nr])/g;
/** Characters that force a value to be emitted as a double-quoted scalar. */
const YAML_NEEDS_QUOTES_RE = /[\n\r:# "\\]/;
/**
 * A plugin that manages frontmatter generation from HTML head elements.
 * Extracts metadata from meta tags and title and generates YAML frontmatter.
 *
 * Collected while inside the head element (tag ID 1): the text of the title
 * element (tag ID 4) and the `content` of meta elements (tag ID 5) whose
 * `property` or `name` is in the allowed field set. When the head element is
 * exited, the YAML block is pushed onto `state.buffer`.
 *
 * Values are written verbatim when they contain none of newline, carriage
 * return, ':', '#', space, '"' or '\'. Otherwise they are written as a
 * double-quoted scalar with `\`, `"`, CR and LF escaped, so every value stays
 * on one line and `getFrontmatter()` can restore the original text exactly.
 *
 * @param options - `{ additionalFields?, metaFields? }`: extra string fields
 *   to emit, and extra meta names to collect on top of the built-in list.
 * @returns Plugin with `onNodeEnter`, `onNodeExit`, `processTextNode` hooks and
 *   a `getFrontmatter()` method returning the collected data as a flat object
 *   (or `undefined` when nothing was collected).
 */
function frontmatterPlugin(options = {}) {
	const additionalFields = options.additionalFields || {};
	const metaFields = new Set([
		"description",
		"keywords",
		"author",
		"date",
		"og:title",
		"og:description",
		"twitter:title",
		"twitter:description",
		...options.metaFields || []
	]);
	const frontmatter = {
		...additionalFields,
		meta: {}
	};
	let inHead = false;
	/** Renders a raw string as a YAML scalar, quoting and escaping only when required. */
	function formatValue(_name, value) {
		if (!YAML_NEEDS_QUOTES_RE.test(value)) return value;
		// Backslashes first, so the escapes added afterwards are not doubled.
		return `"${value.replace(BACKSLASH_RE, "\\\\").replace(DOUBLE_QUOTE_RE, "\\\"").replace(CARRIAGE_RETURN_RE, "\\r").replace(NEWLINE_RE, "\\n")}"`;
	}
	/** Reverses `formatValue`: strips the surrounding quotes and decodes the escapes in one pass. */
	function unquote(raw) {
		if (raw.length < 2 || !raw.startsWith("\"") || !raw.endsWith("\"")) return raw;
		return raw.slice(1, -1).replace(YAML_ESCAPE_SEQUENCE_RE, (_match, ch) => {
			if (ch === "n") return "\n";
			if (ch === "r") return "\r";
			return ch;
		});
	}
	/** Returns title, meta fields and string additional fields as one flat object. */
	function getStructuredData() {
		const result = {};
		if (frontmatter.title) result.title = unquote(frontmatter.title);
		for (const [k, v] of Object.entries(frontmatter.meta)) {
			// Keys containing ':' were stored wrapped in quotes.
			const cleanKey = k.startsWith("\"") && k.endsWith("\"") ? k.slice(1, -1) : k;
			result[cleanKey] = typeof v === "string" ? unquote(v) : String(v);
		}
		for (const [k, v] of Object.entries(additionalFields)) if (typeof v === "string") result[k] = v;
		return Object.keys(result).length > 0 ? result : void 0;
	}
	const plugin = createPlugin({
		onNodeEnter(node) {
			if (node.tagId === 1) {
				inHead = true;
				return;
			}
			if (!inHead || node.type !== 1) return;
			if (node.tagId === 5) {
				const { name, property, content } = node.attributes || {};
				const metaName = property || name;
				// Keys with ':' (e.g. og:title) are quoted so they stay valid YAML keys.
				if (metaName && content && metaFields.has(metaName)) frontmatter.meta[metaName.includes(":") ? `"${metaName}"` : metaName] = formatValue(metaName, content);
			}
		},
		onNodeExit(node, state) {
			if (node.type === 1 && node.tagId === 1) {
				inHead = false;
				const frontmatterContent = generateFrontmatter();
				if (frontmatterContent) {
					state.buffer.push(frontmatterContent);
					state.lastContentCache = frontmatterContent;
				}
			}
		},
		processTextNode(node) {
			if (!inHead) return;
			const parent = node.parent;
			if (parent && parent.tagId === 4 && node.value) {
				frontmatter.title = formatValue("title", node.value.trim());
				// The title goes into the frontmatter, not into the document body.
				return {
					content: "",
					skip: true
				};
			}
		}
	});
	plugin.getFrontmatter = getStructuredData;
	return plugin;
	/**
	 * Generate YAML frontmatter string from collected metadata.
	 * `title` comes first, then `description`, then the remaining keys in locale order.
	 * NOTE: `frontmatter` always owns a `meta` key, so the empty-object guard below
	 * never triggers and an empty `---` block is emitted when nothing was collected.
	 */
	function generateFrontmatter() {
		if (Object.keys(frontmatter).length === 0) return "";
		const yamlLines = [];
		const entries = Object.entries(frontmatter).sort(([a], [b]) => {
			if (a === "title") return -1;
			if (b === "title") return 1;
			if (a === "description") return -1;
			if (b === "description") return 1;
			return a.localeCompare(b);
		});
		for (const [key, value] of entries) if (key === "meta" && typeof value === "object" && value && Object.keys(value).length > 0) {
			yamlLines.push("meta:");
			const metaEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([metaKey, metaValue]) => ` ${metaKey}: ${metaValue}`);
			yamlLines.push(...metaEntries);
		} else if (key !== "meta" && typeof value === "string") yamlLines.push(`${key}: ${value}`);
		return `---\n${yamlLines.join("\n")}\n---\n\n`;
	}
}
|
|
424
|
+
//#endregion
|
|
425
|
+
//#region src/plugins/isolate-main.ts
|
|
426
|
+
/**
 * Plugin that isolates main content using the following priority order:
 * 1. If an explicit <main> element exists (within 5 depth levels), use its content exclusively
 * 2. Otherwise, find content between the first header tag (h1-h6) and first footer
 * 3. If footer is within 5 levels of nesting from the header, use it as the end boundary
 * 4. Exclude all content before the start marker and after the end marker
 *
 * Implementation notes (tag IDs are numeric constants; the names below are
 * presumed from the rules above — confirm against the tag ID table):
 * 104 = main, 105 = header, 47 = footer, 1 = head, 7..12 = h1..h6.
 * A heading nested inside a header element does not count as the start marker.
 * Before the start marker is found, content under the head element is left untouched.
 *
 * @example
 * ```html
 * <body>
 * <nav>Navigation (excluded)</nav>
 * <main>
 * <h1>Main Title (included)</h1>
 * <p>Main content (included)</p>
 * </main>
 * <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @example
 * ```html
 * <body>
 * <nav>Navigation (excluded)</nav>
 * <h1>Main Title (included)</h1>
 * <p>Main content (included)</p>
 * <footer>Footer (excluded)</footer>
 * </body>
 * ```
 *
 * @returns Plugin with a `beforeNodeProcess` hook that returns `{ skip: true }` for excluded nodes.
 */
function isolateMainPlugin() {
	let mainElement = null;
	let firstHeaderElement = null;
	let afterFooter = false;
	const headerTagIds = new Set([
		7,
		8,
		9,
		10,
		11,
		12
	]);
	// A fresh object per call, so callers never share a result instance.
	const skip = () => ({ skip: true });
	const hasAncestor = (node, test) => {
		for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) if (test(ancestor)) return true;
		return false;
	};
	const isMain = (candidate) => candidate === mainElement;
	const isHead = (candidate) => candidate.tagId === 1;
	const isHeaderTag = (candidate) => candidate.tagId === 105;
	/** Decision for element nodes (type 1). */
	function visitElement(element) {
		if (!mainElement && element.tagId === 104 && element.depth <= 5) {
			mainElement = element;
			return;
		}
		// Once a main element is known, only it and its descendants are kept.
		if (mainElement) return isMain(element) || hasAncestor(element, isMain) ? void 0 : skip();
		const isHeadingCandidate = !firstHeaderElement && element.tagId !== void 0 && headerTagIds.has(element.tagId);
		if (isHeadingCandidate && !hasAncestor(element, isHeaderTag)) {
			firstHeaderElement = element;
			return;
		}
		const isFooterCandidate = firstHeaderElement && !afterFooter && element.tagId === 47;
		if (isFooterCandidate && element.depth - firstHeaderElement.depth <= 5) {
			afterFooter = true;
			return skip();
		}
		// Before the first heading: keep only the head element and what it contains.
		if (!firstHeaderElement) return isHead(element) || hasAncestor(element, isHead) ? void 0 : skip();
		if (afterFooter) return skip();
	}
	/** Decision for text nodes (type 2). */
	function visitText(textNode) {
		if (mainElement) return hasAncestor(textNode, isMain) ? void 0 : skip();
		if (!firstHeaderElement || afterFooter) return hasAncestor(textNode, isHead) ? void 0 : skip();
	}
	return createPlugin({ beforeNodeProcess(event) {
		const { node } = event;
		if (node.type === 1) return visitElement(node);
		if (node.type === 2) return visitText(node);
	} });
}
|
|
545
|
+
//#endregion
|
|
546
|
+
//#region src/plugins/tailwind.ts
|
|
547
|
+
/**
 * Lookup from Tailwind utility class to the Markdown treatment it triggers:
 * either a `prefix`/`suffix` pair wrapped around the text, or `hidden: true`
 * for classes whose content is dropped.
 */
const TAILWIND_TO_MARKDOWN_MAP = (() => {
	// Each entry gets its own object, as in a plain literal.
	const wrap = (marker) => ({
		prefix: marker,
		suffix: marker
	});
	const hide = () => ({ hidden: true });
	return {
		"font-bold": wrap("**"),
		"font-semibold": wrap("**"),
		"font-black": wrap("**"),
		"font-extrabold": wrap("**"),
		"font-medium": wrap("**"),
		"font-italic": wrap("*"),
		"italic": wrap("*"),
		"line-through": wrap("~~"),
		"hidden": hide(),
		"invisible": hide(),
		"absolute": hide(),
		"fixed": hide(),
		"sticky": hide()
	};
})();
|
|
589
|
+
/**
 * Splits a class name into its responsive breakpoint prefix and the remainder.
 *
 * Recognised prefixes are 'sm:', 'md:', 'lg:', 'xl:' and '2xl:'; other
 * variants (and un-prefixed names) are returned whole with an empty breakpoint.
 *
 * @param className - Single class token.
 * @returns `{ baseClass, breakpoint }`, where `breakpoint` includes the trailing ':' or is ''.
 */
function extractBaseClass(className) {
	const breakpoint = [
		"sm:",
		"md:",
		"lg:",
		"xl:",
		"2xl:"
	].find((candidate) => className.startsWith(candidate)) ?? "";
	return {
		baseClass: className.slice(breakpoint.length),
		breakpoint
	};
}
|
|
608
|
+
/**
|
|
609
|
+
* Sort classes by breakpoint for mobile-first processing
|
|
610
|
+
*/
|
|
611
|
+
function sortByBreakpoint(classes) {
|
|
612
|
+
const breakpointOrder = {
|
|
613
|
+
"": 0,
|
|
614
|
+
"sm:": 1,
|
|
615
|
+
"md:": 2,
|
|
616
|
+
"lg:": 3,
|
|
617
|
+
"xl:": 4,
|
|
618
|
+
"2xl:": 5
|
|
619
|
+
};
|
|
620
|
+
return classes.toSorted((a, b) => {
|
|
621
|
+
const aBreakpoint = extractBaseClass(a).breakpoint;
|
|
622
|
+
const bBreakpoint = extractBaseClass(b).breakpoint;
|
|
623
|
+
return (breakpointOrder[aBreakpoint] || 0) - (breakpointOrder[bBreakpoint] || 0);
|
|
624
|
+
});
|
|
625
|
+
}
|
|
626
|
+
/**
 * Base class names that set a font weight. Other `font-*` utilities
 * (font family such as `font-sans`, `font-mono`, ...) are not weights.
 */
const FONT_WEIGHT_CLASSES = new Set([
	"bold",
	"font-thin",
	"font-extralight",
	"font-light",
	"font-normal",
	"font-medium",
	"font-semibold",
	"font-bold",
	"font-extrabold",
	"font-black"
]);
/** Arbitrary numeric weight, e.g. `font-[600]`. */
const ARBITRARY_FONT_WEIGHT_RE = /^font-\[\d+\]$/;
/**
 * Group classes by their formatting type to handle overrides.
 *
 * Classes are first sorted mobile-first, then placed in exactly one group,
 * checked in this order: emphasis (anything containing "italic"), weight
 * (font-weight utilities only), decoration (line-through / underline),
 * display (hidden / invisible), position (absolute / fixed / sticky), other.
 *
 * Only real font-weight utilities go into `weight`. Consumers read the first
 * entry of a group, so an unrelated `font-*` class such as `font-sans` must
 * not be able to shadow `font-bold`.
 *
 * @param classes - Array of class tokens (may carry breakpoint prefixes).
 * @returns Object with `emphasis`, `weight`, `decoration`, `display`,
 *   `position` and `other` arrays holding the original tokens.
 */
function groupByFormattingType(classes) {
	const sorted = sortByBreakpoint(classes);
	const groups = {
		emphasis: [],
		weight: [],
		decoration: [],
		display: [],
		position: [],
		other: []
	};
	for (const cls of sorted) {
		const { baseClass } = extractBaseClass(cls);
		if (baseClass.includes("italic")) groups.emphasis.push(cls);
		else if (FONT_WEIGHT_CLASSES.has(baseClass) || ARBITRARY_FONT_WEIGHT_RE.test(baseClass)) groups.weight.push(cls);
		else if (baseClass.includes("line-through") || baseClass.includes("underline")) groups.decoration.push(cls);
		else if (baseClass === "hidden" || baseClass.includes("invisible")) groups.display.push(cls);
		else if (baseClass === "absolute" || baseClass === "fixed" || baseClass === "sticky") groups.position.push(cls);
		else groups.other.push(cls);
	}
	return groups;
}
|
|
654
|
+
/**
 * Tidies doubled Markdown delimiters using plain string operations (no regex).
 *
 * - Every "****" becomes "**" and every "~~~~" becomes "~~" (single pass each).
 * - When the text then contains at least three "***" markers, the second
 *   marker is replaced by a single space, merging the first two marked
 *   segments; all later markers are kept.
 *
 * @param content - Text with Markdown delimiters.
 * @returns The cleaned-up text.
 */
function fixRedundantDelimiters(content) {
	let cleaned = content.replaceAll("****", "**").replaceAll("~~~~", "~~");
	const segments = cleaned.split("***");
	// More than three segments means three or more "***" markers.
	if (segments.length > 3) {
		const [lead, first, second, ...tail] = segments;
		cleaned = `${lead}***${first} ${second}***${tail.join("***")}`;
	}
	return cleaned;
}
|
|
666
|
+
/**
 * Reorders a class list so that un-prefixed (mobile) classes come first and
 * breakpoint-prefixed classes follow; relative order inside each part is kept.
 *
 * @param classes - Array of class tokens.
 * @returns New array; the input is not modified.
 */
function normalizeClasses(classes) {
	const mobileFirst = [];
	const responsive = [];
	classes.forEach((cls) => {
		(hasBreakpoint(cls) ? responsive : mobileFirst).push(cls);
	});
	return mobileFirst.concat(responsive);
}
|
|
677
|
+
/**
 * Tells whether a class token starts with a recognised breakpoint prefix.
 *
 * @param className - Single class token.
 * @returns `true` when `extractBaseClass` reports a non-empty breakpoint.
 */
function hasBreakpoint(className) {
	return extractBaseClass(className).breakpoint !== "";
}
|
|
684
|
+
/**
 * Derives the Markdown treatment for an element from its Tailwind classes,
 * mobile-first.
 *
 * For each of the weight, emphasis and decoration groups (in that order) only
 * the FIRST class of the group is consulted; if it has a mapping, its prefix
 * is appended to the running prefix and its suffix is prepended to the
 * running suffix, so the delimiters nest symmetrically. The element is hidden
 * when any display or position class maps to `hidden`.
 *
 * @param classes - Array of class tokens from the element's `class` attribute.
 * @returns `{ prefix, suffix, hidden }`.
 */
function processTailwindClasses(classes) {
	const grouped = groupByFormattingType(normalizeClasses(classes));
	const mappingFor = (cls) => TAILWIND_TO_MARKDOWN_MAP[extractBaseClass(cls).baseClass];
	let prefix = "";
	let suffix = "";
	for (const groupName of [
		"weight",
		"emphasis",
		"decoration"
	]) {
		const members = grouped[groupName];
		if (!members || members.length === 0) continue;
		const mapping = mappingFor(members[0]);
		if (!mapping) continue;
		if (mapping.prefix) prefix += mapping.prefix;
		if (mapping.suffix) suffix = mapping.suffix + suffix;
	}
	const hides = (cls) => {
		const mapping = mappingFor(cls);
		return Boolean(mapping && mapping.hidden);
	};
	const hidden = ["display", "position"].some((groupName) => (grouped[groupName] || []).some(hides));
	return {
		prefix,
		suffix,
		hidden
	};
}
|
|
738
|
+
/**
 * Creates a plugin that adds Tailwind class processing.
 *
 * - `processAttributes` tokenises the element's `class` attribute on any run
 *   of ASCII whitespace (class lists are often wrapped over several lines),
 *   resolves it with `processTailwindClasses`, and stores
 *   `{ prefix, suffix, hidden }` on `node.context.tailwind`. `hidden` is
 *   inherited from the parent's stored value.
 * - `processTextNode` drops text under a hidden element and otherwise wraps
 *   the text in the parent's prefix/suffix.
 * - `beforeNodeProcess` skips elements marked hidden.
 *
 * @returns Plugin with `processAttributes`, `processTextNode` and `beforeNodeProcess` hooks.
 */
function tailwindPlugin() {
	const classSeparator = /[ \t\n\f\r]+/;
	return createPlugin({
		processAttributes(node) {
			const parentHidden = node.parent?.context?.tailwind?.hidden;
			const classAttr = node.attributes?.class;
			if (!classAttr && !parentHidden) return;
			let prefix = "";
			let suffix = "";
			let hidden = false;
			if (classAttr) {
				// filter(Boolean) removes empty tokens from leading/trailing whitespace.
				const result = processTailwindClasses(classAttr.split(classSeparator).filter(Boolean));
				prefix = result.prefix;
				suffix = result.suffix;
				hidden = result.hidden;
			}
			node.context = node.context || {};
			node.context.tailwind = {
				prefix,
				suffix,
				hidden: hidden || !!parentHidden
			};
		},
		processTextNode(node) {
			const parentNode = node.parent;
			if (!parentNode || parentNode.type !== 1) return;
			const tailwindData = parentNode.context?.tailwind;
			if (tailwindData?.hidden) return {
				content: "",
				skip: true
			};
			let content = node.value;
			const prefix = tailwindData?.prefix || "";
			const suffix = tailwindData?.suffix || "";
			if (prefix || suffix) {
				content = prefix + content + suffix;
				// Wrapping text that already carries delimiters can double them up.
				content = fixRedundantDelimiters(content);
			}
			return {
				content,
				skip: false
			};
		},
		beforeNodeProcess({ node }) {
			if (node.type === 1) {
				if (node.context?.tailwind?.hidden) return { skip: true };
			}
		}
	});
}
|
|
790
|
+
//#endregion
|
|
791
|
+
export { extractionCollectorPlugin as a, filterPlugin as i, isolateMainPlugin as n, extractionPlugin as o, frontmatterPlugin as r, createPlugin as s, tailwindPlugin as t };
|