@stream-mdx/core 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/index.cjs +358 -27
- package/dist/index.d.cts +3 -2
- package/dist/index.d.ts +3 -2
- package/dist/index.mjs +356 -27
- package/dist/inline-parser.cjs +100 -18
- package/dist/inline-parser.d.cts +10 -0
- package/dist/inline-parser.d.ts +10 -0
- package/dist/inline-parser.mjs +100 -18
- package/dist/mixed-content.cjs +130 -9
- package/dist/mixed-content.d.cts +16 -2
- package/dist/mixed-content.d.ts +16 -2
- package/dist/mixed-content.mjs +130 -9
- package/dist/streaming/inline-streaming.cjs +153 -0
- package/dist/streaming/inline-streaming.d.cts +28 -0
- package/dist/streaming/inline-streaming.d.ts +28 -0
- package/dist/streaming/inline-streaming.mjs +127 -0
- package/dist/types.d.cts +24 -1
- package/dist/types.d.ts +24 -1
- package/dist/worker-html-sanitizer.cjs +16 -2
- package/dist/worker-html-sanitizer.mjs +16 -2
- package/package.json +7 -2
package/dist/inline-parser.d.ts
CHANGED
|
@@ -7,6 +7,11 @@ interface InlineParserOptions {
|
|
|
7
7
|
* parsing many intermediate states.
|
|
8
8
|
*/
|
|
9
9
|
maxCacheEntries?: number;
|
|
10
|
+
/**
|
|
11
|
+
* Enable parsing `$...$` and `$$...$$` math nodes.
|
|
12
|
+
* Defaults to `true`.
|
|
13
|
+
*/
|
|
14
|
+
enableMath?: boolean;
|
|
10
15
|
}
|
|
11
16
|
interface InlineParseOptions {
|
|
12
17
|
/**
|
|
@@ -31,6 +36,11 @@ declare class InlineParser {
|
|
|
31
36
|
* Parse inline content with memoization
|
|
32
37
|
*/
|
|
33
38
|
parse(content: string, options?: InlineParseOptions): InlineNode[];
|
|
39
|
+
/**
|
|
40
|
+
* Streaming regex anticipation helper. Returns an append string if a plugin
|
|
41
|
+
* declares an incomplete match at the end of the buffer.
|
|
42
|
+
*/
|
|
43
|
+
getRegexAnticipationAppend(content: string): string | null;
|
|
34
44
|
/**
|
|
35
45
|
* Clear the memoization cache
|
|
36
46
|
*/
|
package/dist/inline-parser.mjs
CHANGED
|
@@ -1,10 +1,40 @@
|
|
|
1
1
|
// src/inline-parser.ts
|
|
2
|
+
function ensureGlobal(pattern) {
|
|
3
|
+
if (pattern.global) return pattern;
|
|
4
|
+
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
|
|
5
|
+
return new RegExp(pattern.source, flags);
|
|
6
|
+
}
|
|
7
|
+
function findLastMatch(pattern, value) {
|
|
8
|
+
const re = ensureGlobal(pattern);
|
|
9
|
+
let match = null;
|
|
10
|
+
let next;
|
|
11
|
+
while ((next = re.exec(value)) !== null) {
|
|
12
|
+
match = next;
|
|
13
|
+
}
|
|
14
|
+
return match;
|
|
15
|
+
}
|
|
16
|
+
function findMatchAfter(pattern, value, startIndex) {
|
|
17
|
+
const re = ensureGlobal(pattern);
|
|
18
|
+
re.lastIndex = Math.max(0, startIndex);
|
|
19
|
+
return re.exec(value);
|
|
20
|
+
}
|
|
21
|
+
function isSamePattern(a, b) {
|
|
22
|
+
return a.source === b.source && a.flags === b.flags;
|
|
23
|
+
}
|
|
24
|
+
function countMatches(pattern, value) {
|
|
25
|
+
const re = ensureGlobal(pattern);
|
|
26
|
+
let count = 0;
|
|
27
|
+
while (re.exec(value)) {
|
|
28
|
+
count += 1;
|
|
29
|
+
}
|
|
30
|
+
return count;
|
|
31
|
+
}
|
|
2
32
|
var InlineParser = class {
|
|
3
33
|
constructor(options = {}) {
|
|
4
34
|
this.plugins = [];
|
|
5
35
|
this.cache = /* @__PURE__ */ new Map();
|
|
6
36
|
this.maxCacheEntries = Number.isFinite(options.maxCacheEntries ?? Number.NaN) ? Math.max(0, options.maxCacheEntries ?? 0) : 2e3;
|
|
7
|
-
this.registerDefaultPlugins();
|
|
37
|
+
this.registerDefaultPlugins({ enableMath: options.enableMath !== false });
|
|
8
38
|
}
|
|
9
39
|
/**
|
|
10
40
|
* Register a plugin with the parser
|
|
@@ -46,6 +76,42 @@ var InlineParser = class {
|
|
|
46
76
|
}
|
|
47
77
|
return result;
|
|
48
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Streaming regex anticipation helper. Returns an append string if a plugin
|
|
81
|
+
* declares an incomplete match at the end of the buffer.
|
|
82
|
+
*/
|
|
83
|
+
getRegexAnticipationAppend(content) {
|
|
84
|
+
if (!content || this.plugins.length === 0) {
|
|
85
|
+
return null;
|
|
86
|
+
}
|
|
87
|
+
for (const plugin of this.plugins) {
|
|
88
|
+
if (!("re" in plugin)) continue;
|
|
89
|
+
const regexPlugin = plugin;
|
|
90
|
+
const anticipation = regexPlugin.anticipation;
|
|
91
|
+
if (!anticipation) continue;
|
|
92
|
+
const maxScanChars = Number.isFinite(anticipation.maxScanChars ?? Number.NaN) ? Math.max(1, anticipation.maxScanChars ?? 0) : 240;
|
|
93
|
+
const scan = content.slice(Math.max(0, content.length - maxScanChars));
|
|
94
|
+
if (regexPlugin.fastCheck && !regexPlugin.fastCheck(scan)) {
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
const lastStart = findLastMatch(anticipation.start, scan);
|
|
98
|
+
if (!lastStart) continue;
|
|
99
|
+
if (isSamePattern(anticipation.start, anticipation.end)) {
|
|
100
|
+
const occurrences = countMatches(anticipation.start, scan);
|
|
101
|
+
if (occurrences % 2 === 0) {
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
} else {
|
|
105
|
+
const startIndex = lastStart.index + lastStart[0].length;
|
|
106
|
+
const hasEnd = Boolean(findMatchAfter(anticipation.end, scan, startIndex));
|
|
107
|
+
if (hasEnd) continue;
|
|
108
|
+
}
|
|
109
|
+
const appendValue = typeof anticipation.append === "function" ? anticipation.append(lastStart, content) : anticipation.append;
|
|
110
|
+
if (!appendValue) continue;
|
|
111
|
+
return appendValue;
|
|
112
|
+
}
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
49
115
|
/**
|
|
50
116
|
* Clear the memoization cache
|
|
51
117
|
*/
|
|
@@ -70,10 +136,27 @@ var InlineParser = class {
|
|
|
70
136
|
* Register default plugins with proper precedence ordering
|
|
71
137
|
* Lower priority numbers = higher precedence (run first)
|
|
72
138
|
*/
|
|
73
|
-
registerDefaultPlugins() {
|
|
139
|
+
registerDefaultPlugins(options) {
|
|
140
|
+
if (options.enableMath) {
|
|
141
|
+
this.registerPlugin({
|
|
142
|
+
id: "math-display",
|
|
143
|
+
priority: 0,
|
|
144
|
+
re: /\$\$([\s\S]+?)\$\$/g,
|
|
145
|
+
toNode: (match) => ({ kind: "math-display", tex: match[1].trim() }),
|
|
146
|
+
fastCheck: (text) => text.indexOf("$$") !== -1
|
|
147
|
+
});
|
|
148
|
+
this.registerPlugin({
|
|
149
|
+
id: "math-inline",
|
|
150
|
+
priority: 1,
|
|
151
|
+
re: /\$([^$\n]+?)\$/g,
|
|
152
|
+
// Non-greedy to prevent spanning multiple expressions
|
|
153
|
+
toNode: (match) => ({ kind: "math-inline", tex: match[1].trim() }),
|
|
154
|
+
fastCheck: (text) => text.indexOf("$") !== -1
|
|
155
|
+
});
|
|
156
|
+
}
|
|
74
157
|
this.registerPlugin({
|
|
75
158
|
id: "escaped-character",
|
|
76
|
-
priority:
|
|
159
|
+
priority: 2,
|
|
77
160
|
re: /\\([\\`*_{}\[\]()#+\-.!>])/g,
|
|
78
161
|
toNode: (match) => ({
|
|
79
162
|
kind: "text",
|
|
@@ -82,19 +165,11 @@ var InlineParser = class {
|
|
|
82
165
|
fastCheck: (text) => text.indexOf("\\") !== -1
|
|
83
166
|
});
|
|
84
167
|
this.registerPlugin({
|
|
85
|
-
id: "
|
|
86
|
-
priority: 1,
|
|
87
|
-
re: /\$\$([^$]+?)\$\$/g,
|
|
88
|
-
toNode: (match) => ({ kind: "math-display", tex: match[1].trim() }),
|
|
89
|
-
fastCheck: (text) => text.indexOf("$$") !== -1
|
|
90
|
-
});
|
|
91
|
-
this.registerPlugin({
|
|
92
|
-
id: "math-inline",
|
|
168
|
+
id: "hard-break",
|
|
93
169
|
priority: 2,
|
|
94
|
-
re:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
fastCheck: (text) => text.indexOf("$") !== -1
|
|
170
|
+
re: /\\\r?\n| {2,}\r?\n/g,
|
|
171
|
+
toNode: (_match) => ({ kind: "br" }),
|
|
172
|
+
fastCheck: (text) => text.indexOf("\n") !== -1 || text.indexOf("\r") !== -1
|
|
98
173
|
});
|
|
99
174
|
this.registerPlugin({
|
|
100
175
|
id: "code-spans",
|
|
@@ -165,9 +240,16 @@ var InlineParser = class {
|
|
|
165
240
|
this.registerPlugin({
|
|
166
241
|
id: "citations",
|
|
167
242
|
priority: 10,
|
|
168
|
-
re: /\[\^([^\]]+)\]|@cite\{([^}]+)\}/g,
|
|
169
|
-
toNode: (match) => ({ kind: "citation", id: match[1] || match[2] }),
|
|
170
|
-
fastCheck: (text) => text.indexOf("@") !== -1 || text.indexOf("[^") !== -1
|
|
243
|
+
re: /\[\^([^\]\n]+)\]|@cite\{([^}\n]+)\}|\{cite:([^}\n]+)\}/g,
|
|
244
|
+
toNode: (match) => ({ kind: "citation", id: match[1] || match[2] || match[3] }),
|
|
245
|
+
fastCheck: (text) => text.indexOf("@cite") !== -1 || text.indexOf("[^") !== -1 || text.indexOf("{cite:") !== -1,
|
|
246
|
+
anticipation: {
|
|
247
|
+
start: /@cite\{|\{cite:/g,
|
|
248
|
+
end: /\}/g,
|
|
249
|
+
full: /@cite\{[^}\n]+?\}|\{cite:[^}\n]+?\}/g,
|
|
250
|
+
append: "}",
|
|
251
|
+
maxScanChars: 120
|
|
252
|
+
}
|
|
171
253
|
});
|
|
172
254
|
this.registerPlugin({
|
|
173
255
|
id: "mentions",
|
package/dist/mixed-content.cjs
CHANGED
|
@@ -41,9 +41,23 @@ var rehypeParse = __toESM(require("rehype-parse"), 1);
|
|
|
41
41
|
var rehypeSanitize = __toESM(require("rehype-sanitize"), 1);
|
|
42
42
|
var rehypeStringify = __toESM(require("rehype-stringify"), 1);
|
|
43
43
|
var import_unified = require("unified");
|
|
44
|
-
var
|
|
44
|
+
var rehypeSanitizeModule = rehypeSanitize;
|
|
45
|
+
var defaultSchema = rehypeSanitizeModule.defaultSchema;
|
|
46
|
+
var resolvePlugin = (mod) => {
|
|
47
|
+
if (typeof mod === "function") return mod;
|
|
48
|
+
if (mod && typeof mod.default === "function") {
|
|
49
|
+
return mod.default;
|
|
50
|
+
}
|
|
51
|
+
if (mod && typeof mod.default?.default === "function") {
|
|
52
|
+
return mod.default?.default;
|
|
53
|
+
}
|
|
54
|
+
return mod;
|
|
55
|
+
};
|
|
56
|
+
var rehypeParsePlugin = resolvePlugin(rehypeParse);
|
|
57
|
+
var rehypeSanitizePlugin = resolvePlugin(rehypeSanitizeModule);
|
|
58
|
+
var rehypeStringifyPlugin = resolvePlugin(rehypeStringify);
|
|
45
59
|
var SANITIZED_SCHEMA = createSchema();
|
|
46
|
-
var sanitizeProcessor = (0, import_unified.unified)().use(
|
|
60
|
+
var sanitizeProcessor = (0, import_unified.unified)().use(rehypeParsePlugin, { fragment: true }).use(rehypeSanitizePlugin, SANITIZED_SCHEMA).use(rehypeStringifyPlugin).freeze();
|
|
47
61
|
function sanitizeHtmlInWorker(html) {
|
|
48
62
|
if (!html) return "";
|
|
49
63
|
try {
|
|
@@ -159,9 +173,27 @@ function mergeAttributes(existing, additions) {
|
|
|
159
173
|
}
|
|
160
174
|
|
|
161
175
|
// src/mixed-content.ts
|
|
162
|
-
|
|
176
|
+
var DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS = /* @__PURE__ */ new Set([
|
|
177
|
+
"span",
|
|
178
|
+
"em",
|
|
179
|
+
"strong",
|
|
180
|
+
"code",
|
|
181
|
+
"kbd",
|
|
182
|
+
"del",
|
|
183
|
+
"s",
|
|
184
|
+
"mark",
|
|
185
|
+
"sub",
|
|
186
|
+
"sup",
|
|
187
|
+
"i",
|
|
188
|
+
"b",
|
|
189
|
+
"u",
|
|
190
|
+
"small",
|
|
191
|
+
"abbr",
|
|
192
|
+
"a"
|
|
193
|
+
]);
|
|
194
|
+
function extractMixedContentSegments(raw, baseOffset, parseInline, options) {
|
|
163
195
|
if (!raw) return [];
|
|
164
|
-
const initial = splitByTagSegments(raw, baseOffset, parseInline);
|
|
196
|
+
const initial = splitByTagSegments(raw, baseOffset, parseInline, options);
|
|
165
197
|
const expanded = [];
|
|
166
198
|
for (const segment of initial) {
|
|
167
199
|
if (segment.kind === "text") {
|
|
@@ -172,22 +204,58 @@ function extractMixedContentSegments(raw, baseOffset, parseInline) {
|
|
|
172
204
|
}
|
|
173
205
|
return mergeAdjacentTextSegments(expanded, parseInline);
|
|
174
206
|
}
|
|
175
|
-
function splitByTagSegments(source, baseOffset, parseInline) {
|
|
207
|
+
function splitByTagSegments(source, baseOffset, parseInline, options) {
|
|
176
208
|
const segments = [];
|
|
177
209
|
const lowerSource = source.toLowerCase();
|
|
178
210
|
const tagPattern = /<([A-Za-z][\w:-]*)([^<>]*?)\/?>/g;
|
|
179
211
|
let cursor = 0;
|
|
180
212
|
let match = tagPattern.exec(source);
|
|
181
213
|
const baseIsFinite = typeof baseOffset === "number" && Number.isFinite(baseOffset);
|
|
214
|
+
const htmlAllowTags = normalizeHtmlAllowlist(options?.html?.allowTags);
|
|
215
|
+
const htmlAutoClose = options?.html?.autoClose === true;
|
|
216
|
+
const htmlMaxNewlines = normalizeNewlineLimit(options?.html?.maxNewlines);
|
|
217
|
+
const mdxAutoClose = options?.mdx?.autoClose === true;
|
|
218
|
+
const mdxMaxNewlines = normalizeNewlineLimit(options?.mdx?.maxNewlines);
|
|
219
|
+
const mdxAllowlist = normalizeComponentAllowlist(options?.mdx?.componentAllowlist);
|
|
182
220
|
while (match !== null) {
|
|
183
221
|
const start = match.index;
|
|
184
222
|
const tagName = match[1];
|
|
185
223
|
const matchText = match[0];
|
|
186
|
-
const
|
|
224
|
+
const tagNameLower = tagName.toLowerCase();
|
|
225
|
+
const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagNameLower);
|
|
226
|
+
const mdxCandidate = isLikelyMdxComponent(tagName);
|
|
227
|
+
const mdxAllowed = mdxCandidate && (!mdxAllowlist || mdxAllowlist.has(tagName));
|
|
228
|
+
if (mdxCandidate && mdxAllowlist && !mdxAllowed) {
|
|
229
|
+
tagPattern.lastIndex = start + 1;
|
|
230
|
+
match = tagPattern.exec(source);
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
187
233
|
let end = tagPattern.lastIndex;
|
|
188
|
-
if (!isSelfClosing && !
|
|
234
|
+
if (!isSelfClosing && !mdxAllowed) {
|
|
189
235
|
const closingIndex = findClosingHtmlTag(lowerSource, tagName.toLowerCase(), end);
|
|
190
236
|
if (closingIndex === -1) {
|
|
237
|
+
if (htmlAutoClose && htmlAllowTags.has(tagNameLower)) {
|
|
238
|
+
const tail = source.slice(end);
|
|
239
|
+
const newlineCount = countNewlines(tail, htmlMaxNewlines + 1);
|
|
240
|
+
if (newlineCount <= htmlMaxNewlines) {
|
|
241
|
+
if (start > cursor) {
|
|
242
|
+
const absoluteFrom = baseIsFinite ? baseOffset + cursor : void 0;
|
|
243
|
+
const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
|
|
244
|
+
pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
|
|
245
|
+
}
|
|
246
|
+
const rawSegment2 = source.slice(start);
|
|
247
|
+
const closedValue = `${rawSegment2}</${tagName}>`;
|
|
248
|
+
const segment2 = {
|
|
249
|
+
kind: "html",
|
|
250
|
+
value: closedValue,
|
|
251
|
+
range: createSegmentRange(baseOffset, start, source.length),
|
|
252
|
+
sanitized: sanitizeHtmlInWorker(closedValue)
|
|
253
|
+
};
|
|
254
|
+
segments.push(segment2);
|
|
255
|
+
cursor = source.length;
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
191
259
|
tagPattern.lastIndex = start + 1;
|
|
192
260
|
match = tagPattern.exec(source);
|
|
193
261
|
continue;
|
|
@@ -199,8 +267,8 @@ function splitByTagSegments(source, baseOffset, parseInline) {
|
|
|
199
267
|
const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
|
|
200
268
|
pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
|
|
201
269
|
}
|
|
202
|
-
|
|
203
|
-
const kind =
|
|
270
|
+
let rawSegment = source.slice(start, end);
|
|
271
|
+
const kind = mdxAllowed ? "mdx" : "html";
|
|
204
272
|
const segment = {
|
|
205
273
|
kind,
|
|
206
274
|
value: rawSegment,
|
|
@@ -209,6 +277,17 @@ function splitByTagSegments(source, baseOffset, parseInline) {
|
|
|
209
277
|
if (kind === "html") {
|
|
210
278
|
segment.sanitized = sanitizeHtmlInWorker(rawSegment);
|
|
211
279
|
} else {
|
|
280
|
+
const tail = source.slice(end);
|
|
281
|
+
const newlineCount = countNewlines(tail, mdxMaxNewlines + 1);
|
|
282
|
+
if (mdxAutoClose && newlineCount > mdxMaxNewlines) {
|
|
283
|
+
tagPattern.lastIndex = start + 1;
|
|
284
|
+
match = tagPattern.exec(source);
|
|
285
|
+
continue;
|
|
286
|
+
}
|
|
287
|
+
if (mdxAutoClose && !rawSegment.endsWith("/>")) {
|
|
288
|
+
rawSegment = selfCloseTag(rawSegment);
|
|
289
|
+
segment.value = rawSegment;
|
|
290
|
+
}
|
|
212
291
|
segment.status = "pending";
|
|
213
292
|
}
|
|
214
293
|
segments.push(segment);
|
|
@@ -333,6 +412,48 @@ var VOID_HTML_TAGS = /* @__PURE__ */ new Set(["br", "hr", "img", "meta", "input"
|
|
|
333
412
|
function isVoidHtmlTag(tagName) {
|
|
334
413
|
return VOID_HTML_TAGS.has(tagName.toLowerCase());
|
|
335
414
|
}
|
|
415
|
+
function normalizeNewlineLimit(value) {
|
|
416
|
+
if (!Number.isFinite(value ?? Number.NaN)) {
|
|
417
|
+
return 2;
|
|
418
|
+
}
|
|
419
|
+
return Math.max(0, value ?? 0);
|
|
420
|
+
}
|
|
421
|
+
function normalizeHtmlAllowlist(value) {
|
|
422
|
+
if (!value) return DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
|
|
423
|
+
const tags = /* @__PURE__ */ new Set();
|
|
424
|
+
for (const tag of value) {
|
|
425
|
+
if (tag) {
|
|
426
|
+
tags.add(tag.toLowerCase());
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
return tags.size > 0 ? tags : DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
|
|
430
|
+
}
|
|
431
|
+
function normalizeComponentAllowlist(value) {
|
|
432
|
+
if (!value) return null;
|
|
433
|
+
const tags = /* @__PURE__ */ new Set();
|
|
434
|
+
for (const tag of value) {
|
|
435
|
+
if (tag) tags.add(tag);
|
|
436
|
+
}
|
|
437
|
+
return tags.size > 0 ? tags : null;
|
|
438
|
+
}
|
|
439
|
+
function countNewlines(value, limit) {
|
|
440
|
+
let count = 0;
|
|
441
|
+
for (let i = 0; i < value.length; i++) {
|
|
442
|
+
if (value.charCodeAt(i) === 10) {
|
|
443
|
+
count += 1;
|
|
444
|
+
if (limit !== void 0 && count >= limit) {
|
|
445
|
+
return count;
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
return count;
|
|
450
|
+
}
|
|
451
|
+
function selfCloseTag(rawTag) {
|
|
452
|
+
if (rawTag.endsWith("/>")) return rawTag;
|
|
453
|
+
const closeIndex = rawTag.lastIndexOf(">");
|
|
454
|
+
if (closeIndex === -1) return rawTag;
|
|
455
|
+
return `${rawTag.slice(0, closeIndex)}/>`;
|
|
456
|
+
}
|
|
336
457
|
function isLikelyMdxComponent(tagName) {
|
|
337
458
|
const first = tagName.charAt(0);
|
|
338
459
|
return first.toUpperCase() === first && first.toLowerCase() !== first;
|
package/dist/mixed-content.d.cts
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
1
|
import { InlineNode, MixedContentSegment } from './types.cjs';
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
interface MixedContentAutoCloseHtmlOptions {
|
|
4
|
+
autoClose?: boolean;
|
|
5
|
+
maxNewlines?: number;
|
|
6
|
+
allowTags?: Iterable<string>;
|
|
7
|
+
}
|
|
8
|
+
interface MixedContentAutoCloseMdxOptions {
|
|
9
|
+
autoClose?: boolean;
|
|
10
|
+
maxNewlines?: number;
|
|
11
|
+
componentAllowlist?: Iterable<string>;
|
|
12
|
+
}
|
|
13
|
+
interface MixedContentOptions {
|
|
14
|
+
html?: MixedContentAutoCloseHtmlOptions;
|
|
15
|
+
mdx?: MixedContentAutoCloseMdxOptions;
|
|
16
|
+
}
|
|
17
|
+
declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[], options?: MixedContentOptions): MixedContentSegment[];
|
|
4
18
|
declare function isLikelyMdxComponent(tagName: string): boolean;
|
|
5
19
|
declare function findClosingHtmlTag(lowerSource: string, lowerTagName: string, startIndex: number): number;
|
|
6
20
|
|
|
7
|
-
export { extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
|
|
21
|
+
export { type MixedContentAutoCloseHtmlOptions, type MixedContentAutoCloseMdxOptions, type MixedContentOptions, extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
|
package/dist/mixed-content.d.ts
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
1
|
import { InlineNode, MixedContentSegment } from './types.js';
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
interface MixedContentAutoCloseHtmlOptions {
|
|
4
|
+
autoClose?: boolean;
|
|
5
|
+
maxNewlines?: number;
|
|
6
|
+
allowTags?: Iterable<string>;
|
|
7
|
+
}
|
|
8
|
+
interface MixedContentAutoCloseMdxOptions {
|
|
9
|
+
autoClose?: boolean;
|
|
10
|
+
maxNewlines?: number;
|
|
11
|
+
componentAllowlist?: Iterable<string>;
|
|
12
|
+
}
|
|
13
|
+
interface MixedContentOptions {
|
|
14
|
+
html?: MixedContentAutoCloseHtmlOptions;
|
|
15
|
+
mdx?: MixedContentAutoCloseMdxOptions;
|
|
16
|
+
}
|
|
17
|
+
declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[], options?: MixedContentOptions): MixedContentSegment[];
|
|
4
18
|
declare function isLikelyMdxComponent(tagName: string): boolean;
|
|
5
19
|
declare function findClosingHtmlTag(lowerSource: string, lowerTagName: string, startIndex: number): number;
|
|
6
20
|
|
|
7
|
-
export { extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
|
|
21
|
+
export { type MixedContentAutoCloseHtmlOptions, type MixedContentAutoCloseMdxOptions, type MixedContentOptions, extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
|
package/dist/mixed-content.mjs
CHANGED
|
@@ -3,9 +3,23 @@ import * as rehypeParse from "rehype-parse";
|
|
|
3
3
|
import * as rehypeSanitize from "rehype-sanitize";
|
|
4
4
|
import * as rehypeStringify from "rehype-stringify";
|
|
5
5
|
import { unified } from "unified";
|
|
6
|
-
var
|
|
6
|
+
var rehypeSanitizeModule = rehypeSanitize;
|
|
7
|
+
var defaultSchema = rehypeSanitizeModule.defaultSchema;
|
|
8
|
+
var resolvePlugin = (mod) => {
|
|
9
|
+
if (typeof mod === "function") return mod;
|
|
10
|
+
if (mod && typeof mod.default === "function") {
|
|
11
|
+
return mod.default;
|
|
12
|
+
}
|
|
13
|
+
if (mod && typeof mod.default?.default === "function") {
|
|
14
|
+
return mod.default?.default;
|
|
15
|
+
}
|
|
16
|
+
return mod;
|
|
17
|
+
};
|
|
18
|
+
var rehypeParsePlugin = resolvePlugin(rehypeParse);
|
|
19
|
+
var rehypeSanitizePlugin = resolvePlugin(rehypeSanitizeModule);
|
|
20
|
+
var rehypeStringifyPlugin = resolvePlugin(rehypeStringify);
|
|
7
21
|
var SANITIZED_SCHEMA = createSchema();
|
|
8
|
-
var sanitizeProcessor = unified().use(
|
|
22
|
+
var sanitizeProcessor = unified().use(rehypeParsePlugin, { fragment: true }).use(rehypeSanitizePlugin, SANITIZED_SCHEMA).use(rehypeStringifyPlugin).freeze();
|
|
9
23
|
function sanitizeHtmlInWorker(html) {
|
|
10
24
|
if (!html) return "";
|
|
11
25
|
try {
|
|
@@ -121,9 +135,27 @@ function mergeAttributes(existing, additions) {
|
|
|
121
135
|
}
|
|
122
136
|
|
|
123
137
|
// src/mixed-content.ts
|
|
124
|
-
|
|
138
|
+
var DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS = /* @__PURE__ */ new Set([
|
|
139
|
+
"span",
|
|
140
|
+
"em",
|
|
141
|
+
"strong",
|
|
142
|
+
"code",
|
|
143
|
+
"kbd",
|
|
144
|
+
"del",
|
|
145
|
+
"s",
|
|
146
|
+
"mark",
|
|
147
|
+
"sub",
|
|
148
|
+
"sup",
|
|
149
|
+
"i",
|
|
150
|
+
"b",
|
|
151
|
+
"u",
|
|
152
|
+
"small",
|
|
153
|
+
"abbr",
|
|
154
|
+
"a"
|
|
155
|
+
]);
|
|
156
|
+
function extractMixedContentSegments(raw, baseOffset, parseInline, options) {
|
|
125
157
|
if (!raw) return [];
|
|
126
|
-
const initial = splitByTagSegments(raw, baseOffset, parseInline);
|
|
158
|
+
const initial = splitByTagSegments(raw, baseOffset, parseInline, options);
|
|
127
159
|
const expanded = [];
|
|
128
160
|
for (const segment of initial) {
|
|
129
161
|
if (segment.kind === "text") {
|
|
@@ -134,22 +166,58 @@ function extractMixedContentSegments(raw, baseOffset, parseInline) {
|
|
|
134
166
|
}
|
|
135
167
|
return mergeAdjacentTextSegments(expanded, parseInline);
|
|
136
168
|
}
|
|
137
|
-
function splitByTagSegments(source, baseOffset, parseInline) {
|
|
169
|
+
function splitByTagSegments(source, baseOffset, parseInline, options) {
|
|
138
170
|
const segments = [];
|
|
139
171
|
const lowerSource = source.toLowerCase();
|
|
140
172
|
const tagPattern = /<([A-Za-z][\w:-]*)([^<>]*?)\/?>/g;
|
|
141
173
|
let cursor = 0;
|
|
142
174
|
let match = tagPattern.exec(source);
|
|
143
175
|
const baseIsFinite = typeof baseOffset === "number" && Number.isFinite(baseOffset);
|
|
176
|
+
const htmlAllowTags = normalizeHtmlAllowlist(options?.html?.allowTags);
|
|
177
|
+
const htmlAutoClose = options?.html?.autoClose === true;
|
|
178
|
+
const htmlMaxNewlines = normalizeNewlineLimit(options?.html?.maxNewlines);
|
|
179
|
+
const mdxAutoClose = options?.mdx?.autoClose === true;
|
|
180
|
+
const mdxMaxNewlines = normalizeNewlineLimit(options?.mdx?.maxNewlines);
|
|
181
|
+
const mdxAllowlist = normalizeComponentAllowlist(options?.mdx?.componentAllowlist);
|
|
144
182
|
while (match !== null) {
|
|
145
183
|
const start = match.index;
|
|
146
184
|
const tagName = match[1];
|
|
147
185
|
const matchText = match[0];
|
|
148
|
-
const
|
|
186
|
+
const tagNameLower = tagName.toLowerCase();
|
|
187
|
+
const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagNameLower);
|
|
188
|
+
const mdxCandidate = isLikelyMdxComponent(tagName);
|
|
189
|
+
const mdxAllowed = mdxCandidate && (!mdxAllowlist || mdxAllowlist.has(tagName));
|
|
190
|
+
if (mdxCandidate && mdxAllowlist && !mdxAllowed) {
|
|
191
|
+
tagPattern.lastIndex = start + 1;
|
|
192
|
+
match = tagPattern.exec(source);
|
|
193
|
+
continue;
|
|
194
|
+
}
|
|
149
195
|
let end = tagPattern.lastIndex;
|
|
150
|
-
if (!isSelfClosing && !
|
|
196
|
+
if (!isSelfClosing && !mdxAllowed) {
|
|
151
197
|
const closingIndex = findClosingHtmlTag(lowerSource, tagName.toLowerCase(), end);
|
|
152
198
|
if (closingIndex === -1) {
|
|
199
|
+
if (htmlAutoClose && htmlAllowTags.has(tagNameLower)) {
|
|
200
|
+
const tail = source.slice(end);
|
|
201
|
+
const newlineCount = countNewlines(tail, htmlMaxNewlines + 1);
|
|
202
|
+
if (newlineCount <= htmlMaxNewlines) {
|
|
203
|
+
if (start > cursor) {
|
|
204
|
+
const absoluteFrom = baseIsFinite ? baseOffset + cursor : void 0;
|
|
205
|
+
const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
|
|
206
|
+
pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
|
|
207
|
+
}
|
|
208
|
+
const rawSegment2 = source.slice(start);
|
|
209
|
+
const closedValue = `${rawSegment2}</${tagName}>`;
|
|
210
|
+
const segment2 = {
|
|
211
|
+
kind: "html",
|
|
212
|
+
value: closedValue,
|
|
213
|
+
range: createSegmentRange(baseOffset, start, source.length),
|
|
214
|
+
sanitized: sanitizeHtmlInWorker(closedValue)
|
|
215
|
+
};
|
|
216
|
+
segments.push(segment2);
|
|
217
|
+
cursor = source.length;
|
|
218
|
+
break;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
153
221
|
tagPattern.lastIndex = start + 1;
|
|
154
222
|
match = tagPattern.exec(source);
|
|
155
223
|
continue;
|
|
@@ -161,8 +229,8 @@ function splitByTagSegments(source, baseOffset, parseInline) {
|
|
|
161
229
|
const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
|
|
162
230
|
pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
|
|
163
231
|
}
|
|
164
|
-
|
|
165
|
-
const kind =
|
|
232
|
+
let rawSegment = source.slice(start, end);
|
|
233
|
+
const kind = mdxAllowed ? "mdx" : "html";
|
|
166
234
|
const segment = {
|
|
167
235
|
kind,
|
|
168
236
|
value: rawSegment,
|
|
@@ -171,6 +239,17 @@ function splitByTagSegments(source, baseOffset, parseInline) {
|
|
|
171
239
|
if (kind === "html") {
|
|
172
240
|
segment.sanitized = sanitizeHtmlInWorker(rawSegment);
|
|
173
241
|
} else {
|
|
242
|
+
const tail = source.slice(end);
|
|
243
|
+
const newlineCount = countNewlines(tail, mdxMaxNewlines + 1);
|
|
244
|
+
if (mdxAutoClose && newlineCount > mdxMaxNewlines) {
|
|
245
|
+
tagPattern.lastIndex = start + 1;
|
|
246
|
+
match = tagPattern.exec(source);
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
if (mdxAutoClose && !rawSegment.endsWith("/>")) {
|
|
250
|
+
rawSegment = selfCloseTag(rawSegment);
|
|
251
|
+
segment.value = rawSegment;
|
|
252
|
+
}
|
|
174
253
|
segment.status = "pending";
|
|
175
254
|
}
|
|
176
255
|
segments.push(segment);
|
|
@@ -295,6 +374,48 @@ var VOID_HTML_TAGS = /* @__PURE__ */ new Set(["br", "hr", "img", "meta", "input"
|
|
|
295
374
|
function isVoidHtmlTag(tagName) {
|
|
296
375
|
return VOID_HTML_TAGS.has(tagName.toLowerCase());
|
|
297
376
|
}
|
|
377
|
+
function normalizeNewlineLimit(value) {
|
|
378
|
+
if (!Number.isFinite(value ?? Number.NaN)) {
|
|
379
|
+
return 2;
|
|
380
|
+
}
|
|
381
|
+
return Math.max(0, value ?? 0);
|
|
382
|
+
}
|
|
383
|
+
function normalizeHtmlAllowlist(value) {
|
|
384
|
+
if (!value) return DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
|
|
385
|
+
const tags = /* @__PURE__ */ new Set();
|
|
386
|
+
for (const tag of value) {
|
|
387
|
+
if (tag) {
|
|
388
|
+
tags.add(tag.toLowerCase());
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
return tags.size > 0 ? tags : DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
|
|
392
|
+
}
|
|
393
|
+
function normalizeComponentAllowlist(value) {
|
|
394
|
+
if (!value) return null;
|
|
395
|
+
const tags = /* @__PURE__ */ new Set();
|
|
396
|
+
for (const tag of value) {
|
|
397
|
+
if (tag) tags.add(tag);
|
|
398
|
+
}
|
|
399
|
+
return tags.size > 0 ? tags : null;
|
|
400
|
+
}
|
|
401
|
+
function countNewlines(value, limit) {
|
|
402
|
+
let count = 0;
|
|
403
|
+
for (let i = 0; i < value.length; i++) {
|
|
404
|
+
if (value.charCodeAt(i) === 10) {
|
|
405
|
+
count += 1;
|
|
406
|
+
if (limit !== void 0 && count >= limit) {
|
|
407
|
+
return count;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
return count;
|
|
412
|
+
}
|
|
413
|
+
function selfCloseTag(rawTag) {
|
|
414
|
+
if (rawTag.endsWith("/>")) return rawTag;
|
|
415
|
+
const closeIndex = rawTag.lastIndexOf(">");
|
|
416
|
+
if (closeIndex === -1) return rawTag;
|
|
417
|
+
return `${rawTag.slice(0, closeIndex)}/>`;
|
|
418
|
+
}
|
|
298
419
|
function isLikelyMdxComponent(tagName) {
|
|
299
420
|
const first = tagName.charAt(0);
|
|
300
421
|
return first.toUpperCase() === first && first.toLowerCase() !== first;
|