@stream-mdx/core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,11 @@ interface InlineParserOptions {
7
7
  * parsing many intermediate states.
8
8
  */
9
9
  maxCacheEntries?: number;
10
+ /**
11
+ * Enable parsing `$...$` and `$$...$$` math nodes.
12
+ * Defaults to `true`.
13
+ */
14
+ enableMath?: boolean;
10
15
  }
11
16
  interface InlineParseOptions {
12
17
  /**
@@ -31,6 +36,11 @@ declare class InlineParser {
31
36
  * Parse inline content with memoization
32
37
  */
33
38
  parse(content: string, options?: InlineParseOptions): InlineNode[];
39
+ /**
40
+ * Streaming regex anticipation helper. Returns an append string if a plugin
41
+ * declares an incomplete match at the end of the buffer.
42
+ */
43
+ getRegexAnticipationAppend(content: string): string | null;
34
44
  /**
35
45
  * Clear the memoization cache
36
46
  */
@@ -1,10 +1,40 @@
1
1
  // src/inline-parser.ts
2
/**
 * Return a global (`g`-flagged) copy of `pattern` for use in scan loops.
 *
 * A fresh RegExp is always created, even when `pattern` is already global:
 * `exec` on a global regex reads and writes `lastIndex`, so scanning with the
 * caller's own object would start from whatever offset an earlier scan left
 * behind and would mutate state owned by the plugin definition.
 *
 * @param {RegExp} pattern Source pattern; it is never mutated.
 * @returns {RegExp} New regex with the same source and flags plus `g`,
 *   starting at `lastIndex` 0.
 */
function ensureGlobal(pattern) {
  const flags = pattern.global ? pattern.flags : `${pattern.flags}g`;
  return new RegExp(pattern.source, flags);
}
7
/**
 * Find the right-most match of `pattern` in `value`.
 *
 * Iteration goes through `String.prototype.matchAll`, which advances past
 * zero-length matches on its own; a hand-written `exec` loop would spin
 * forever on a pattern that can match the empty string.
 *
 * @param {RegExp} pattern Pattern to search for (made global internally).
 * @param {string} value Text to scan.
 * @returns {RegExpMatchArray | null} The last match (with `index`), or `null`
 *   when there is none.
 */
function findLastMatch(pattern, value) {
  const re = ensureGlobal(pattern);
  // Always scan from the beginning, even if `re` carries a stale offset.
  re.lastIndex = 0;
  let last = null;
  for (const hit of String(value).matchAll(re)) {
    last = hit;
  }
  return last;
}
16
/**
 * Find the first match of `pattern` in `value` at or after `startIndex`.
 *
 * @param {RegExp} pattern Pattern to search for (made global internally).
 * @param {string} value Text to scan.
 * @param {number} startIndex Offset to start from; negative values are
 *   clamped to 0.
 * @returns {RegExpExecArray | null} The match, or `null` when none is found.
 */
function findMatchAfter(pattern, value, startIndex) {
  const re = ensureGlobal(pattern);
  re.lastIndex = Math.max(0, startIndex);
  const found = re.exec(value);
  // A successful exec leaves `lastIndex` just past the match. Rewind it so a
  // pattern object shared with the caller is not left mid-string.
  re.lastIndex = 0;
  return found;
}
21
/**
 * Decide whether two patterns describe the same delimiter.
 *
 * The `g` flag is ignored: it only affects how a regex is iterated, not what
 * it matches, and the scan helpers in this file force it on anyway. Treating
 * `/x/` and `/x/g` as different would route a symmetric delimiter through the
 * start/end search instead of parity counting.
 *
 * @param {RegExp} a First pattern.
 * @param {RegExp} b Second pattern.
 * @returns {boolean} `true` when sources and non-`g` flags are identical.
 */
function isSamePattern(a, b) {
  if (a === b) return true;
  const withoutGlobal = (flags) => flags.replace("g", "");
  return a.source === b.source && withoutGlobal(a.flags) === withoutGlobal(b.flags);
}
24
/**
 * Count the non-overlapping matches of `pattern` in `value`.
 *
 * Uses `String.prototype.matchAll` so that patterns able to match the empty
 * string still terminate (a plain `exec` loop would not advance).
 *
 * @param {RegExp} pattern Pattern to count (made global internally).
 * @param {string} value Text to scan.
 * @returns {number} Number of matches found.
 */
function countMatches(pattern, value) {
  const re = ensureGlobal(pattern);
  // Count from the start regardless of any offset left on `re`.
  re.lastIndex = 0;
  let total = 0;
  for (const _hit of String(value).matchAll(re)) {
    total += 1;
  }
  return total;
}
2
32
  var InlineParser = class {
3
33
  constructor(options = {}) {
4
34
  this.plugins = [];
5
35
  this.cache = /* @__PURE__ */ new Map();
6
36
  this.maxCacheEntries = Number.isFinite(options.maxCacheEntries ?? Number.NaN) ? Math.max(0, options.maxCacheEntries ?? 0) : 2e3;
7
- this.registerDefaultPlugins();
37
+ this.registerDefaultPlugins({ enableMath: options.enableMath !== false });
8
38
  }
9
39
  /**
10
40
  * Register a plugin with the parser
@@ -46,6 +76,42 @@ var InlineParser = class {
46
76
  }
47
77
  return result;
48
78
  }
79
+ /**
80
+ * Streaming regex anticipation helper. Returns an append string if a plugin
81
+ * declares an incomplete match at the end of the buffer.
82
+ */
83
+ getRegexAnticipationAppend(content) {
84
+ if (!content || this.plugins.length === 0) {
85
+ return null;
86
+ }
87
+ for (const plugin of this.plugins) {
88
+ if (!("re" in plugin)) continue;
89
+ const regexPlugin = plugin;
90
+ const anticipation = regexPlugin.anticipation;
91
+ if (!anticipation) continue;
92
+ const maxScanChars = Number.isFinite(anticipation.maxScanChars ?? Number.NaN) ? Math.max(1, anticipation.maxScanChars ?? 0) : 240;
93
+ const scan = content.slice(Math.max(0, content.length - maxScanChars));
94
+ if (regexPlugin.fastCheck && !regexPlugin.fastCheck(scan)) {
95
+ continue;
96
+ }
97
+ const lastStart = findLastMatch(anticipation.start, scan);
98
+ if (!lastStart) continue;
99
+ if (isSamePattern(anticipation.start, anticipation.end)) {
100
+ const occurrences = countMatches(anticipation.start, scan);
101
+ if (occurrences % 2 === 0) {
102
+ continue;
103
+ }
104
+ } else {
105
+ const startIndex = lastStart.index + lastStart[0].length;
106
+ const hasEnd = Boolean(findMatchAfter(anticipation.end, scan, startIndex));
107
+ if (hasEnd) continue;
108
+ }
109
+ const appendValue = typeof anticipation.append === "function" ? anticipation.append(lastStart, content) : anticipation.append;
110
+ if (!appendValue) continue;
111
+ return appendValue;
112
+ }
113
+ return null;
114
+ }
49
115
  /**
50
116
  * Clear the memoization cache
51
117
  */
@@ -70,10 +136,27 @@ var InlineParser = class {
70
136
  * Register default plugins with proper precedence ordering
71
137
  * Lower priority numbers = higher precedence (run first)
72
138
  */
73
- registerDefaultPlugins() {
139
+ registerDefaultPlugins(options) {
140
+ if (options.enableMath) {
141
+ this.registerPlugin({
142
+ id: "math-display",
143
+ priority: 0,
144
+ re: /\$\$([\s\S]+?)\$\$/g,
145
+ toNode: (match) => ({ kind: "math-display", tex: match[1].trim() }),
146
+ fastCheck: (text) => text.indexOf("$$") !== -1
147
+ });
148
+ this.registerPlugin({
149
+ id: "math-inline",
150
+ priority: 1,
151
+ re: /\$([^$\n]+?)\$/g,
152
+ // Non-greedy to prevent spanning multiple expressions
153
+ toNode: (match) => ({ kind: "math-inline", tex: match[1].trim() }),
154
+ fastCheck: (text) => text.indexOf("$") !== -1
155
+ });
156
+ }
74
157
  this.registerPlugin({
75
158
  id: "escaped-character",
76
- priority: 0,
159
+ priority: 2,
77
160
  re: /\\([\\`*_{}\[\]()#+\-.!>])/g,
78
161
  toNode: (match) => ({
79
162
  kind: "text",
@@ -82,19 +165,11 @@ var InlineParser = class {
82
165
  fastCheck: (text) => text.indexOf("\\") !== -1
83
166
  });
84
167
  this.registerPlugin({
85
- id: "math-display",
86
- priority: 1,
87
- re: /\$\$([^$]+?)\$\$/g,
88
- toNode: (match) => ({ kind: "math-display", tex: match[1].trim() }),
89
- fastCheck: (text) => text.indexOf("$$") !== -1
90
- });
91
- this.registerPlugin({
92
- id: "math-inline",
168
+ id: "hard-break",
93
169
  priority: 2,
94
- re: /\$([^$\n]+?)\$/g,
95
- // Non-greedy to prevent spanning multiple expressions
96
- toNode: (match) => ({ kind: "math-inline", tex: match[1].trim() }),
97
- fastCheck: (text) => text.indexOf("$") !== -1
170
+ re: /\\\r?\n| {2,}\r?\n/g,
171
+ toNode: (_match) => ({ kind: "br" }),
172
+ fastCheck: (text) => text.indexOf("\n") !== -1 || text.indexOf("\r") !== -1
98
173
  });
99
174
  this.registerPlugin({
100
175
  id: "code-spans",
@@ -165,9 +240,16 @@ var InlineParser = class {
165
240
  this.registerPlugin({
166
241
  id: "citations",
167
242
  priority: 10,
168
- re: /\[\^([^\]]+)\]|@cite\{([^}]+)\}/g,
169
- toNode: (match) => ({ kind: "citation", id: match[1] || match[2] }),
170
- fastCheck: (text) => text.indexOf("@") !== -1 || text.indexOf("[^") !== -1
243
+ re: /\[\^([^\]\n]+)\]|@cite\{([^}\n]+)\}|\{cite:([^}\n]+)\}/g,
244
+ toNode: (match) => ({ kind: "citation", id: match[1] || match[2] || match[3] }),
245
+ fastCheck: (text) => text.indexOf("@cite") !== -1 || text.indexOf("[^") !== -1 || text.indexOf("{cite:") !== -1,
246
+ anticipation: {
247
+ start: /@cite\{|\{cite:/g,
248
+ end: /\}/g,
249
+ full: /@cite\{[^}\n]+?\}|\{cite:[^}\n]+?\}/g,
250
+ append: "}",
251
+ maxScanChars: 120
252
+ }
171
253
  });
172
254
  this.registerPlugin({
173
255
  id: "mentions",
@@ -41,9 +41,23 @@ var rehypeParse = __toESM(require("rehype-parse"), 1);
41
41
  var rehypeSanitize = __toESM(require("rehype-sanitize"), 1);
42
42
  var rehypeStringify = __toESM(require("rehype-stringify"), 1);
43
43
  var import_unified = require("unified");
44
- var { defaultSchema } = rehypeSanitize;
44
+ var rehypeSanitizeModule = rehypeSanitize;
45
+ var defaultSchema = rehypeSanitizeModule.defaultSchema;
46
/**
 * Unwrap a module namespace to the plugin function it carries.
 *
 * Checks `mod`, then `mod.default`, then `mod.default.default`, and returns
 * the first of those that is a function. When none is, `mod` itself is
 * returned unchanged.
 */
var resolvePlugin = (mod) => {
  let candidate = mod;
  for (let hops = 0; ; hops += 1) {
    if (typeof candidate === "function") return candidate;
    // Stop after two `.default` hops or when there is nothing left to unwrap.
    if (hops === 2 || candidate == null) return mod;
    candidate = candidate.default;
  }
};
56
+ var rehypeParsePlugin = resolvePlugin(rehypeParse);
57
+ var rehypeSanitizePlugin = resolvePlugin(rehypeSanitizeModule);
58
+ var rehypeStringifyPlugin = resolvePlugin(rehypeStringify);
45
59
  var SANITIZED_SCHEMA = createSchema();
46
- var sanitizeProcessor = (0, import_unified.unified)().use(rehypeParse.default, { fragment: true }).use(rehypeSanitize.default, SANITIZED_SCHEMA).use(rehypeStringify.default).freeze();
60
+ var sanitizeProcessor = (0, import_unified.unified)().use(rehypeParsePlugin, { fragment: true }).use(rehypeSanitizePlugin, SANITIZED_SCHEMA).use(rehypeStringifyPlugin).freeze();
47
61
  function sanitizeHtmlInWorker(html) {
48
62
  if (!html) return "";
49
63
  try {
@@ -159,9 +173,27 @@ function mergeAttributes(existing, additions) {
159
173
  }
160
174
 
161
175
  // src/mixed-content.ts
162
- function extractMixedContentSegments(raw, baseOffset, parseInline) {
176
// Lower-case HTML tag names used as the fallback allow-list for auto-closing
// an unclosed HTML tag: `normalizeHtmlAllowlist` returns this set when the
// caller supplies no (or only empty) `allowTags`.
var DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS = /* @__PURE__ */ new Set([
  "span",
  "em",
  "strong",
  "code",
  "kbd",
  "del",
  "s",
  "mark",
  "sub",
  "sup",
  "i",
  "b",
  "u",
  "small",
  "abbr",
  "a"
]);
194
+ function extractMixedContentSegments(raw, baseOffset, parseInline, options) {
163
195
  if (!raw) return [];
164
- const initial = splitByTagSegments(raw, baseOffset, parseInline);
196
+ const initial = splitByTagSegments(raw, baseOffset, parseInline, options);
165
197
  const expanded = [];
166
198
  for (const segment of initial) {
167
199
  if (segment.kind === "text") {
@@ -172,22 +204,58 @@ function extractMixedContentSegments(raw, baseOffset, parseInline) {
172
204
  }
173
205
  return mergeAdjacentTextSegments(expanded, parseInline);
174
206
  }
175
- function splitByTagSegments(source, baseOffset, parseInline) {
207
+ function splitByTagSegments(source, baseOffset, parseInline, options) {
176
208
  const segments = [];
177
209
  const lowerSource = source.toLowerCase();
178
210
  const tagPattern = /<([A-Za-z][\w:-]*)([^<>]*?)\/?>/g;
179
211
  let cursor = 0;
180
212
  let match = tagPattern.exec(source);
181
213
  const baseIsFinite = typeof baseOffset === "number" && Number.isFinite(baseOffset);
214
+ const htmlAllowTags = normalizeHtmlAllowlist(options?.html?.allowTags);
215
+ const htmlAutoClose = options?.html?.autoClose === true;
216
+ const htmlMaxNewlines = normalizeNewlineLimit(options?.html?.maxNewlines);
217
+ const mdxAutoClose = options?.mdx?.autoClose === true;
218
+ const mdxMaxNewlines = normalizeNewlineLimit(options?.mdx?.maxNewlines);
219
+ const mdxAllowlist = normalizeComponentAllowlist(options?.mdx?.componentAllowlist);
182
220
  while (match !== null) {
183
221
  const start = match.index;
184
222
  const tagName = match[1];
185
223
  const matchText = match[0];
186
- const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagName);
224
+ const tagNameLower = tagName.toLowerCase();
225
+ const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagNameLower);
226
+ const mdxCandidate = isLikelyMdxComponent(tagName);
227
+ const mdxAllowed = mdxCandidate && (!mdxAllowlist || mdxAllowlist.has(tagName));
228
+ if (mdxCandidate && mdxAllowlist && !mdxAllowed) {
229
+ tagPattern.lastIndex = start + 1;
230
+ match = tagPattern.exec(source);
231
+ continue;
232
+ }
187
233
  let end = tagPattern.lastIndex;
188
- if (!isSelfClosing && !isLikelyMdxComponent(tagName)) {
234
+ if (!isSelfClosing && !mdxAllowed) {
189
235
  const closingIndex = findClosingHtmlTag(lowerSource, tagName.toLowerCase(), end);
190
236
  if (closingIndex === -1) {
237
+ if (htmlAutoClose && htmlAllowTags.has(tagNameLower)) {
238
+ const tail = source.slice(end);
239
+ const newlineCount = countNewlines(tail, htmlMaxNewlines + 1);
240
+ if (newlineCount <= htmlMaxNewlines) {
241
+ if (start > cursor) {
242
+ const absoluteFrom = baseIsFinite ? baseOffset + cursor : void 0;
243
+ const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
244
+ pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
245
+ }
246
+ const rawSegment2 = source.slice(start);
247
+ const closedValue = `${rawSegment2}</${tagName}>`;
248
+ const segment2 = {
249
+ kind: "html",
250
+ value: closedValue,
251
+ range: createSegmentRange(baseOffset, start, source.length),
252
+ sanitized: sanitizeHtmlInWorker(closedValue)
253
+ };
254
+ segments.push(segment2);
255
+ cursor = source.length;
256
+ break;
257
+ }
258
+ }
191
259
  tagPattern.lastIndex = start + 1;
192
260
  match = tagPattern.exec(source);
193
261
  continue;
@@ -199,8 +267,8 @@ function splitByTagSegments(source, baseOffset, parseInline) {
199
267
  const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
200
268
  pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
201
269
  }
202
- const rawSegment = source.slice(start, end);
203
- const kind = isLikelyMdxComponent(tagName) ? "mdx" : "html";
270
+ let rawSegment = source.slice(start, end);
271
+ const kind = mdxAllowed ? "mdx" : "html";
204
272
  const segment = {
205
273
  kind,
206
274
  value: rawSegment,
@@ -209,6 +277,17 @@ function splitByTagSegments(source, baseOffset, parseInline) {
209
277
  if (kind === "html") {
210
278
  segment.sanitized = sanitizeHtmlInWorker(rawSegment);
211
279
  } else {
280
+ const tail = source.slice(end);
281
+ const newlineCount = countNewlines(tail, mdxMaxNewlines + 1);
282
+ if (mdxAutoClose && newlineCount > mdxMaxNewlines) {
283
+ tagPattern.lastIndex = start + 1;
284
+ match = tagPattern.exec(source);
285
+ continue;
286
+ }
287
+ if (mdxAutoClose && !rawSegment.endsWith("/>")) {
288
+ rawSegment = selfCloseTag(rawSegment);
289
+ segment.value = rawSegment;
290
+ }
212
291
  segment.status = "pending";
213
292
  }
214
293
  segments.push(segment);
@@ -333,6 +412,48 @@ var VOID_HTML_TAGS = /* @__PURE__ */ new Set(["br", "hr", "img", "meta", "input"
333
412
  function isVoidHtmlTag(tagName) {
334
413
  return VOID_HTML_TAGS.has(tagName.toLowerCase());
335
414
  }
415
/**
 * Normalize a user-supplied newline limit.
 *
 * @param value Requested limit; may be missing.
 * @returns `2` when `value` is not a finite number, otherwise `value`
 *   clamped to be non-negative.
 */
function normalizeNewlineLimit(value) {
  const candidate = value ?? Number.NaN;
  return Number.isFinite(candidate) ? Math.max(0, candidate) : 2;
}
421
/**
 * Build the lower-cased set of HTML tag names eligible for auto-closing.
 *
 * @param value Iterable of tag names, a single tag name, or nothing.
 * @returns A set of trimmed, lower-cased names, or
 *   DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS when `value` is missing or yields no
 *   usable name.
 */
function normalizeHtmlAllowlist(value) {
  if (!value) return DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
  // A bare string is itself iterable (character by character). Treat it as
  // one tag name instead of exploding "span" into "s", "p", "a", "n".
  const entries = typeof value === "string" ? [value] : value;
  const tags = /* @__PURE__ */ new Set();
  for (const entry of entries) {
    // Skip non-string entries rather than throwing on `.toLowerCase()`.
    if (typeof entry !== "string") continue;
    const name = entry.trim().toLowerCase();
    if (name) tags.add(name);
  }
  return tags.size > 0 ? tags : DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
}
431
+ function normalizeComponentAllowlist(value) {
432
+ if (!value) return null;
433
+ const tags = /* @__PURE__ */ new Set();
434
+ for (const tag of value) {
435
+ if (tag) tags.add(tag);
436
+ }
437
+ return tags.size > 0 ? tags : null;
438
+ }
439
/**
 * Count line-feed (`\n`) characters in `value`.
 *
 * @param value Text to inspect.
 * @param limit Optional cap: counting stops as soon as the running count
 *   reaches it, so the result never exceeds what was needed to decide.
 * @returns The number of line feeds seen before stopping.
 */
function countNewlines(value, limit) {
  const capped = limit !== void 0;
  let seen = 0;
  let at = value.indexOf("\n");
  while (at !== -1) {
    seen += 1;
    if (capped && seen >= limit) break;
    at = value.indexOf("\n", at + 1);
  }
  return seen;
}
451
/**
 * Turn an opening tag into its self-closing form by replacing everything
 * from the last `>` onward with `/>`.
 *
 * @param rawTag Raw tag text, e.g. `<Foo bar="1">`.
 * @returns The self-closed text, or `rawTag` unchanged when it already ends
 *   with `/>` or contains no `>`.
 */
function selfCloseTag(rawTag) {
  const gt = rawTag.endsWith("/>") ? -1 : rawTag.lastIndexOf(">");
  return gt === -1 ? rawTag : `${rawTag.slice(0, gt)}/>`;
}
336
457
  function isLikelyMdxComponent(tagName) {
337
458
  const first = tagName.charAt(0);
338
459
  return first.toUpperCase() === first && first.toLowerCase() !== first;
@@ -1,7 +1,21 @@
1
1
  import { InlineNode, MixedContentSegment } from './types.cjs';
2
2
 
3
- declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[]): MixedContentSegment[];
3
/**
 * Options for auto-closing a raw HTML tag whose closing tag has not been
 * found in the source.
 */
interface MixedContentAutoCloseHtmlOptions {
    /** Auto-closing is applied only when this is exactly `true`. */
    autoClose?: boolean;
    /**
     * Maximum number of `\n` characters that may follow the opening tag for
     * it to still be auto-closed. The bundled implementation uses `2` when
     * this is missing or not finite, and clamps negative values to `0`.
     */
    maxNewlines?: number;
    /**
     * Tag names eligible for auto-closing; names are lower-cased before
     * comparison. A built-in set of inline tags is used when this is missing
     * or empty.
     */
    allowTags?: Iterable<string>;
}
8
/**
 * Options for handling MDX component tags (tags whose name starts with an
 * upper-case letter).
 */
interface MixedContentAutoCloseMdxOptions {
    /**
     * Only takes effect when exactly `true`: a component tag that does not
     * end in `/>` is rewritten to its self-closing form.
     */
    autoClose?: boolean;
    /**
     * With `autoClose` on, a component tag followed by more than this many
     * `\n` characters is skipped. The bundled implementation uses `2` when
     * this is missing or not finite, and clamps negative values to `0`.
     */
    maxNewlines?: number;
    /**
     * Case-sensitive component names. When non-empty, component-like tags
     * not listed here are skipped instead of being emitted as MDX segments.
     */
    componentAllowlist?: Iterable<string>;
}
13
/** Optional fourth argument of `extractMixedContentSegments`. */
interface MixedContentOptions {
    /** Settings for unclosed raw HTML tags. */
    html?: MixedContentAutoCloseHtmlOptions;
    /** Settings for MDX component tags. */
    mdx?: MixedContentAutoCloseMdxOptions;
}
17
+ declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[], options?: MixedContentOptions): MixedContentSegment[];
4
18
  declare function isLikelyMdxComponent(tagName: string): boolean;
5
19
  declare function findClosingHtmlTag(lowerSource: string, lowerTagName: string, startIndex: number): number;
6
20
 
7
- export { extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
21
+ export { type MixedContentAutoCloseHtmlOptions, type MixedContentAutoCloseMdxOptions, type MixedContentOptions, extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
@@ -1,7 +1,21 @@
1
1
  import { InlineNode, MixedContentSegment } from './types.js';
2
2
 
3
- declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[]): MixedContentSegment[];
3
/**
 * Options for auto-closing a raw HTML tag whose closing tag has not been
 * found in the source.
 */
interface MixedContentAutoCloseHtmlOptions {
    /** Auto-closing is applied only when this is exactly `true`. */
    autoClose?: boolean;
    /**
     * Maximum number of `\n` characters that may follow the opening tag for
     * it to still be auto-closed. The bundled implementation uses `2` when
     * this is missing or not finite, and clamps negative values to `0`.
     */
    maxNewlines?: number;
    /**
     * Tag names eligible for auto-closing; names are lower-cased before
     * comparison. A built-in set of inline tags is used when this is missing
     * or empty.
     */
    allowTags?: Iterable<string>;
}
8
/**
 * Options for handling MDX component tags (tags whose name starts with an
 * upper-case letter).
 */
interface MixedContentAutoCloseMdxOptions {
    /**
     * Only takes effect when exactly `true`: a component tag that does not
     * end in `/>` is rewritten to its self-closing form.
     */
    autoClose?: boolean;
    /**
     * With `autoClose` on, a component tag followed by more than this many
     * `\n` characters is skipped. The bundled implementation uses `2` when
     * this is missing or not finite, and clamps negative values to `0`.
     */
    maxNewlines?: number;
    /**
     * Case-sensitive component names. When non-empty, component-like tags
     * not listed here are skipped instead of being emitted as MDX segments.
     */
    componentAllowlist?: Iterable<string>;
}
13
/** Optional fourth argument of `extractMixedContentSegments`. */
interface MixedContentOptions {
    /** Settings for unclosed raw HTML tags. */
    html?: MixedContentAutoCloseHtmlOptions;
    /** Settings for MDX component tags. */
    mdx?: MixedContentAutoCloseMdxOptions;
}
17
+ declare function extractMixedContentSegments(raw: string, baseOffset: number | undefined, parseInline: (content: string) => InlineNode[], options?: MixedContentOptions): MixedContentSegment[];
4
18
  declare function isLikelyMdxComponent(tagName: string): boolean;
5
19
  declare function findClosingHtmlTag(lowerSource: string, lowerTagName: string, startIndex: number): number;
6
20
 
7
- export { extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
21
+ export { type MixedContentAutoCloseHtmlOptions, type MixedContentAutoCloseMdxOptions, type MixedContentOptions, extractMixedContentSegments, findClosingHtmlTag, isLikelyMdxComponent };
@@ -3,9 +3,23 @@ import * as rehypeParse from "rehype-parse";
3
3
  import * as rehypeSanitize from "rehype-sanitize";
4
4
  import * as rehypeStringify from "rehype-stringify";
5
5
  import { unified } from "unified";
6
- var { defaultSchema } = rehypeSanitize;
6
+ var rehypeSanitizeModule = rehypeSanitize;
7
+ var defaultSchema = rehypeSanitizeModule.defaultSchema;
8
/**
 * Unwrap a module namespace to the plugin function it carries.
 *
 * Checks `mod`, then `mod.default`, then `mod.default.default`, and returns
 * the first of those that is a function. When none is, `mod` itself is
 * returned unchanged.
 */
var resolvePlugin = (mod) => {
  let candidate = mod;
  for (let hops = 0; ; hops += 1) {
    if (typeof candidate === "function") return candidate;
    // Stop after two `.default` hops or when there is nothing left to unwrap.
    if (hops === 2 || candidate == null) return mod;
    candidate = candidate.default;
  }
};
18
+ var rehypeParsePlugin = resolvePlugin(rehypeParse);
19
+ var rehypeSanitizePlugin = resolvePlugin(rehypeSanitizeModule);
20
+ var rehypeStringifyPlugin = resolvePlugin(rehypeStringify);
7
21
  var SANITIZED_SCHEMA = createSchema();
8
- var sanitizeProcessor = unified().use(rehypeParse.default, { fragment: true }).use(rehypeSanitize.default, SANITIZED_SCHEMA).use(rehypeStringify.default).freeze();
22
+ var sanitizeProcessor = unified().use(rehypeParsePlugin, { fragment: true }).use(rehypeSanitizePlugin, SANITIZED_SCHEMA).use(rehypeStringifyPlugin).freeze();
9
23
  function sanitizeHtmlInWorker(html) {
10
24
  if (!html) return "";
11
25
  try {
@@ -121,9 +135,27 @@ function mergeAttributes(existing, additions) {
121
135
  }
122
136
 
123
137
  // src/mixed-content.ts
124
- function extractMixedContentSegments(raw, baseOffset, parseInline) {
138
// Lower-case HTML tag names used as the fallback allow-list for auto-closing
// an unclosed HTML tag: `normalizeHtmlAllowlist` returns this set when the
// caller supplies no (or only empty) `allowTags`.
var DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS = /* @__PURE__ */ new Set([
  "span",
  "em",
  "strong",
  "code",
  "kbd",
  "del",
  "s",
  "mark",
  "sub",
  "sup",
  "i",
  "b",
  "u",
  "small",
  "abbr",
  "a"
]);
156
+ function extractMixedContentSegments(raw, baseOffset, parseInline, options) {
125
157
  if (!raw) return [];
126
- const initial = splitByTagSegments(raw, baseOffset, parseInline);
158
+ const initial = splitByTagSegments(raw, baseOffset, parseInline, options);
127
159
  const expanded = [];
128
160
  for (const segment of initial) {
129
161
  if (segment.kind === "text") {
@@ -134,22 +166,58 @@ function extractMixedContentSegments(raw, baseOffset, parseInline) {
134
166
  }
135
167
  return mergeAdjacentTextSegments(expanded, parseInline);
136
168
  }
137
- function splitByTagSegments(source, baseOffset, parseInline) {
169
+ function splitByTagSegments(source, baseOffset, parseInline, options) {
138
170
  const segments = [];
139
171
  const lowerSource = source.toLowerCase();
140
172
  const tagPattern = /<([A-Za-z][\w:-]*)([^<>]*?)\/?>/g;
141
173
  let cursor = 0;
142
174
  let match = tagPattern.exec(source);
143
175
  const baseIsFinite = typeof baseOffset === "number" && Number.isFinite(baseOffset);
176
+ const htmlAllowTags = normalizeHtmlAllowlist(options?.html?.allowTags);
177
+ const htmlAutoClose = options?.html?.autoClose === true;
178
+ const htmlMaxNewlines = normalizeNewlineLimit(options?.html?.maxNewlines);
179
+ const mdxAutoClose = options?.mdx?.autoClose === true;
180
+ const mdxMaxNewlines = normalizeNewlineLimit(options?.mdx?.maxNewlines);
181
+ const mdxAllowlist = normalizeComponentAllowlist(options?.mdx?.componentAllowlist);
144
182
  while (match !== null) {
145
183
  const start = match.index;
146
184
  const tagName = match[1];
147
185
  const matchText = match[0];
148
- const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagName);
186
+ const tagNameLower = tagName.toLowerCase();
187
+ const isSelfClosing = matchText.endsWith("/>") || isVoidHtmlTag(tagNameLower);
188
+ const mdxCandidate = isLikelyMdxComponent(tagName);
189
+ const mdxAllowed = mdxCandidate && (!mdxAllowlist || mdxAllowlist.has(tagName));
190
+ if (mdxCandidate && mdxAllowlist && !mdxAllowed) {
191
+ tagPattern.lastIndex = start + 1;
192
+ match = tagPattern.exec(source);
193
+ continue;
194
+ }
149
195
  let end = tagPattern.lastIndex;
150
- if (!isSelfClosing && !isLikelyMdxComponent(tagName)) {
196
+ if (!isSelfClosing && !mdxAllowed) {
151
197
  const closingIndex = findClosingHtmlTag(lowerSource, tagName.toLowerCase(), end);
152
198
  if (closingIndex === -1) {
199
+ if (htmlAutoClose && htmlAllowTags.has(tagNameLower)) {
200
+ const tail = source.slice(end);
201
+ const newlineCount = countNewlines(tail, htmlMaxNewlines + 1);
202
+ if (newlineCount <= htmlMaxNewlines) {
203
+ if (start > cursor) {
204
+ const absoluteFrom = baseIsFinite ? baseOffset + cursor : void 0;
205
+ const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
206
+ pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
207
+ }
208
+ const rawSegment2 = source.slice(start);
209
+ const closedValue = `${rawSegment2}</${tagName}>`;
210
+ const segment2 = {
211
+ kind: "html",
212
+ value: closedValue,
213
+ range: createSegmentRange(baseOffset, start, source.length),
214
+ sanitized: sanitizeHtmlInWorker(closedValue)
215
+ };
216
+ segments.push(segment2);
217
+ cursor = source.length;
218
+ break;
219
+ }
220
+ }
153
221
  tagPattern.lastIndex = start + 1;
154
222
  match = tagPattern.exec(source);
155
223
  continue;
@@ -161,8 +229,8 @@ function splitByTagSegments(source, baseOffset, parseInline) {
161
229
  const absoluteTo = baseIsFinite ? baseOffset + start : void 0;
162
230
  pushTextSegment(segments, source.slice(cursor, start), absoluteFrom, absoluteTo, parseInline);
163
231
  }
164
- const rawSegment = source.slice(start, end);
165
- const kind = isLikelyMdxComponent(tagName) ? "mdx" : "html";
232
+ let rawSegment = source.slice(start, end);
233
+ const kind = mdxAllowed ? "mdx" : "html";
166
234
  const segment = {
167
235
  kind,
168
236
  value: rawSegment,
@@ -171,6 +239,17 @@ function splitByTagSegments(source, baseOffset, parseInline) {
171
239
  if (kind === "html") {
172
240
  segment.sanitized = sanitizeHtmlInWorker(rawSegment);
173
241
  } else {
242
+ const tail = source.slice(end);
243
+ const newlineCount = countNewlines(tail, mdxMaxNewlines + 1);
244
+ if (mdxAutoClose && newlineCount > mdxMaxNewlines) {
245
+ tagPattern.lastIndex = start + 1;
246
+ match = tagPattern.exec(source);
247
+ continue;
248
+ }
249
+ if (mdxAutoClose && !rawSegment.endsWith("/>")) {
250
+ rawSegment = selfCloseTag(rawSegment);
251
+ segment.value = rawSegment;
252
+ }
174
253
  segment.status = "pending";
175
254
  }
176
255
  segments.push(segment);
@@ -295,6 +374,48 @@ var VOID_HTML_TAGS = /* @__PURE__ */ new Set(["br", "hr", "img", "meta", "input"
295
374
  function isVoidHtmlTag(tagName) {
296
375
  return VOID_HTML_TAGS.has(tagName.toLowerCase());
297
376
  }
377
/**
 * Normalize a user-supplied newline limit.
 *
 * @param value Requested limit; may be missing.
 * @returns `2` when `value` is not a finite number, otherwise `value`
 *   clamped to be non-negative.
 */
function normalizeNewlineLimit(value) {
  const candidate = value ?? Number.NaN;
  return Number.isFinite(candidate) ? Math.max(0, candidate) : 2;
}
383
/**
 * Build the lower-cased set of HTML tag names eligible for auto-closing.
 *
 * @param value Iterable of tag names, a single tag name, or nothing.
 * @returns A set of trimmed, lower-cased names, or
 *   DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS when `value` is missing or yields no
 *   usable name.
 */
function normalizeHtmlAllowlist(value) {
  if (!value) return DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
  // A bare string is itself iterable (character by character). Treat it as
  // one tag name instead of exploding "span" into "s", "p", "a", "n".
  const entries = typeof value === "string" ? [value] : value;
  const tags = /* @__PURE__ */ new Set();
  for (const entry of entries) {
    // Skip non-string entries rather than throwing on `.toLowerCase()`.
    if (typeof entry !== "string") continue;
    const name = entry.trim().toLowerCase();
    if (name) tags.add(name);
  }
  return tags.size > 0 ? tags : DEFAULT_INLINE_HTML_AUTOCLOSE_TAGS;
}
393
+ function normalizeComponentAllowlist(value) {
394
+ if (!value) return null;
395
+ const tags = /* @__PURE__ */ new Set();
396
+ for (const tag of value) {
397
+ if (tag) tags.add(tag);
398
+ }
399
+ return tags.size > 0 ? tags : null;
400
+ }
401
/**
 * Count line-feed (`\n`) characters in `value`.
 *
 * @param value Text to inspect.
 * @param limit Optional cap: counting stops as soon as the running count
 *   reaches it, so the result never exceeds what was needed to decide.
 * @returns The number of line feeds seen before stopping.
 */
function countNewlines(value, limit) {
  const capped = limit !== void 0;
  let seen = 0;
  let at = value.indexOf("\n");
  while (at !== -1) {
    seen += 1;
    if (capped && seen >= limit) break;
    at = value.indexOf("\n", at + 1);
  }
  return seen;
}
413
/**
 * Turn an opening tag into its self-closing form by replacing everything
 * from the last `>` onward with `/>`.
 *
 * @param rawTag Raw tag text, e.g. `<Foo bar="1">`.
 * @returns The self-closed text, or `rawTag` unchanged when it already ends
 *   with `/>` or contains no `>`.
 */
function selfCloseTag(rawTag) {
  const gt = rawTag.endsWith("/>") ? -1 : rawTag.lastIndexOf(">");
  return gt === -1 ? rawTag : `${rawTag.slice(0, gt)}/>`;
}
298
419
  function isLikelyMdxComponent(tagName) {
299
420
  const first = tagName.charAt(0);
300
421
  return first.toUpperCase() === first && first.toLowerCase() !== first;