stream-markdown-parser 0.0.40 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6888,7 +6888,24 @@ const BASE_COMMON_HTML_TAGS = new Set([
6888
6888
  ]);
6889
6889
  const OPEN_TAG_RE = /<([A-Z][\w-]*)(?=[\s/>]|$)/gi;
6890
6890
  const CLOSE_TAG_RE = /<\/\s*([A-Z][\w-]*)(?=[\s/>]|$)/gi;
6891
- const FULL_TAG_RE = /^<\s*(?:\/\s*)?([A-Z][\w-]*)(?:\s[^>]*?)?\/?>/i;
6891
+ const TAG_NAME_AT_START_RE = /^<\s*(?:\/\s*)?([A-Z][\w-]*)/i;
6892
+ function findTagCloseIndexOutsideQuotes(html) {
6893
+ let inSingle = false;
6894
+ let inDouble = false;
6895
+ for (let i = 0; i < html.length; i++) {
6896
+ const ch = html[i];
6897
+ if (ch === "\"" && !inSingle) {
6898
+ inDouble = !inDouble;
6899
+ continue;
6900
+ }
6901
+ if (ch === "'" && !inDouble) {
6902
+ inSingle = !inSingle;
6903
+ continue;
6904
+ }
6905
+ if (ch === ">" && !inSingle && !inDouble) return i;
6906
+ }
6907
+ return -1;
6908
+ }
6892
6909
  function tokenToRaw$1(token) {
6893
6910
  const shape = token;
6894
6911
  return String(shape.raw ?? shape.content ?? shape.markup ?? "");
@@ -6916,7 +6933,7 @@ function findFirstIncompleteTag(content, tagSet) {
6916
6933
  if (idx < 0) continue;
6917
6934
  const tag = (m[1] ?? "").toLowerCase();
6918
6935
  if (!tagSet.has(tag)) continue;
6919
- if (content.slice(idx).includes(">")) continue;
6936
+ if (findTagCloseIndexOutsideQuotes(content.slice(idx)) !== -1) continue;
6920
6937
  if (!first || idx < first.index) first = {
6921
6938
  index: idx,
6922
6939
  tag,
@@ -6928,7 +6945,7 @@ function findFirstIncompleteTag(content, tagSet) {
6928
6945
  if (idx < 0) continue;
6929
6946
  const tag = (m[1] ?? "").toLowerCase();
6930
6947
  if (!isCommonHtmlTagOrPrefix(tag, tagSet)) continue;
6931
- if (content.slice(idx).includes(">")) continue;
6948
+ if (findTagCloseIndexOutsideQuotes(content.slice(idx)) !== -1) continue;
6932
6949
  if (!first || idx < first.index) first = {
6933
6950
  index: idx,
6934
6951
  tag,
@@ -6987,14 +7004,21 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
6987
7004
  break;
6988
7005
  }
6989
7006
  pushTextPart(chunk.slice(cursor, lt), baseToken);
6990
- const fullMatch = chunk.slice(lt).match(FULL_TAG_RE);
6991
- if (!fullMatch) {
7007
+ const sub = chunk.slice(lt);
7008
+ const tagMatch = sub.match(TAG_NAME_AT_START_RE);
7009
+ if (!tagMatch) {
7010
+ pushTextPart("<", baseToken);
7011
+ cursor = lt + 1;
7012
+ continue;
7013
+ }
7014
+ const closeIdx = findTagCloseIndexOutsideQuotes(sub);
7015
+ if (closeIdx === -1) {
6992
7016
  pushTextPart("<", baseToken);
6993
7017
  cursor = lt + 1;
6994
7018
  continue;
6995
7019
  }
6996
- const tagText = fullMatch[0];
6997
- const tagName = (fullMatch[1] ?? "").toLowerCase();
7020
+ const tagText = sub.slice(0, closeIdx + 1);
7021
+ const tagName = (tagMatch[1] ?? "").toLowerCase();
6998
7022
  if (tagSet.has(tagName)) out.push({
6999
7023
  type: "html_inline",
7000
7024
  tag: "",
@@ -7025,7 +7049,7 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
7025
7049
  if (pending) {
7026
7050
  pending.buffer += tokenToRaw$1(child);
7027
7051
  pendingAtEnd = pending.buffer;
7028
- const closeIdx = pending.buffer.indexOf(">");
7052
+ const closeIdx = findTagCloseIndexOutsideQuotes(pending.buffer);
7029
7053
  if (closeIdx === -1) continue;
7030
7054
  const tagChunk = pending.buffer.slice(0, closeIdx + 1);
7031
7055
  const afterChunk = pending.buffer.slice(closeIdx + 1);
@@ -7040,6 +7064,19 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
7040
7064
  if (afterChunk) processTextChunk(afterChunk);
7041
7065
  continue;
7042
7066
  }
7067
+ if (child.type === "html_inline") {
7068
+ const content = tokenToRaw$1(child);
7069
+ const tagName = (content.match(TAG_NAME_AT_START_RE)?.[1] ?? "").toLowerCase();
7070
+ if (tagName && tagSet.has(tagName) && findTagCloseIndexOutsideQuotes(content) === -1) {
7071
+ pending = {
7072
+ tag: tagName,
7073
+ buffer: content,
7074
+ closing: /^<\s*\//.test(content)
7075
+ };
7076
+ pendingAtEnd = pending.buffer;
7077
+ continue;
7078
+ }
7079
+ }
7043
7080
  if (child.type === "text") {
7044
7081
  const content = String(child.content ?? "");
7045
7082
  if (!content.includes("<")) {
@@ -7165,16 +7202,26 @@ function applyFixHtmlInlineTokens(md, options = {}) {
7165
7202
  attrs.push([attrName, attrValue]);
7166
7203
  }
7167
7204
  if (customTagSet.has(tag)) {
7168
- const contentMatch = t.content?.match(new RegExp(`<\\s*${tag}[^>]*>([\\s\\S]*?)<\\s*\\/\\s*${tag}\\s*>`, "i"));
7205
+ const contentMatch = t.content?.match(new RegExp(`<\\s*${tag}[^>]*>([\\s\\S]*)`, "i"));
7169
7206
  const raw$1 = t.content;
7207
+ const endTagRegex = new RegExp(`</\\s*${tag}\\s*>`, "i");
7208
+ const endTagIndex = t.content?.toLowerCase().indexOf(`</${tag}>`) ?? -1;
7170
7209
  t.children = [{
7171
7210
  type: tag,
7172
- content: contentMatch ? contentMatch[1] : "",
7211
+ content: endTagIndex !== -1 ? contentMatch[1].split(endTagRegex)[0] ? contentMatch ? contentMatch[1] : "" : "" : contentMatch ? contentMatch[1].replace(/<.*$/, "") : "",
7173
7212
  raw: raw$1,
7174
7213
  attrs,
7175
7214
  tag,
7176
7215
  loading
7177
7216
  }];
7217
+ if (endTagIndex !== -1) {
7218
+ const afterContent = t.content?.slice(endTagIndex + tag.length + 3) || "";
7219
+ if (afterContent.trim()) toks.splice(i + 1, 0, {
7220
+ type: "text",
7221
+ content: afterContent,
7222
+ raw: afterContent
7223
+ });
7224
+ }
7178
7225
  } else t.children = [{
7179
7226
  type: "html_block",
7180
7227
  content: t.content,