stream-markdown-parser 0.0.40 → 0.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +57 -10
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -6888,7 +6888,24 @@ const BASE_COMMON_HTML_TAGS = new Set([
|
|
|
6888
6888
|
]);
|
|
6889
6889
|
const OPEN_TAG_RE = /<([A-Z][\w-]*)(?=[\s/>]|$)/gi;
|
|
6890
6890
|
const CLOSE_TAG_RE = /<\/\s*([A-Z][\w-]*)(?=[\s/>]|$)/gi;
|
|
6891
|
-
const
|
|
6891
|
+
const TAG_NAME_AT_START_RE = /^<\s*(?:\/\s*)?([A-Z][\w-]*)/i;
|
|
6892
|
+
/**
 * Scans `html` from index 0 and returns the index of the first `>` character
 * that is not inside a single-quoted or double-quoted span.
 *
 * Quote state is tracked with two flags: a double quote toggles `inDouble`
 * only while not inside single quotes, and a single quote toggles `inSingle`
 * only while not inside double quotes. There is no backslash/escape handling,
 * and quotes are tracked anywhere in the input, not only after an `=`.
 *
 * The call sites visible in this diff pass text that begins at a tag's `<`
 * (e.g. `content.slice(idx)`, `chunk.slice(lt)`) or an accumulated buffer.
 *
 * @param {string} html - Text to scan; read via `.length` and index access.
 *   (Presumably always a string; verify against callers outside this diff.)
 * @returns {number} Zero-based index of the first unquoted `>`, or -1 when the
 *   input contains none (including when a quote is still open at the end).
 */
function findTagCloseIndexOutsideQuotes(html) {
|
|
6893
|
+
let inSingle = false;
|
|
6894
|
+
let inDouble = false;
|
|
6895
|
+
for (let i = 0; i < html.length; i++) {
|
|
6896
|
+
const ch = html[i];
|
|
6897
|
+
if (ch === "\"" && !inSingle) { // double quote opens/closes a span unless single-quoted
|
|
6898
|
+
inDouble = !inDouble;
|
|
6899
|
+
continue;
|
|
6900
|
+
}
|
|
6901
|
+
if (ch === "'" && !inDouble) { // single quote opens/closes a span unless double-quoted
|
|
6902
|
+
inSingle = !inSingle;
|
|
6903
|
+
continue;
|
|
6904
|
+
}
|
|
6905
|
+
if (ch === ">" && !inSingle && !inDouble) return i; // first closing bracket outside any quoted span
|
|
6906
|
+
}
|
|
6907
|
+
return -1; // no unquoted closing bracket found
|
|
6908
|
+
}
|
|
6892
6909
|
function tokenToRaw$1(token) {
|
|
6893
6910
|
const shape = token;
|
|
6894
6911
|
return String(shape.raw ?? shape.content ?? shape.markup ?? "");
|
|
@@ -6916,7 +6933,7 @@ function findFirstIncompleteTag(content, tagSet) {
|
|
|
6916
6933
|
if (idx < 0) continue;
|
|
6917
6934
|
const tag = (m[1] ?? "").toLowerCase();
|
|
6918
6935
|
if (!tagSet.has(tag)) continue;
|
|
6919
|
-
if (content.slice(idx)
|
|
6936
|
+
if (findTagCloseIndexOutsideQuotes(content.slice(idx)) !== -1) continue;
|
|
6920
6937
|
if (!first || idx < first.index) first = {
|
|
6921
6938
|
index: idx,
|
|
6922
6939
|
tag,
|
|
@@ -6928,7 +6945,7 @@ function findFirstIncompleteTag(content, tagSet) {
|
|
|
6928
6945
|
if (idx < 0) continue;
|
|
6929
6946
|
const tag = (m[1] ?? "").toLowerCase();
|
|
6930
6947
|
if (!isCommonHtmlTagOrPrefix(tag, tagSet)) continue;
|
|
6931
|
-
if (content.slice(idx)
|
|
6948
|
+
if (findTagCloseIndexOutsideQuotes(content.slice(idx)) !== -1) continue;
|
|
6932
6949
|
if (!first || idx < first.index) first = {
|
|
6933
6950
|
index: idx,
|
|
6934
6951
|
tag,
|
|
@@ -6987,14 +7004,21 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
|
|
|
6987
7004
|
break;
|
|
6988
7005
|
}
|
|
6989
7006
|
pushTextPart(chunk.slice(cursor, lt), baseToken);
|
|
6990
|
-
const
|
|
6991
|
-
|
|
7007
|
+
const sub = chunk.slice(lt);
|
|
7008
|
+
const tagMatch = sub.match(TAG_NAME_AT_START_RE);
|
|
7009
|
+
if (!tagMatch) {
|
|
7010
|
+
pushTextPart("<", baseToken);
|
|
7011
|
+
cursor = lt + 1;
|
|
7012
|
+
continue;
|
|
7013
|
+
}
|
|
7014
|
+
const closeIdx = findTagCloseIndexOutsideQuotes(sub);
|
|
7015
|
+
if (closeIdx === -1) {
|
|
6992
7016
|
pushTextPart("<", baseToken);
|
|
6993
7017
|
cursor = lt + 1;
|
|
6994
7018
|
continue;
|
|
6995
7019
|
}
|
|
6996
|
-
const tagText =
|
|
6997
|
-
const tagName = (
|
|
7020
|
+
const tagText = sub.slice(0, closeIdx + 1);
|
|
7021
|
+
const tagName = (tagMatch[1] ?? "").toLowerCase();
|
|
6998
7022
|
if (tagSet.has(tagName)) out.push({
|
|
6999
7023
|
type: "html_inline",
|
|
7000
7024
|
tag: "",
|
|
@@ -7025,7 +7049,7 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
|
|
|
7025
7049
|
if (pending) {
|
|
7026
7050
|
pending.buffer += tokenToRaw$1(child);
|
|
7027
7051
|
pendingAtEnd = pending.buffer;
|
|
7028
|
-
const closeIdx = pending.buffer
|
|
7052
|
+
const closeIdx = findTagCloseIndexOutsideQuotes(pending.buffer);
|
|
7029
7053
|
if (closeIdx === -1) continue;
|
|
7030
7054
|
const tagChunk = pending.buffer.slice(0, closeIdx + 1);
|
|
7031
7055
|
const afterChunk = pending.buffer.slice(closeIdx + 1);
|
|
@@ -7040,6 +7064,19 @@ function fixStreamingHtmlInlineChildren(children, tagSet) {
|
|
|
7040
7064
|
if (afterChunk) processTextChunk(afterChunk);
|
|
7041
7065
|
continue;
|
|
7042
7066
|
}
|
|
7067
|
+
if (child.type === "html_inline") {
|
|
7068
|
+
const content = tokenToRaw$1(child);
|
|
7069
|
+
const tagName = (content.match(TAG_NAME_AT_START_RE)?.[1] ?? "").toLowerCase();
|
|
7070
|
+
if (tagName && tagSet.has(tagName) && findTagCloseIndexOutsideQuotes(content) === -1) {
|
|
7071
|
+
pending = {
|
|
7072
|
+
tag: tagName,
|
|
7073
|
+
buffer: content,
|
|
7074
|
+
closing: /^<\s*\//.test(content)
|
|
7075
|
+
};
|
|
7076
|
+
pendingAtEnd = pending.buffer;
|
|
7077
|
+
continue;
|
|
7078
|
+
}
|
|
7079
|
+
}
|
|
7043
7080
|
if (child.type === "text") {
|
|
7044
7081
|
const content = String(child.content ?? "");
|
|
7045
7082
|
if (!content.includes("<")) {
|
|
@@ -7165,16 +7202,26 @@ function applyFixHtmlInlineTokens(md, options = {}) {
|
|
|
7165
7202
|
attrs.push([attrName, attrValue]);
|
|
7166
7203
|
}
|
|
7167
7204
|
if (customTagSet.has(tag)) {
|
|
7168
|
-
const contentMatch = t.content?.match(new RegExp(`<\\s*${tag}[^>]*>([\\s\\S]
|
|
7205
|
+
const contentMatch = t.content?.match(new RegExp(`<\\s*${tag}[^>]*>([\\s\\S]*)`, "i"));
|
|
7169
7206
|
const raw$1 = t.content;
|
|
7207
|
+
const endTagRegex = new RegExp(`</\\s*${tag}\\s*>`, "i");
|
|
7208
|
+
const endTagIndex = t.content?.toLowerCase().indexOf(`</${tag}>`) ?? -1;
|
|
7170
7209
|
t.children = [{
|
|
7171
7210
|
type: tag,
|
|
7172
|
-
content: contentMatch ? contentMatch[1] : "",
|
|
7211
|
+
content: endTagIndex !== -1 ? contentMatch[1].split(endTagRegex)[0] ? contentMatch ? contentMatch[1] : "" : "" : contentMatch ? contentMatch[1].replace(/<.*$/, "") : "",
|
|
7173
7212
|
raw: raw$1,
|
|
7174
7213
|
attrs,
|
|
7175
7214
|
tag,
|
|
7176
7215
|
loading
|
|
7177
7216
|
}];
|
|
7217
|
+
if (endTagIndex !== -1) {
|
|
7218
|
+
const afterContent = t.content?.slice(endTagIndex + tag.length + 3) || "";
|
|
7219
|
+
if (afterContent.trim()) toks.splice(i + 1, 0, {
|
|
7220
|
+
type: "text",
|
|
7221
|
+
content: afterContent,
|
|
7222
|
+
raw: afterContent
|
|
7223
|
+
});
|
|
7224
|
+
}
|
|
7178
7225
|
} else t.children = [{
|
|
7179
7226
|
type: "html_block",
|
|
7180
7227
|
content: t.content,
|