@haklex/rich-litexml 0.15.4 → 0.15.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { a as createDefaultRegistry, c as LitexmlRegistry, d as registerParseHTML, i as deserializeNodesFromXml, l as registerCustomReaders, n as serializeToXml, o as registerCustomWriters, r as deserializeFromXml, s as registerBuiltinWriters, t as serializeNodesToXml, u as registerBuiltinReaders } from "./src-DmI3DOeG.js";
1
+ import { a as createDefaultRegistry, c as LitexmlRegistry, d as registerParseHTML, i as deserializeNodesFromXml, l as registerCustomReaders, n as serializeToXml, o as registerCustomWriters, r as deserializeFromXml, s as registerBuiltinWriters, t as serializeNodesToXml, u as registerBuiltinReaders } from "./src-z3l9E6En.js";
2
2
  //#region src/index-browser.ts
3
3
  registerParseHTML((html) => new DOMParser().parseFromString(html, "text/html"));
4
4
  //#endregion
@@ -1 +1 @@
1
- {"version":3,"file":"deserializer.d.ts","sourceRoot":"","sources":["../src/deserializer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,SAAS,CAAC;AAG5E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAQlD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,GAAG,qBAAqB,CAkBhG;AAED,wBAAgB,uBAAuB,CACrC,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,eAAe,GACxB,qBAAqB,EAAE,CAKzB"}
1
+ {"version":3,"file":"deserializer.d.ts","sourceRoot":"","sources":["../src/deserializer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,SAAS,CAAC;AAG5E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAwElD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,eAAe,GAAG,qBAAqB,CAkBhG;AAED,wBAAgB,uBAAuB,CACrC,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,eAAe,GACxB,qBAAqB,EAAE,CAKzB"}
package/dist/node.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { a as createDefaultRegistry, c as LitexmlRegistry, d as registerParseHTML, i as deserializeNodesFromXml, l as registerCustomReaders, n as serializeToXml, o as registerCustomWriters, r as deserializeFromXml, s as registerBuiltinWriters, t as serializeNodesToXml, u as registerBuiltinReaders } from "./src-DmI3DOeG.js";
1
+ import { a as createDefaultRegistry, c as LitexmlRegistry, d as registerParseHTML, i as deserializeNodesFromXml, l as registerCustomReaders, n as serializeToXml, o as registerCustomWriters, r as deserializeFromXml, s as registerBuiltinWriters, t as serializeNodesToXml, u as registerBuiltinReaders } from "./src-z3l9E6En.js";
2
2
  import { parseHTML } from "linkedom";
3
3
  //#region src/index-node.ts
4
4
  registerParseHTML((html) => parseHTML(html).document);
@@ -1025,8 +1025,66 @@ function getFormatBit(tagName) {
1025
1025
  }
1026
1026
  //#endregion
1027
1027
  //#region src/deserializer.ts
1028
/**
 * HTML5 void elements — tags the HTML parser already treats as self-closing
 * regardless of `/>` syntax. Custom or non-void tags written as `<tag … />`
 * are otherwise interpreted as opening tags by the HTML parser, which then
 * swallows all following siblings as children until a matching close tag (or
 * end of document) is found. Those are expanded into explicit `<tag …></tag>`
 * before the string is handed to linkedom / DOMParser.
 *
 * NOTE: declarations stay `var`, as in the bundled output; whether a switch to
 * `const` is safe with respect to evaluation order cannot be judged from here.
 */
var HTML_VOID_ELEMENTS = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "param",
  "source",
  "track",
  "wbr"
]);
/**
 * Matches one self-closing tag: group 1 is the tag name, group 2 the raw
 * attribute text (bare names or names with single/double-quoted values).
 *
 * The tag name must begin with an ASCII letter. HTML tokenizers only open a
 * tag when `<` is followed by a letter, so something like `<3 />` is plain
 * text; expanding it would emit `</3>`, which an HTML parser turns into a
 * bogus comment and thereby alters the text content.
 */
var SELF_CLOSING_RE = /<([A-Za-z][\w-]*)((?:\s+[\w-]+(?:\s*=\s*(?:"[^"]*"|'[^']*'))?)*)\s*\/>/g;
/** Literal delimiters of a CDATA section. */
var CDATA_OPEN = "<![CDATA[";
var CDATA_CLOSE = "]]>";
1055
/**
 * Rewrite every self-closing tag found in `chunk` as an explicit open/close
 * pair, keeping the attribute text exactly as written.
 *
 * Tags whose lower-cased name is in HTML_VOID_ELEMENTS are returned unchanged.
 *
 * @param {string} chunk - markup fragment to rewrite
 * @returns {string} the fragment with non-void self-closing tags expanded
 */
function expandSelfClosing(chunk) {
  const rewriteTag = (whole, name, attributes) => {
    const isVoid = HTML_VOID_ELEMENTS.has(name.toLowerCase());
    return isVoid ? whole : `<${name}${attributes}></${name}>`;
  };
  return chunk.replaceAll(SELF_CLOSING_RE, rewriteTag);
}
1061
/**
 * Expand `<custom-tag … />` to `<custom-tag …></custom-tag>` everywhere except
 * inside CDATA sections.
 *
 * The input is walked left to right with `indexOf`: text before each CDATA
 * opener goes through `expandSelfClosing`, while the CDATA section itself
 * (delimiters included) is copied verbatim. An opener with no matching closer
 * causes the rest of the input to be copied verbatim.
 *
 * @param {string} xml - markup to normalize
 * @returns {string} normalized markup
 */
function normalizeSelfClosingTags(xml) {
  let result = "";
  let position = 0;
  while (position < xml.length) {
    const cdataStart = xml.indexOf(CDATA_OPEN, position);
    if (cdataStart === -1) {
      // No further CDATA: the whole tail is ordinary markup.
      result += expandSelfClosing(xml.slice(position));
      break;
    }
    result += expandSelfClosing(xml.slice(position, cdataStart));
    const cdataEnd = xml.indexOf(CDATA_CLOSE, cdataStart + CDATA_OPEN.length);
    // Unterminated section: treat everything up to end of input as CDATA.
    const resumeAt = cdataEnd === -1 ? xml.length : cdataEnd + CDATA_CLOSE.length;
    result += xml.slice(cdataStart, resumeAt);
    position = resumeAt;
  }
  return result;
}
1028
1086
  function parseXml(xml) {
1029
- return parseHTML(`<!DOCTYPE html><html><body>${xml}</body></html>`);
1087
+ return parseHTML(`<!DOCTYPE html><html><body>${normalizeSelfClosingTags(xml)}</body></html>`);
1030
1088
  }
1031
1089
  function deserializeFromXml(xml, registry) {
1032
1090
  const doc = parseXml(xml);
@@ -1075,16 +1133,24 @@ function createReaderContext(registry) {
1075
1133
  parseChildren(element) {
1076
1134
  const blockLevel = isBlockContainer(element);
1077
1135
  const nodes = [];
1078
- for (const child of element.childNodes) if (child.nodeType === 3) {
1079
- const text = child.textContent ?? "";
1080
- if (blockLevel && text.trim() === "") continue;
1081
- if (text === "") continue;
1082
- nodes.push(makeTextNode(text, 0));
1083
- } else if (child.nodeType === 1) {
1136
+ let pendingText = "";
1137
+ const flushText = () => {
1138
+ if (pendingText === "") return;
1139
+ if (blockLevel && pendingText.trim() === "") {
1140
+ pendingText = "";
1141
+ return;
1142
+ }
1143
+ nodes.push(makeTextNode(pendingText, 0));
1144
+ pendingText = "";
1145
+ };
1146
+ for (const child of element.childNodes) if (child.nodeType === 3) pendingText += child.textContent ?? "";
1147
+ else if (child.nodeType === 1) {
1148
+ flushText();
1084
1149
  const parsed = parseElement(child, registry, ctx, 0);
1085
1150
  if (parsed) if (Array.isArray(parsed)) nodes.push(...parsed);
1086
1151
  else nodes.push(parsed);
1087
1152
  }
1153
+ flushText();
1088
1154
  return nodes;
1089
1155
  },
1090
1156
  parseNestedState(xml) {
@@ -1110,15 +1176,20 @@ function parseElement(element, registry, ctx, inheritedFormat) {
1110
1176
  }
1111
1177
  function parseInlineChildren(element, registry, ctx, format) {
1112
1178
  const nodes = [];
1113
- for (const child of element.childNodes) if (child.nodeType === 3) {
1114
- const text = child.textContent ?? "";
1115
- if (text === "") continue;
1116
- nodes.push(makeTextNode(text, format));
1117
- } else if (child.nodeType === 1) {
1179
+ let pendingText = "";
1180
+ const flushText = () => {
1181
+ if (pendingText === "") return;
1182
+ nodes.push(makeTextNode(pendingText, format));
1183
+ pendingText = "";
1184
+ };
1185
+ for (const child of element.childNodes) if (child.nodeType === 3) pendingText += child.textContent ?? "";
1186
+ else if (child.nodeType === 1) {
1187
+ flushText();
1118
1188
  const parsed = parseElement(child, registry, ctx, format);
1119
1189
  if (parsed) if (Array.isArray(parsed)) nodes.push(...parsed);
1120
1190
  else nodes.push(parsed);
1121
1191
  }
1192
+ flushText();
1122
1193
  return nodes;
1123
1194
  }
1124
1195
  function parseFallbackNode(element) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@haklex/rich-litexml",
3
- "version": "0.15.4",
3
+ "version": "0.15.6",
4
4
  "description": "Bidirectional Lexical SerializedNode <-> XML conversion with plugin registry",
5
5
  "repository": {
6
6
  "type": "git",