npm - @ai-sdk-tool/rxml - Versions diffs - 0.1.0 → 0.1.1 - Mend

@ai-sdk-tool/rxml 0.1.0 → 0.1.1

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.cjs CHANGED Viewed

@@ -27,16 +27,19 @@ __export(index_exports, {
   RXMLStringifyError: () => RXMLStringifyError,
   XMLTokenizer: () => XMLTokenizer,
   XMLTransformStream: () => XMLTransformStream,
+  coerceBySchema: () => coerceBySchema,
   coerceDomBySchema: () => coerceDomBySchema,
   countTagOccurrences: () => countTagOccurrences,
   createXMLStream: () => createXMLStream,
   domToObject: () => domToObject,
   extractRawInner: () => extractRawInner,
   filter: () => filter,
+  findAllTopLevelRanges: () => findAllTopLevelRanges,
   findElementByIdStream: () => findElementByIdStream,
   findElementsByClassStream: () => findElementsByClassStream,
   findFirstTopLevelRange: () => findFirstTopLevelRange,
   getPropertySchema: () => getPropertySchema,
+  getSchemaType: () => getSchemaType,
   getStringTypedProperties: () => getStringTypedProperties,
   parse: () => parse,
   parseFromStream: () => parseFromStream,
@@ -49,7 +52,9 @@ __export(index_exports, {
   stringify: () => stringify,
   stringifyNode: () => stringifyNode,
   stringifyNodes: () => stringifyNodes,
-  toContentString: () => toContentString
+  toContentString: () => toContentString,
+  unescapeXml: () => unescapeXml,
+  unwrapJsonSchema: () => unwrapJsonSchema
 });
 module.exports = __toCommonJS(index_exports);
@@ -395,22 +400,33 @@ function coerceDomBySchema(domObject, schema) {
   }
 }
 function getStringTypedProperties(schema) {
-  const set = /* @__PURE__ */ new Set();
-  const unwrapped = unwrapJsonSchema(schema);
-  if (unwrapped && typeof unwrapped === "object") {
+  const collected = /* @__PURE__ */ new Set();
+  const visit = (s) => {
+    const unwrapped = unwrapJsonSchema(s);
+    if (!unwrapped || typeof unwrapped !== "object") return;
     const u = unwrapped;
-    const props = u.properties;
-    if (props && typeof props === "object") {
-      for (const key of Object.keys(props)) {
-        const propSchema = props[key];
-        const propType = getSchemaType(propSchema);
-        if (propType === "string") {
-          set.add(key);
+    const type = getSchemaType(unwrapped);
+    if (type === "object") {
+      const props = u.properties;
+      if (props && typeof props === "object") {
+        for (const [key, propSchema] of Object.entries(props)) {
+          const t = getSchemaType(propSchema);
+          if (t === "string") {
+            collected.add(key);
+          } else if (t === "object" || t === "array") {
+            visit(propSchema);
+          }
         }
       }
+    } else if (type === "array") {
+      const items = u.items;
+      if (items) visit(items);
+      const prefix = u.prefixItems;
+      if (Array.isArray(prefix)) prefix.forEach(visit);
     }
-  }
-  return set;
+  };
+  visit(schema);
+  return collected;
 }
 function processArrayContent(value, schema, textNodeName) {
   if (!Array.isArray(value)) return value;
@@ -554,6 +570,9 @@ function escapeXmlMinimalAttr(value, wrapper = '"') {
   }
   return escaped;
 }
+function unescapeXml(text) {
+  return text.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&amp;/g, "&");
+}
 // src/schema/extraction.ts
 function extractRawInner(xmlContent, tagName) {
@@ -571,6 +590,11 @@ function extractRawInner(xmlContent, tagName) {
     if (i >= len) return void 0;
     const ch = xmlContent[i];
     if (ch === "!") {
+      if (xmlContent.startsWith("!DOCTYPE", i + 1)) {
+        const gt2 = xmlContent.indexOf(">", i + 1);
+        i = gt2 === -1 ? len : gt2 + 1;
+        continue;
+      }
       if (xmlContent.startsWith("!--", i + 1)) {
         const close = xmlContent.indexOf("-->", i + 4);
         i = close === -1 ? len : close + 3;
@@ -635,6 +659,11 @@ function extractRawInner(xmlContent, tagName) {
             if (nx >= len) break;
             const h = xmlContent[nx];
             if (h === "!") {
+              if (xmlContent.startsWith("!DOCTYPE", nx + 1)) {
+                const gt22 = xmlContent.indexOf(">", nx + 1);
+                pos = gt22 === -1 ? len : gt22 + 1;
+                continue;
+              }
               if (xmlContent.startsWith("!--", nx + 1)) {
                 const close = xmlContent.indexOf("-->", nx + 4);
                 pos = close === -1 ? len : close + 3;
@@ -715,6 +744,155 @@ function extractRawInner(xmlContent, tagName) {
   }
   return void 0;
 }
+function findAllInnerRanges(xmlContent, tagName) {
+  const len = xmlContent.length;
+  const target = tagName;
+  const ranges = [];
+  let i = 0;
+  while (i < len) {
+    const lt = xmlContent.indexOf("<", i);
+    if (lt === -1) break;
+    i = lt + 1;
+    if (i >= len) break;
+    const ch = xmlContent[i];
+    if (ch === "!") {
+      if (xmlContent.startsWith("!--", i + 1)) {
+        const close = xmlContent.indexOf("-->", i + 4);
+        i = close === -1 ? len : close + 3;
+        continue;
+      }
+      if (xmlContent.startsWith("![CDATA[", i + 1)) {
+        const close = xmlContent.indexOf("]]>", i + 9);
+        i = close === -1 ? len : close + 3;
+        continue;
+      }
+      const gt = xmlContent.indexOf(">", i + 1);
+      i = gt === -1 ? len : gt + 1;
+      continue;
+    }
+    if (ch === "?") {
+      const close = xmlContent.indexOf("?>", i + 1);
+      i = close === -1 ? len : close + 2;
+      continue;
+    }
+    if (ch === "/") {
+      const gt = xmlContent.indexOf(">", i + 1);
+      i = gt === -1 ? len : gt + 1;
+      continue;
+    }
+    let j = i;
+    if (j < len && isNameStartChar(xmlContent[j])) {
+      j++;
+      while (j < len && isNameChar(xmlContent[j])) j++;
+    }
+    const name = xmlContent.slice(i, j);
+    let k = j;
+    let isSelfClosing = false;
+    while (k < len) {
+      const c = xmlContent[k];
+      if (c === '"' || c === "'") {
+        k = skipQuoted(xmlContent, k);
+        continue;
+      }
+      if (c === ">") break;
+      if (c === "/" && xmlContent[k + 1] === ">") {
+        isSelfClosing = true;
+        k++;
+        break;
+      }
+      k++;
+    }
+    const tagEnd = k;
+    if (name !== target) {
+      i = xmlContent[tagEnd] === ">" ? tagEnd + 1 : tagEnd + 1;
+      continue;
+    }
+    const contentStart = xmlContent[tagEnd] === ">" ? tagEnd + 1 : tagEnd + 1;
+    if (isSelfClosing) {
+      ranges.push({ start: contentStart, end: contentStart });
+      i = contentStart;
+      continue;
+    }
+    let pos = contentStart;
+    let sameDepth = 1;
+    while (pos < len) {
+      const nextLt = xmlContent.indexOf("<", pos);
+      if (nextLt === -1) break;
+      const nx = nextLt + 1;
+      if (nx >= len) break;
+      const h = xmlContent[nx];
+      if (h === "!") {
+        if (xmlContent.startsWith("!--", nx + 1)) {
+          const close = xmlContent.indexOf("-->", nx + 4);
+          pos = close === -1 ? len : close + 3;
+          continue;
+        }
+        if (xmlContent.startsWith("![CDATA[", nx + 1)) {
+          const close = xmlContent.indexOf("]]>", nx + 9);
+          pos = close === -1 ? len : close + 3;
+          continue;
+        }
+        const gt2 = xmlContent.indexOf(">", nx + 1);
+        pos = gt2 === -1 ? len : gt2 + 1;
+        continue;
+      } else if (h === "?") {
+        const close = xmlContent.indexOf("?>", nx + 1);
+        pos = close === -1 ? len : close + 2;
+        continue;
+      } else if (h === "/") {
+        let t = nx + 1;
+        if (t < len && isNameStartChar(xmlContent[t])) {
+          t++;
+          while (t < len && isNameChar(xmlContent[t])) t++;
+        }
+        const endName = xmlContent.slice(nx + 1, t);
+        const gt2 = xmlContent.indexOf(">", t);
+        if (endName === target) {
+          sameDepth--;
+          if (sameDepth === 0) {
+            ranges.push({ start: contentStart, end: nextLt });
+            i = gt2 === -1 ? len : gt2 + 1;
+            break;
+          }
+        }
+        pos = gt2 === -1 ? len : gt2 + 1;
+        continue;
+      } else {
+        let t = nx;
+        if (t < len && isNameStartChar(xmlContent[t])) {
+          t++;
+          while (t < len && isNameChar(xmlContent[t])) t++;
+        }
+        let u = t;
+        let isSelfClosingNested = false;
+        while (u < len) {
+          const cu = xmlContent[u];
+          if (cu === '"' || cu === "'") {
+            u = skipQuoted(xmlContent, u);
+            continue;
+          }
+          if (cu === ">") break;
+          if (cu === "/" && xmlContent[u + 1] === ">") {
+            isSelfClosingNested = true;
+            u++;
+            break;
+          }
+          u++;
+        }
+        const startName = xmlContent.slice(nx, t);
+        if (startName === target && !isSelfClosingNested) {
+          sameDepth++;
+        }
+        pos = xmlContent[u] === ">" ? u + 1 : u + 1;
+        continue;
+      }
+    }
+    if (sameDepth !== 0) {
+      break;
+    }
+  }
+  return ranges;
+}
 function findFirstTopLevelRange(xmlContent, tagName) {
   const len = xmlContent.length;
   const target = tagName;
@@ -727,6 +905,11 @@ function findFirstTopLevelRange(xmlContent, tagName) {
     if (i >= len) return void 0;
     const ch = xmlContent[i];
     if (ch === "!") {
+      if (xmlContent.startsWith("!DOCTYPE", i + 1)) {
+        const gt2 = xmlContent.indexOf(">", i + 1);
+        i = gt2 === -1 ? len : gt2 + 1;
+        continue;
+      }
       if (xmlContent.startsWith("!--", i + 1)) {
         const close = xmlContent.indexOf("-->", i + 4);
         i = close === -1 ? len : close + 3;
@@ -785,6 +968,11 @@ function findFirstTopLevelRange(xmlContent, tagName) {
           if (nx >= len) break;
           const h = xmlContent[nx];
           if (h === "!") {
+            if (xmlContent.startsWith("!DOCTYPE", nx + 1)) {
+              const gt22 = xmlContent.indexOf(">", nx + 1);
+              pos = gt22 === -1 ? len : gt22 + 1;
+              continue;
+            }
             if (xmlContent.startsWith("!--", nx + 1)) {
               const close = xmlContent.indexOf("-->", nx + 4);
               pos = close === -1 ? len : close + 3;
@@ -932,6 +1120,101 @@ function countTagOccurrences(xmlContent, tagName, excludeRanges, shouldSkipFirst
   }
   return count;
 }
+function findAllTopLevelRanges(xmlContent, tagName) {
+  const ranges = [];
+  const len = xmlContent.length;
+  const target = tagName;
+  let i = 0;
+  let depth = 0;
+  while (i < len) {
+    const lt = xmlContent.indexOf("<", i);
+    if (lt === -1) break;
+    i = lt + 1;
+    if (i >= len) break;
+    const ch = xmlContent[i];
+    if (ch === "!") {
+      if (xmlContent.startsWith("!DOCTYPE", i + 1)) {
+        const gt2 = xmlContent.indexOf(">", i + 1);
+        i = gt2 === -1 ? len : gt2 + 1;
+        continue;
+      }
+      if (xmlContent.startsWith("!--", i + 1)) {
+        const close = xmlContent.indexOf("-->", i + 4);
+        i = close === -1 ? len : close + 3;
+        continue;
+      }
+      if (xmlContent.startsWith("![CDATA[", i + 1)) {
+        const close = xmlContent.indexOf("]]>", i + 9);
+        i = close === -1 ? len : close + 3;
+        continue;
+      }
+      const gt = xmlContent.indexOf(">", i + 1);
+      i = gt === -1 ? len : gt + 1;
+      continue;
+    } else if (ch === "?") {
+      const close = xmlContent.indexOf("?>", i + 1);
+      i = close === -1 ? len : close + 2;
+      continue;
+    } else if (ch === "/") {
+      i++;
+      const { name: name2, newPos: newPos2 } = parseName(xmlContent, i);
+      if (name2 === target) depth--;
+      i = xmlContent.indexOf(">", newPos2);
+      if (i === -1) break;
+      i++;
+      continue;
+    }
+    const { name, newPos } = parseName(xmlContent, i);
+    i = newPos;
+    let k = i;
+    while (k < len && xmlContent[k] !== ">") {
+      const c = xmlContent[k];
+      if (c === '"' || c === "'") {
+        k = skipQuoted(xmlContent, k);
+        continue;
+      }
+      if (c === "/" && xmlContent[k + 1] === ">") {
+        k++;
+        break;
+      }
+      k++;
+    }
+    if (name === target && depth === 0) {
+      const tagStart = lt;
+      const isSelfClosing = xmlContent[k] === "/" || xmlContent.startsWith("/>", k);
+      if (isSelfClosing) {
+        ranges.push({
+          start: tagStart,
+          end: k + (xmlContent[k] === "/" ? 2 : 1)
+        });
+      } else {
+        depth++;
+        let closeDepth = 1;
+        let j = k + 1;
+        while (j < len && closeDepth > 0) {
+          const nextLt = xmlContent.indexOf("<", j);
+          if (nextLt === -1) break;
+          if (xmlContent[nextLt + 1] === "/") {
+            const { name: closeName } = parseName(xmlContent, nextLt + 2);
+            if (closeName === target) closeDepth--;
+          } else if (xmlContent[nextLt + 1] !== "!" && xmlContent[nextLt + 1] !== "?") {
+            const { name: openName } = parseName(xmlContent, nextLt + 1);
+            if (openName === target) closeDepth++;
+          }
+          j = xmlContent.indexOf(">", nextLt + 1);
+          if (j === -1) break;
+          j++;
+        }
+        if (closeDepth === 0) {
+          ranges.push({ start: tagStart, end: j });
+        }
+        depth--;
+      }
+    }
+    i = k + 1;
+  }
+  return ranges;
+}
 // src/core/tokenizer.ts
 var XMLTokenizer = class {
@@ -1183,6 +1466,30 @@ var XMLTokenizer = class {
 };
 // src/core/parser.ts
+function deepDecodeStringsBySchema(input, schema) {
+  var _a;
+  if (input == null || schema == null) return input;
+  const type = getSchemaType(schema);
+  if (type === "string" && typeof input === "string") {
+    return unescapeXml(input);
+  }
+  if (type === "array" && Array.isArray(input)) {
+    const unwrapped = unwrapJsonSchema(schema);
+    const itemSchema = (_a = unwrapped == null ? void 0 : unwrapped.items) != null ? _a : {};
+    return input.map((item) => deepDecodeStringsBySchema(item, itemSchema));
+  }
+  if (type === "object" && input && typeof input === "object") {
+    const obj = input;
+    const out = {};
+    for (const key of Object.keys(obj)) {
+      const childSchema = getPropertySchema(schema, key);
+      out[key] = deepDecodeStringsBySchema(obj[key], childSchema);
+    }
+    return out;
+  }
+  if (typeof input === "string") return unescapeXml(input);
+  return input;
+}
 function parse(xmlInner, schema, options = {}) {
   var _a, _b, _c;
   const textNodeName = (_a = options.textNodeName) != null ? _a : "#text";
@@ -1248,11 +1555,25 @@ function parse(xmlInner, schema, options = {}) {
       }
     }
   }
-  const stringTypedProps = getStringTypedProperties(schema);
+  const getTopLevelStringProps = (s) => {
+    const set = /* @__PURE__ */ new Set();
+    const unwrapped = unwrapJsonSchema(s);
+    if (unwrapped && typeof unwrapped === "object") {
+      const props = unwrapped.properties;
+      if (props && typeof props === "object") {
+        for (const [k, v] of Object.entries(props)) {
+          if (getSchemaType(v) === "string") set.add(k);
+        }
+      }
+    }
+    return set;
+  };
+  const topLevelStringProps = getTopLevelStringProps(schema);
+  const deepStringTypedProps = getStringTypedProperties(schema);
   const duplicateKeys = /* @__PURE__ */ new Set();
-  for (const key of stringTypedProps) {
+  for (const key of topLevelStringProps) {
     const excludeRanges = [];
-    for (const other of stringTypedProps) {
+    for (const other of topLevelStringProps) {
       if (other === key) continue;
       const range = findFirstTopLevelRange(actualXmlInner, other);
       if (range) excludeRanges.push(range);
@@ -1282,37 +1603,30 @@ function parse(xmlInner, schema, options = {}) {
   const originalContentMap = /* @__PURE__ */ new Map();
   try {
     const ranges = [];
-    for (const key of stringTypedProps) {
-      const r = findFirstTopLevelRange(actualXmlInner, key);
-      if (r && r.end > r.start) ranges.push({ ...r, key });
+    for (const key of deepStringTypedProps) {
+      const innerRanges = findAllInnerRanges(actualXmlInner, key);
+      for (const r of innerRanges) {
+        if (r.end > r.start) ranges.push({ ...r, key });
+      }
     }
     if (ranges.length > 0) {
       const sorted = [...ranges].sort((a, b) => a.start - b.start);
-      const filtered = [];
+      let rebuilt = "";
+      let cursor = 0;
       for (const r of sorted) {
-        const last = filtered[filtered.length - 1];
-        if (last && r.start >= last.start && r.end <= last.end) {
+        if (r.start < cursor) {
           continue;
         }
-        filtered.push(r);
-      }
-      if (filtered.length > 0) {
-        filtered.sort((a, b) => a.start - b.start);
-        let rebuilt = "";
-        let cursor = 0;
-        for (const r of filtered) {
-          if (cursor < r.start)
-            rebuilt += actualXmlInner.slice(cursor, r.start);
-          const placeholder = `__RXML_PLACEHOLDER_${r.key}__`;
-          const originalContent = actualXmlInner.slice(r.start, r.end);
-          originalContentMap.set(placeholder, originalContent);
-          rebuilt += placeholder;
-          cursor = r.end;
-        }
-        if (cursor < actualXmlInner.length)
-          rebuilt += actualXmlInner.slice(cursor);
-        xmlInnerForParsing = rebuilt;
+        if (cursor < r.start) rebuilt += actualXmlInner.slice(cursor, r.start);
+        const placeholder = `__RXML_PLACEHOLDER_${r.key}_${r.start}_${r.end}__`;
+        const originalContent = actualXmlInner.slice(r.start, r.end);
+        originalContentMap.set(placeholder, originalContent);
+        rebuilt += placeholder;
+        cursor = r.end;
       }
+      if (cursor < actualXmlInner.length)
+        rebuilt += actualXmlInner.slice(cursor);
+      xmlInnerForParsing = rebuilt;
     }
   } catch (error) {
     if (options.onError) {
@@ -1336,9 +1650,35 @@ function parse(xmlInner, schema, options = {}) {
     throw new RXMLParseError("Failed to parse XML", cause);
   }
   const parsedArgs = domToObject(parsedNodes, schema, textNodeName);
+  const restorePlaceholdersDeep = (val) => {
+    if (val == null) return val;
+    if (typeof val === "string") {
+      if (val.startsWith("__RXML_PLACEHOLDER_")) {
+        const orig = originalContentMap.get(val);
+        return orig !== void 0 ? orig : val;
+      }
+      return val;
+    }
+    if (Array.isArray(val)) return val.map(restorePlaceholdersDeep);
+    if (typeof val === "object") {
+      const obj = val;
+      const out = {};
+      for (const [k, v] of Object.entries(obj)) {
+        const restored = restorePlaceholdersDeep(v);
+        if (k === textNodeName && typeof restored === "string") {
+          out[k] = restored.trim();
+        } else {
+          out[k] = restored;
+        }
+      }
+      return out;
+    }
+    return val;
+  };
+  const parsedArgsRestored = restorePlaceholdersDeep(parsedArgs);
   const args = {};
-  for (const k of Object.keys(parsedArgs || {})) {
-    const v = parsedArgs[k];
+  for (const k of Object.keys(parsedArgsRestored || {})) {
+    const v = parsedArgsRestored[k];
     let val = v;
     const propSchema = getPropertySchema(schema, k);
     const propType = getSchemaType(propSchema);
@@ -1448,7 +1788,7 @@ function parse(xmlInner, schema, options = {}) {
     }
     args[k] = typeof val === "string" ? val.trim() : val;
   }
-  for (const key of stringTypedProps) {
+  for (const key of topLevelStringProps) {
     if (!Object.prototype.hasOwnProperty.call(args, key)) {
       const raw = extractRawInner(actualXmlInner, key);
       if (typeof raw === "string") {
@@ -1471,7 +1811,8 @@ function parse(xmlInner, schema, options = {}) {
   }
   try {
     const coerced = coerceDomBySchema(dataToCoerce, schema);
-    return coerced;
+    const decoded = deepDecodeStringsBySchema(coerced, schema);
+    return decoded;
   } catch (error) {
     throw new RXMLCoercionError("Failed to coerce by schema", error);
   }
@@ -1705,7 +2046,6 @@ var XMLTransformStream = class extends import_stream.Transform {
           continue;
         }
       }
-      const closingTag = `</${tagName}>`;
       let depth = 1;
       let searchStart = openTagEnd + 1;
       let elementEnd = -1;
@@ -2097,16 +2437,19 @@ function toContentString(nodes) {
   RXMLStringifyError,
   XMLTokenizer,
   XMLTransformStream,
+  coerceBySchema,
   coerceDomBySchema,
   countTagOccurrences,
   createXMLStream,
   domToObject,
   extractRawInner,
   filter,
+  findAllTopLevelRanges,
   findElementByIdStream,
   findElementsByClassStream,
   findFirstTopLevelRange,
   getPropertySchema,
+  getSchemaType,
   getStringTypedProperties,
   parse,
   parseFromStream,
@@ -2119,6 +2462,8 @@ function toContentString(nodes) {
   stringify,
   stringifyNode,
   stringifyNodes,
-  toContentString
+  toContentString,
+  unescapeXml,
+  unwrapJsonSchema
 });
 //# sourceMappingURL=index.cjs.map