npm - @rgrove/parse-xml - Versions diffs - 4.0.1 → 4.2.0 - Mend

@rgrove/parse-xml 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +46 -31
package/dist/browser.js +692 -300
package/dist/browser.js.map +4 -4
package/dist/global.min.js +9 -8
package/dist/global.min.js.map +4 -4
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +8 -2
package/dist/index.js.map +1 -1
package/dist/lib/Parser.d.ts +53 -6
package/dist/lib/Parser.d.ts.map +1 -1
package/dist/lib/Parser.js +166 -126
package/dist/lib/Parser.js.map +1 -1
package/dist/lib/StringScanner.d.ts +15 -21
package/dist/lib/StringScanner.d.ts.map +1 -1
package/dist/lib/StringScanner.js +63 -86
package/dist/lib/StringScanner.js.map +1 -1
package/dist/lib/XmlDeclaration.d.ts +30 -0
package/dist/lib/XmlDeclaration.d.ts.map +1 -0
package/dist/lib/XmlDeclaration.js +36 -0
package/dist/lib/XmlDeclaration.js.map +1 -0
package/dist/lib/XmlDocument.d.ts +4 -2
package/dist/lib/XmlDocument.d.ts.map +1 -1
package/dist/lib/XmlDocument.js.map +1 -1
package/dist/lib/XmlDocumentType.d.ts +37 -0
package/dist/lib/XmlDocumentType.d.ts.map +1 -0
package/dist/lib/XmlDocumentType.js +39 -0
package/dist/lib/XmlDocumentType.js.map +1 -0
package/dist/lib/XmlElement.js.map +1 -1
package/dist/lib/XmlError.d.ts +24 -0
package/dist/lib/XmlError.d.ts.map +1 -0
package/dist/lib/XmlError.js +52 -0
package/dist/lib/XmlError.js.map +1 -0
package/dist/lib/XmlNode.d.ts +20 -1
package/dist/lib/XmlNode.d.ts.map +1 -1
package/dist/lib/XmlNode.js +28 -3
package/dist/lib/XmlNode.js.map +1 -1
package/dist/lib/syntax.d.ts.map +1 -1
package/dist/lib/syntax.js +18 -23
package/dist/lib/syntax.js.map +1 -1
package/dist/lib/types.d.ts +2 -2
package/dist/lib/types.d.ts.map +1 -1
package/package.json +20 -23
package/src/index.ts +3 -0
package/src/lib/Parser.ts +228 -141
package/src/lib/StringScanner.ts +66 -103
package/src/lib/XmlDeclaration.ts +58 -0
package/src/lib/XmlDocument.ts +4 -2
package/src/lib/XmlDocumentType.ts +67 -0
package/src/lib/XmlError.ts +80 -0
package/src/lib/XmlNode.ts +33 -3
package/src/lib/syntax.ts +12 -18

package/dist/browser.js CHANGED Viewed

@@ -1,4 +1,4 @@
-/*! @rgrove/parse-xml v4.0.1 | ISC License | Copyright Ryan Grove */
+/*! @rgrove/parse-xml v4.2.0 | ISC License | Copyright Ryan Grove */
 "use strict";
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -23,8 +23,11 @@ var src_exports = {};
 __export(src_exports, {
   XmlCdata: () => XmlCdata,
   XmlComment: () => XmlComment,
+  XmlDeclaration: () => XmlDeclaration,
   XmlDocument: () => XmlDocument,
+  XmlDocumentType: () => XmlDocumentType,
   XmlElement: () => XmlElement,
+  XmlError: () => XmlError,
   XmlNode: () => XmlNode,
   XmlProcessingInstruction: () => XmlProcessingInstruction,
   XmlText: () => XmlText,
@@ -37,125 +40,168 @@ var emptyString = "";
 var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
 var StringScanner = class {
   constructor(string) {
-    this.h = this.m(string, true);
+    this.k = this.u(string, true);
     this.d = 0;
     this.length = string.length;
-    this.j = this.h !== this.length;
-    this.string = string;
-    if (this.j) {
+    this.l = this.k !== this.length;
+    this.h = string;
+    if (this.l) {
       let charsToBytes = [];
-      for (let byteIndex = 0, charIndex = 0; charIndex < this.h; ++charIndex) {
+      for (let byteIndex = 0, charIndex = 0; charIndex < this.k; ++charIndex) {
         charsToBytes[charIndex] = byteIndex;
         byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
       }
-      this.x = charsToBytes;
+      this.A = charsToBytes;
     }
   }
-  get y() {
-    return this.d >= this.h;
-  }
-  n(charIndex = this.d) {
-    var _a;
-    return this.j ? (_a = this.x[charIndex]) != null ? _a : Infinity : charIndex;
-  }
-  m(string, multiByteSafe = this.j) {
+  /**
+   * Whether the current character index is at the end of the input string.
+   */
+  get B() {
+    return this.d >= this.k;
+  }
+  // -- Protected Methods ------------------------------------------------------
+  /**
+   * Returns the number of characters in the given string, which may differ from
+   * the byte length if the string contains multibyte characters.
+   */
+  u(string, multiByteSafe = this.l) {
     return multiByteSafe ? string.replace(surrogatePair, "_").length : string.length;
   }
-  f(count = 1) {
-    this.d = Math.min(this.h, this.d + count);
-  }
-  D(count = 1) {
-    let chars = this.g(count);
-    this.f(count);
+  // -- Public Methods ---------------------------------------------------------
+  /**
+   * Advances the scanner by the given number of characters, stopping if the end
+   * of the string is reached.
+   */
+  p(count = 1) {
+    this.d = Math.min(this.k, this.d + count);
+  }
+  /**
+   * Returns the byte index of the given character index in the string. The two
+   * may differ in strings that contain multibyte characters.
+   */
+  f(charIndex = this.d) {
+    var _a;
+    return this.l ? (_a = this.A[charIndex]) != null ? _a : Infinity : charIndex;
+  }
+  /**
+   * Consumes and returns the given number of characters if possible, advancing
+   * the scanner and stopping if the end of the string is reached.
+   *
+   * If no characters could be consumed, an empty string will be returned.
+   */
+  G(charCount = 1) {
+    let chars = this.m(charCount);
+    this.p(charCount);
     return chars;
   }
-  s(regex) {
-    if (!regex.sticky) {
-      throw new Error('`regex` must have a sticky flag ("y")');
-    }
-    regex.lastIndex = this.n();
-    let result = regex.exec(this.string);
-    if (result === null || result.length === 0) {
-      return emptyString;
-    }
-    let match = result[0];
-    this.f(this.m(match));
-    return match;
-  }
-  t(fn) {
-    let char;
-    let match = emptyString;
-    while ((char = this.g()) && fn(char)) {
-      match += char;
-      this.f();
-    }
-    return match;
+  /**
+   * Consumes and returns the given number of bytes if possible, advancing the
+   * scanner and stopping if the end of the string is reached.
+   *
+   * It's up to the caller to ensure that the given byte count doesn't split a
+   * multibyte character.
+   *
+   * If no bytes could be consumed, an empty string will be returned.
+   */
+  v(byteCount) {
+    let byteIndex = this.f();
+    let result = this.h.slice(byteIndex, byteIndex + byteCount);
+    this.p(this.u(result));
+    return result;
   }
-  N(stringToConsume) {
-    if (this.b(stringToConsume)) {
-      return stringToConsume;
-    }
-    if (this.j) {
-      let { length } = stringToConsume;
-      let charLengthToMatch = this.m(stringToConsume);
-      if (charLengthToMatch !== length && stringToConsume === this.g(charLengthToMatch)) {
-        this.f(charLengthToMatch);
-        return stringToConsume;
+  /**
+   * Consumes and returns all characters for which the given function returns
+   * `true`, stopping when `false` is returned or the end of the input is
+   * reached.
+   */
+  w(fn) {
+    let { length, l: multiByteMode, h: string } = this;
+    let startByteIndex = this.f();
+    let endByteIndex = startByteIndex;
+    if (multiByteMode) {
+      while (endByteIndex < length) {
+        let char = string[endByteIndex];
+        let isSurrogatePair = char >= "\uD800" && char <= "\uDBFF";
+        if (isSurrogatePair) {
+          char += string[endByteIndex + 1];
+        }
+        if (!fn(char)) {
+          break;
+        }
+        endByteIndex += isSurrogatePair ? 2 : 1;
+      }
+    } else {
+      while (endByteIndex < length && fn(string[endByteIndex])) {
+        ++endByteIndex;
       }
     }
-    return emptyString;
-  }
+    return this.v(endByteIndex - startByteIndex);
+  }
+  /**
+   * Consumes the given string if it exists at the current character index, and
+   * advances the scanner.
+   *
+   * If the given string doesn't exist at the current character index, an empty
+   * string will be returned and the scanner will not be advanced.
+   */
   b(stringToConsume) {
     let { length } = stringToConsume;
-    if (this.g(length) === stringToConsume) {
-      this.f(length);
+    let byteIndex = this.f();
+    if (stringToConsume === this.h.slice(byteIndex, byteIndex + length)) {
+      this.p(length === 1 ? 1 : this.u(stringToConsume));
       return stringToConsume;
     }
     return emptyString;
   }
-  E(regex) {
-    let restOfString = this.string.slice(this.n());
-    let matchByteIndex = restOfString.search(regex);
-    if (matchByteIndex <= 0) {
-      return emptyString;
-    }
-    let result = restOfString.slice(0, matchByteIndex);
-    this.f(this.m(result));
-    return result;
-  }
-  p(searchString) {
-    let { string } = this;
-    let byteIndex = this.n();
-    let matchByteIndex = string.indexOf(searchString, byteIndex);
-    if (matchByteIndex <= 0) {
-      return emptyString;
-    }
-    let result = string.slice(byteIndex, matchByteIndex);
-    this.f(this.m(result));
-    return result;
-  }
-  g(count = 1) {
-    let { d: charIndex, j: multiByteMode, string } = this;
-    if (multiByteMode) {
-      if (charIndex >= this.h) {
-        return emptyString;
-      }
-      return string.slice(
-        this.n(charIndex),
-        this.n(charIndex + count)
-      );
-    }
-    return string.slice(charIndex, charIndex + count);
-  }
-  o(index = 0) {
-    this.d = index >= 0 ? Math.min(this.h, index) : Math.max(0, this.d + index);
+  /**
+   * Consumes characters until the given global regex is matched, advancing the
+   * scanner up to (but not beyond) the beginning of the match. If the regex
+   * doesn't match, nothing will be consumed.
+   *
+   * Returns the consumed string, or an empty string if nothing was consumed.
+   */
+  x(regex) {
+    let matchByteIndex = this.h.slice(this.f()).search(regex);
+    return matchByteIndex > 0 ? this.v(matchByteIndex) : emptyString;
+  }
+  /**
+   * Consumes characters until the given string is found, advancing the scanner
+   * up to (but not beyond) that point. If the string is never found, nothing
+   * will be consumed.
+   *
+   * Returns the consumed string, or an empty string if nothing was consumed.
+   */
+  s(searchString) {
+    let byteIndex = this.f();
+    let matchByteIndex = this.h.indexOf(searchString, byteIndex);
+    return matchByteIndex > 0 ? this.v(matchByteIndex - byteIndex) : emptyString;
+  }
+  /**
+   * Returns the given number of characters starting at the current character
+   * index, without advancing the scanner and without exceeding the end of the
+   * input string.
+   */
+  m(count = 1) {
+    let { d: charIndex, h: string } = this;
+    return this.l ? string.slice(this.f(charIndex), this.f(charIndex + count)) : string.slice(charIndex, charIndex + count);
+  }
+  /**
+   * Resets the scanner position to the given character _index_, or to the start
+   * of the input string if no index is given.
+   *
+   * If _index_ is negative, the scanner position will be moved backward by that
+   * many characters, stopping if the beginning of the string is reached.
+   */
+  n(index = 0) {
+    this.d = index >= 0 ? Math.min(this.k, index) : Math.max(0, this.d + index);
   }
 };
 // src/lib/syntax.ts
-var attValueCharDoubleQuote = /[^"&<]+/y;
-var attValueCharSingleQuote = /[^'&<]+/y;
-var attValueNormalizedWhitespace = /[\t\n]/g;
+var attValueCharDoubleQuote = /["&<]/;
+var attValueCharSingleQuote = /['&<]/;
+var attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
 var endCharData = /<|&|]]>/;
 var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.create(null), {
   amp: "&",
@@ -165,45 +211,88 @@ var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.crea
   quot: '"'
 }));
 function isNameChar(char) {
-  let cp = getCodePoint(char);
-  return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp >= 8255 && cp <= 8256 || isNameStartChar(char, cp);
+  let cp = char.codePointAt(0);
+  return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp === 8255 || cp === 8256 || isNameStartChar(char, cp);
 }
-function isNameStartChar(char, cp = getCodePoint(char)) {
-  return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp >= 8204 && cp <= 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
+function isNameStartChar(char, cp = char.codePointAt(0)) {
+  return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp === 8204 || cp === 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
 }
 function isReferenceChar(char) {
   return char === "#" || isNameChar(char);
 }
 function isWhitespace(char) {
-  let cp = getCodePoint(char);
+  let cp = char.codePointAt(0);
   return cp === 32 || cp === 9 || cp === 10 || cp === 13;
 }
 function isXmlCodePoint(cp) {
-  return cp === 9 || cp === 10 || cp === 13 || cp >= 32 && cp <= 55295 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
-}
-function getCodePoint(char) {
-  return char.codePointAt(0) || -1;
+  return cp >= 32 && cp <= 55295 || cp === 10 || cp === 9 || cp === 13 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
 }
 // src/lib/XmlNode.ts
-var XmlNode = class {
+var _XmlNode = class _XmlNode {
   constructor() {
+    /**
+     * Parent node of this node, or `null` if this node has no parent.
+     */
     this.parent = null;
-  }
+    /**
+     * Starting byte offset of this node in the original XML string, or `-1` if
+     * the offset is unknown.
+     */
+    this.start = -1;
+    /**
+     * Ending byte offset of this node in the original XML string, or `-1` if the
+     * offset is unknown.
+     */
+    this.end = -1;
+  }
+  /**
+   * Document that contains this node, or `null` if this node is not associated
+   * with a document.
+   */
   get document() {
     var _a, _b;
     return (_b = (_a = this.parent) == null ? void 0 : _a.document) != null ? _b : null;
   }
+  /**
+   * Whether this node is the root node of the document (also known as the
+   * document element).
+   */
   get isRootNode() {
-    return this.parent !== null && this.parent === this.document;
-  }
+    return this.parent !== null && this.parent === this.document && this.type === _XmlNode.TYPE_ELEMENT;
+  }
+  /**
+   * Whether whitespace should be preserved in the content of this element and
+   * its children.
+   *
+   * This is influenced by the value of the special `xml:space` attribute, and
+   * will be `true` for any node whose `xml:space` attribute is set to
+   * "preserve". If a node has no such attribute, it will inherit the value of
+   * the nearest ancestor that does (if any).
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
+   */
   get preserveWhitespace() {
     var _a;
-    return Boolean((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
-  }
+    return !!((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
+  }
+  /**
+   * Type of this node.
+   *
+   * The value of this property is a string that matches one of the static
+   * `TYPE_*` properties on the `XmlNode` class (e.g. `TYPE_ELEMENT`,
+   * `TYPE_TEXT`, etc.).
+   *
+   * The `XmlNode` class itself is a base class and doesn't have its own type
+   * name.
+   */
   get type() {
     return "";
   }
+  /**
+   * Returns a JSON-serializable object representing this node, minus properties
+   * that could result in circular references.
+   */
   toJSON() {
     let json = {
       type: this.type
@@ -214,15 +303,46 @@ var XmlNode = class {
     if (this.preserveWhitespace) {
       json.preserveWhitespace = true;
     }
+    if (this.start !== -1) {
+      json.start = this.start;
+      json.end = this.end;
+    }
     return json;
   }
 };
-XmlNode.TYPE_CDATA = "cdata";
-XmlNode.TYPE_COMMENT = "comment";
-XmlNode.TYPE_DOCUMENT = "document";
-XmlNode.TYPE_ELEMENT = "element";
-XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
-XmlNode.TYPE_TEXT = "text";
+/**
+ * Type value for an `XmlCdata` node.
+ */
+_XmlNode.TYPE_CDATA = "cdata";
+/**
+ * Type value for an `XmlComment` node.
+ */
+_XmlNode.TYPE_COMMENT = "comment";
+/**
+ * Type value for an `XmlDocument` node.
+ */
+_XmlNode.TYPE_DOCUMENT = "document";
+/**
+ * Type value for an `XmlDocumentType` node.
+ */
+_XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
+/**
+ * Type value for an `XmlElement` node.
+ */
+_XmlNode.TYPE_ELEMENT = "element";
+/**
+ * Type value for an `XmlProcessingInstruction` node.
+ */
+_XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
+/**
+ * Type value for an `XmlText` node.
+ */
+_XmlNode.TYPE_TEXT = "text";
+/**
+ * Type value for an `XmlDeclaration` node.
+ */
+_XmlNode.TYPE_XML_DECLARATION = "xmldecl";
+var XmlNode = _XmlNode;
 // src/lib/XmlText.ts
 var XmlText = class extends XmlNode {
@@ -263,20 +383,46 @@ var XmlComment = class extends XmlNode {
   }
 };
+// src/lib/XmlDeclaration.ts
+var XmlDeclaration = class extends XmlNode {
+  constructor(version, encoding, standalone) {
+    super();
+    this.version = version;
+    this.encoding = encoding != null ? encoding : null;
+    this.standalone = standalone != null ? standalone : null;
+  }
+  get type() {
+    return XmlNode.TYPE_XML_DECLARATION;
+  }
+  toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    json.version = this.version;
+    for (let key of ["encoding", "standalone"]) {
+      if (this[key] !== null) {
+        json[key] = this[key];
+      }
+    }
+    return json;
+  }
+};
 // src/lib/XmlElement.ts
-var XmlElement = class extends XmlNode {
+var XmlElement = class _XmlElement extends XmlNode {
   constructor(name, attributes = /* @__PURE__ */ Object.create(null), children = []) {
     super();
     this.name = name;
     this.attributes = attributes;
     this.children = children;
   }
+  /**
+   * Whether this element is empty (meaning it has no children).
+   */
   get isEmpty() {
     return this.children.length === 0;
   }
   get preserveWhitespace() {
     let node = this;
-    while (node instanceof XmlElement) {
+    while (node instanceof _XmlElement) {
       if ("xml:space" in node.attributes) {
         return node.attributes["xml:space"] === "preserve";
       }
@@ -284,6 +430,9 @@ var XmlElement = class extends XmlNode {
     }
     return false;
   }
+  /**
+   * Text content of this element and all its descendants.
+   */
   get text() {
     return this.children.map((child) => "text" in child ? child.text : "").join("");
   }
@@ -308,6 +457,9 @@ var XmlDocument = class extends XmlNode {
   get document() {
     return this;
   }
+  /**
+   * Root element of this document, or `null` if this document is empty.
+   */
   get root() {
     for (let child of this.children) {
       if (child instanceof XmlElement) {
@@ -316,6 +468,9 @@ var XmlDocument = class extends XmlNode {
     }
     return null;
   }
+  /**
+   * Text content of this document and all its descendants.
+   */
   get text() {
     return this.children.map((child) => "text" in child ? child.text : "").join("");
   }
@@ -329,6 +484,71 @@ var XmlDocument = class extends XmlNode {
   }
 };
+// src/lib/XmlDocumentType.ts
+var XmlDocumentType = class extends XmlNode {
+  constructor(name, publicId, systemId, internalSubset) {
+    super();
+    this.name = name;
+    this.publicId = publicId != null ? publicId : null;
+    this.systemId = systemId != null ? systemId : null;
+    this.internalSubset = internalSubset != null ? internalSubset : null;
+  }
+  get type() {
+    return XmlNode.TYPE_DOCUMENT_TYPE;
+  }
+  toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    json.name = this.name;
+    for (let key of ["publicId", "systemId", "internalSubset"]) {
+      if (this[key] !== null) {
+        json[key] = this[key];
+      }
+    }
+    return json;
+  }
+};
+// src/lib/XmlError.ts
+var XmlError = class extends Error {
+  constructor(message, charIndex, xml) {
+    let column = 1;
+    let excerpt = "";
+    let line = 1;
+    for (let i = 0; i < charIndex; ++i) {
+      let char = xml[i];
+      if (char === "\n") {
+        column = 1;
+        excerpt = "";
+        line += 1;
+      } else {
+        column += 1;
+        excerpt += char;
+      }
+    }
+    let eol = xml.indexOf("\n", charIndex);
+    excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
+    let excerptStart = 0;
+    if (excerpt.length > 50) {
+      if (column < 40) {
+        excerpt = excerpt.slice(0, 50);
+      } else {
+        excerptStart = column - 20;
+        excerpt = excerpt.slice(excerptStart, column + 30);
+      }
+    }
+    super(
+      `${message} (line ${line}, column ${column})
+  ${excerpt}
+` + " ".repeat(column - excerptStart + 1) + "^\n"
+    );
+    this.column = column;
+    this.excerpt = excerpt;
+    this.line = line;
+    this.name = "XmlError";
+    this.pos = charIndex;
+  }
+};
 // src/lib/XmlProcessingInstruction.ts
 var XmlProcessingInstruction = class extends XmlNode {
   constructor(name, content = "") {
@@ -350,45 +570,67 @@ var XmlProcessingInstruction = class extends XmlNode {
 // src/lib/Parser.ts
 var emptyString2 = "";
 var Parser = class {
+  /**
+   * @param xml XML string to parse.
+   * @param options Parser options.
+   */
   constructor(xml, options = {}) {
-    this.document = new XmlDocument();
-    this.i = this.document;
-    this.options = options;
-    this.c = new StringScanner(normalizeXmlString(xml));
-    this.F();
-    if (!this.z()) {
-      throw this.a("Root element is missing or invalid");
+    let doc = this.document = new XmlDocument();
+    this.j = doc;
+    this.g = options;
+    this.c = new StringScanner(xml);
+    if (this.g.includeOffsets) {
+      doc.start = 0;
+      doc.end = xml.length;
     }
-    while (this.u()) {
+    this.parse();
+  }
+  /**
+   * Adds the given `XmlNode` as a child of `this.currentNode`.
+   */
+  i(node, charIndex) {
+    node.parent = this.j;
+    if (this.g.includeOffsets) {
+      node.start = this.c.f(charIndex);
+      node.end = this.c.f();
     }
-    if (!this.c.y) {
-      throw this.a("Extra content at the end of the document");
-    }
-  }
-  k(node) {
-    node.parent = this.i;
-    this.i.children.push(node);
+    this.j.children.push(node);
+    return true;
   }
-  v(text) {
-    let { children } = this.i;
+  /**
+   * Adds the given _text_ to the document, either by appending it to a
+   * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
+   */
+  y(text, charIndex) {
+    let { children } = this.j;
     let { length } = children;
+    text = normalizeLineBreaks(text);
     if (length > 0) {
       let prevNode = children[length - 1];
-      if (prevNode instanceof XmlText) {
-        prevNode.text += text;
-        return;
+      if ((prevNode == null ? void 0 : prevNode.type) === XmlNode.TYPE_TEXT) {
+        let textNode = prevNode;
+        textNode.text += text;
+        if (this.g.includeOffsets) {
+          textNode.end = this.c.f();
+        }
+        return true;
       }
     }
-    this.k(new XmlText(text));
+    return this.i(new XmlText(text), charIndex);
   }
-  G() {
+  /**
+   * Consumes element attributes.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
+   */
+  H() {
     let attributes = /* @__PURE__ */ Object.create(null);
     while (this.e()) {
       let attrName = this.q();
       if (!attrName) {
         break;
       }
-      let attrValue = this.r() && this.H();
+      let attrValue = this.t() && this.I();
       if (attrValue === false) {
         throw this.a("Attribute value expected");
       }
@@ -400,7 +642,7 @@ var Parser = class {
       }
       attributes[attrName] = attrValue;
     }
-    if (this.options.sortAttributes) {
+    if (this.g.sortAttributes) {
       let attrNames = Object.keys(attributes).sort();
       let sortedAttributes = /* @__PURE__ */ Object.create(null);
       for (let i = 0; i < attrNames.length; ++i) {
@@ -411,152 +653,227 @@ var Parser = class {
     }
     return attributes;
   }
-  H() {
+  /**
+   * Consumes an `AttValue` (attribute value) if possible.
+   *
+   * @returns
+   *   Contents of the `AttValue` minus quotes, or `false` if nothing was
+   *   consumed. An empty string indicates that an `AttValue` was consumed but
+   *   was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+   */
+  I() {
     let { c: scanner } = this;
-    let quote = scanner.g();
+    let quote = scanner.m();
     if (quote !== '"' && quote !== "'") {
       return false;
     }
-    scanner.f();
+    scanner.p();
     let chars;
     let isClosed = false;
     let value = emptyString2;
     let regex = quote === '"' ? attValueCharDoubleQuote : attValueCharSingleQuote;
-    matchLoop:
-      while (!scanner.y) {
-        chars = scanner.s(regex);
-        if (chars) {
-          this.l(chars);
-          value += chars.replace(attValueNormalizedWhitespace, " ");
-        }
-        switch (scanner.g()) {
-          case quote:
-            isClosed = true;
-            break matchLoop;
-          case "&":
-            value += this.A();
-            continue;
-          case "<":
-            throw this.a("Unescaped `<` is not allowed in an attribute value");
-          case emptyString2:
-            break matchLoop;
-        }
+    matchLoop: while (!scanner.B) {
+      chars = scanner.x(regex);
+      if (chars) {
+        this.o(chars);
+        value += chars.replace(attValueNormalizedWhitespace, " ");
+      }
+      switch (scanner.m()) {
+        case quote:
+          isClosed = true;
+          break matchLoop;
+        case "&":
+          value += this.C();
+          continue;
+        case "<":
+          throw this.a("Unescaped `<` is not allowed in an attribute value");
+        default:
+          break matchLoop;
       }
+    }
     if (!isClosed) {
       throw this.a("Unclosed attribute");
     }
-    scanner.f();
+    scanner.p();
     return value;
   }
-  I() {
+  /**
+   * Consumes a CDATA section if possible.
+   *
+   * @returns Whether a CDATA section was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
+   */
+  J() {
     let { c: scanner } = this;
+    let startIndex = scanner.d;
     if (!scanner.b("<![CDATA[")) {
       return false;
     }
-    let text = scanner.p("]]>");
-    this.l(text);
+    let text = scanner.s("]]>");
+    this.o(text);
     if (!scanner.b("]]>")) {
       throw this.a("Unclosed CDATA section");
     }
-    if (this.options.preserveCdata) {
-      this.k(new XmlCdata(text));
-    } else {
-      this.v(text);
-    }
-    return true;
-  }
-  J() {
+    return this.g.preserveCdata ? this.i(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.y(text, startIndex);
+  }
+  /**
+   * Consumes character data if possible.
+   *
+   * @returns Whether character data was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
+   */
+  K() {
     let { c: scanner } = this;
-    let charData = scanner.E(endCharData);
+    let startIndex = scanner.d;
+    let charData = scanner.x(endCharData);
     if (!charData) {
       return false;
     }
-    this.l(charData);
-    if (scanner.g(3) === "]]>") {
+    this.o(charData);
+    if (scanner.m(3) === "]]>") {
       throw this.a("Element content may not contain the CDATA section close delimiter `]]>`");
     }
-    this.v(charData);
-    return true;
-  }
-  B() {
+    return this.y(charData, startIndex);
+  }
+  /**
+   * Consumes a comment if possible.
+   *
+   * @returns Whether a comment was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
+   */
+  D() {
     let { c: scanner } = this;
+    let startIndex = scanner.d;
     if (!scanner.b("<!--")) {
       return false;
     }
-    let content = scanner.p("--");
-    this.l(content);
+    let content = scanner.s("--");
+    this.o(content);
     if (!scanner.b("-->")) {
-      if (scanner.g(2) === "--") {
+      if (scanner.m(2) === "--") {
         throw this.a("The string `--` isn't allowed inside a comment");
       }
       throw this.a("Unclosed comment");
     }
-    if (this.options.preserveComments) {
-      this.k(new XmlComment(content.trim()));
-    }
-    return true;
-  }
-  K() {
-    let ref = this.A();
-    if (ref) {
-      this.v(ref);
-      return true;
-    }
-    return false;
-  }
+    return this.g.preserveComments ? this.i(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
+  }
+  /**
+   * Consumes a reference in a content context if possible.
+   *
+   * This differs from `consumeReference()` in that a consumed reference will be
+   * added to the document as a text node instead of returned.
+   *
+   * @returns Whether a reference was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
+   */
   L() {
+    let startIndex = this.c.d;
+    let ref = this.C();
+    return ref ? this.y(ref, startIndex) : false;
+  }
+  /**
+   * Consumes a doctype declaration if possible.
+   *
+   * This is a loose implementation since doctype declarations are currently
+   * discarded without further parsing.
+   *
+   * @returns Whether a doctype declaration was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
+   */
+  M() {
     let { c: scanner } = this;
-    if (!scanner.b("<!DOCTYPE") || !this.e()) {
+    let startIndex = scanner.d;
+    if (!scanner.b("<!DOCTYPE")) {
       return false;
     }
-    scanner.s(/[^[>]+/y);
-    if (scanner.s(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
-      return true;
+    let name = this.e() && this.q();
+    if (!name) {
+      throw this.a("Expected a name");
+    }
+    let publicId;
+    let systemId;
+    if (this.e()) {
+      if (scanner.b("PUBLIC")) {
+        publicId = this.e() && this.N();
+        if (publicId === false) {
+          throw this.a("Expected a public identifier");
+        }
+        this.e();
+      }
+      if (publicId !== void 0 || scanner.b("SYSTEM")) {
+        this.e();
+        systemId = this.r();
+        if (systemId === false) {
+          throw this.a("Expected a system identifier");
+        }
+        this.e();
+      }
+    }
+    let internalSubset;
+    if (scanner.b("[")) {
+      internalSubset = scanner.x(/\][\x20\t\r\n]*>/);
+      if (!scanner.b("]")) {
+        throw this.a("Unclosed internal subset");
+      }
+      this.e();
     }
     if (!scanner.b(">")) {
       throw this.a("Unclosed doctype declaration");
     }
-    return true;
-  }
-  z() {
+    return this.g.preserveDocumentType ? this.i(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
+  }
+  /**
+   * Consumes an element if possible.
+   *
+   * @returns Whether an element was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
+   */
+  E() {
     let { c: scanner } = this;
-    let mark = scanner.d;
+    let startIndex = scanner.d;
     if (!scanner.b("<")) {
       return false;
     }
     let name = this.q();
     if (!name) {
-      scanner.o(mark);
+      scanner.n(startIndex);
       return false;
     }
-    let attributes = this.G();
-    let isEmpty = Boolean(scanner.b("/>"));
+    let attributes = this.H();
+    let isEmpty = !!scanner.b("/>");
     let element = new XmlElement(name, attributes);
-    element.parent = this.i;
+    element.parent = this.j;
     if (!isEmpty) {
       if (!scanner.b(">")) {
         throw this.a(`Unclosed start tag for element \`${name}\``);
       }
-      this.i = element;
+      this.j = element;
       do {
-        this.J();
-      } while (this.z() || this.K() || this.I() || this.C() || this.B());
+        this.K();
+      } while (this.E() || this.L() || this.J() || this.F() || this.D());
       let endTagMark = scanner.d;
       let endTagName;
       if (!scanner.b("</") || !(endTagName = this.q()) || endTagName !== name) {
-        scanner.o(endTagMark);
+        scanner.n(endTagMark);
         throw this.a(`Missing end tag for element ${name}`);
       }
       this.e();
       if (!scanner.b(">")) {
         throw this.a(`Unclosed end tag for element ${name}`);
       }
-      this.i = element.parent;
+      this.j = element.parent;
     }
-    this.k(element);
-    return true;
-  }
-  r() {
+    return this.i(element, startIndex);
+  }
+  /**
+   * Consumes an `Eq` production if possible.
+   *
+   * @returns Whether an `Eq` production was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
+   */
+  t() {
     this.e();
     if (this.c.b("=")) {
       this.e();
@@ -564,22 +881,40 @@ var Parser = class {
     }
     return false;
   }
-  u() {
-    return this.B() || this.C() || this.e();
-  }
+  /**
+   * Consumes `Misc` content if possible.
+   *
+   * @returns Whether anything was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
+   */
+  z() {
+    return this.D() || this.F() || this.e();
+  }
+  /**
+   * Consumes one or more `Name` characters if possible.
+   *
+   * @returns `Name` characters, or an empty string if none were consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
+   */
   q() {
-    return isNameStartChar(this.c.g()) ? this.c.t(isNameChar) : emptyString2;
-  }
-  C() {
+    return isNameStartChar(this.c.m()) ? this.c.w(isNameChar) : emptyString2;
+  }
+  /**
+   * Consumes a processing instruction if possible.
+   *
+   * @returns Whether a processing instruction was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
+   */
+  F() {
     let { c: scanner } = this;
-    let mark = scanner.d;
+    let startIndex = scanner.d;
     if (!scanner.b("<?")) {
       return false;
     }
     let name = this.q();
     if (name) {
       if (name.toLowerCase() === "xml") {
-        scanner.o(mark);
+        scanner.n(startIndex);
         throw this.a("XML declaration isn't allowed here");
       }
     } else {
@@ -587,38 +922,73 @@ var Parser = class {
     }
     if (!this.e()) {
       if (scanner.b("?>")) {
-        this.k(new XmlProcessingInstruction(name));
-        return true;
+        return this.i(new XmlProcessingInstruction(name), startIndex);
       }
       throw this.a("Whitespace is required after a processing instruction name");
     }
-    let content = scanner.p("?>");
-    this.l(content);
+    let content = scanner.s("?>");
+    this.o(content);
     if (!scanner.b("?>")) {
       throw this.a("Unterminated processing instruction");
     }
-    this.k(new XmlProcessingInstruction(name, content));
-    return true;
-  }
-  F() {
+    return this.i(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
+  }
+  /**
+   * Consumes a prolog if possible.
+   *
+   * @returns Whether a prolog was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
+   */
+  O() {
     let { c: scanner } = this;
-    let mark = scanner.d;
-    this.M();
-    while (this.u()) {
+    let startIndex = scanner.d;
+    this.P();
+    while (this.z()) {
     }
-    if (this.L()) {
-      while (this.u()) {
+    if (this.M()) {
+      while (this.z()) {
       }
     }
-    return mark < scanner.d;
+    return startIndex < scanner.d;
+  }
+  /**
+   * Consumes a public identifier literal if possible.
+   *
+   * @returns
+   *   Value of the public identifier literal minus quotes, or `false` if
+   *   nothing was consumed. An empty string indicates that a public id literal
+   *   was consumed but was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
+   */
+  N() {
+    let startIndex = this.c.d;
+    let value = this.r();
+    if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
+      this.c.n(startIndex);
+      throw this.a("Invalid character in public identifier");
+    }
+    return value;
   }
-  A() {
+  /**
+   * Consumes a reference if possible.
+   *
+   * This differs from `consumeContentReference()` in that a consumed reference
+   * will be returned rather than added to the document.
+   *
+   * @returns
+   *   Parsed reference value, or `false` if nothing was consumed (to
+   *   distinguish from a reference that resolves to an empty string).
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
+   */
+  C() {
     let { c: scanner } = this;
     if (!scanner.b("&")) {
       return false;
     }
-    let ref = scanner.t(isReferenceChar);
-    if (scanner.D() !== ";") {
+    let ref = scanner.w(isReferenceChar);
+    if (scanner.G() !== ";") {
       throw this.a("Unterminated reference (a reference must end with `;`)");
     }
     let parsedValue;
@@ -637,7 +1007,7 @@ var Parser = class {
         let {
           ignoreUndefinedEntities,
           resolveUndefinedEntity
-        } = this.options;
+        } = this.g;
         let wrappedRef = `&${ref};`;
         if (resolveUndefinedEntity) {
           let resolvedValue = resolveUndefinedEntity(wrappedRef);
@@ -652,48 +1022,79 @@ var Parser = class {
         if (ignoreUndefinedEntities) {
           return wrappedRef;
         }
-        scanner.o(-wrappedRef.length);
+        scanner.n(-wrappedRef.length);
         throw this.a(`Named entity isn't defined: ${wrappedRef}`);
       }
     }
     return parsedValue;
   }
-  w() {
+  /**
+   * Consumes a `SystemLiteral` if possible.
+   *
+   * A `SystemLiteral` is similar to an attribute value, but allows the
+   * characters `<` and `&` and doesn't replace references.
+   *
+   * @returns
+   *   Value of the `SystemLiteral` minus quotes, or `false` if nothing was
+   *   consumed. An empty string indicates that a `SystemLiteral` was consumed
+   *   but was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
+   */
+  r() {
     let { c: scanner } = this;
     let quote = scanner.b('"') || scanner.b("'");
     if (!quote) {
       return false;
     }
-    let value = scanner.p(quote);
-    this.l(value);
+    let value = scanner.s(quote);
+    this.o(value);
     if (!scanner.b(quote)) {
       throw this.a("Missing end quote");
     }
     return value;
   }
+  /**
+   * Consumes one or more whitespace characters if possible.
+   *
+   * @returns Whether any whitespace characters were consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
+   */
   e() {
-    return Boolean(this.c.t(isWhitespace));
-  }
-  M() {
+    return !!this.c.w(isWhitespace);
+  }
+  /**
+   * Consumes an XML declaration if possible.
+   *
+   * @returns Whether an XML declaration was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
+   */
+  P() {
     let { c: scanner } = this;
+    let startIndex = scanner.d;
     if (!scanner.b("<?xml")) {
       return false;
     }
     if (!this.e()) {
       throw this.a("Invalid XML declaration");
     }
-    let version = Boolean(scanner.b("version")) && this.r() && this.w();
+    let version = !!scanner.b("version") && this.t() && this.r();
     if (version === false) {
       throw this.a("XML version is missing or invalid");
     } else if (!/^1\.[0-9]+$/.test(version)) {
       throw this.a("Invalid character in version number");
     }
+    let encoding;
+    let standalone;
     if (this.e()) {
-      let encoding = Boolean(scanner.b("encoding")) && this.r() && this.w();
+      encoding = !!scanner.b("encoding") && this.t() && this.r();
       if (encoding) {
+        if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
+          throw this.a("Invalid character in encoding name");
+        }
         this.e();
       }
-      let standalone = Boolean(scanner.b("standalone")) && this.r() && this.w();
+      standalone = !!scanner.b("standalone") && this.t() && this.r();
       if (standalone) {
         if (standalone !== "yes" && standalone !== "no") {
           throw this.a('Only "yes" and "no" are permitted as values of `standalone`');
@@ -704,54 +1105,44 @@ var Parser = class {
     if (!scanner.b("?>")) {
       throw this.a("Invalid or unclosed XML declaration");
     }
-    return true;
-  }
+    return this.g.preserveXmlDeclaration ? this.i(new XmlDeclaration(
+      version,
+      encoding || void 0,
+      standalone || void 0
+    ), startIndex) : true;
+  }
+  /**
+   * Returns an `XmlError` for the current scanner position.
+   */
   a(message) {
-    let { d: charIndex, string: xml } = this.c;
-    let column = 1;
-    let excerpt = "";
-    let line = 1;
-    for (let i = 0; i < charIndex; ++i) {
-      let char = xml[i];
-      if (char === "\n") {
-        column = 1;
-        excerpt = "";
-        line += 1;
-      } else {
-        column += 1;
-        excerpt += char;
-      }
+    let { c: scanner } = this;
+    return new XmlError(message, scanner.d, scanner.h);
+  }
+  /**
+   * Parses the XML input.
+   */
+  parse() {
+    this.c.b("\uFEFF");
+    this.O();
+    if (!this.E()) {
+      throw this.a("Root element is missing or invalid");
     }
-    let eol = xml.indexOf("\n", charIndex);
-    excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
-    let excerptStart = 0;
-    if (excerpt.length > 50) {
-      if (column < 40) {
-        excerpt = excerpt.slice(0, 50);
-      } else {
-        excerptStart = column - 20;
-        excerpt = excerpt.slice(excerptStart, column + 30);
-      }
+    while (this.z()) {
+    }
+    if (!this.c.B) {
+      throw this.a("Extra content at the end of the document");
     }
-    let err = new Error(
-      `${message} (line ${line}, column ${column})
-  ${excerpt}
-` + " ".repeat(column - excerptStart + 1) + "^\n"
-    );
-    Object.assign(err, {
-      column,
-      excerpt,
-      line,
-      pos: charIndex
-    });
-    return err;
   }
-  l(string) {
+  /**
+   * Throws an invalid character error if any character in the given _string_
+   * isn't a valid XML character.
+   */
+  o(string) {
     let { length } = string;
     for (let i = 0; i < length; ++i) {
       let cp = string.codePointAt(i);
       if (!isXmlCodePoint(cp)) {
-        this.c.o(-([...string].length - i));
+        this.c.n(-([...string].length - i));
         throw this.a("Invalid character");
       }
       if (cp > 65535) {
@@ -760,11 +1151,12 @@ var Parser = class {
     }
   }
 };
-function normalizeXmlString(xml) {
-  if (xml[0] === "\uFEFF") {
-    xml = xml.slice(1);
+function normalizeLineBreaks(text) {
+  let i = 0;
+  while ((i = text.indexOf("\r", i)) !== -1) {
+    text = text[i + 1] === "\n" ? text.slice(0, i) + text.slice(i + 1) : text.slice(0, i) + "\n" + text.slice(i + 1);
   }
-  return xml.replace(/\r\n?/g, "\n");
+  return text;
 }
 // src/index.ts