npm - @rgrove/parse-xml - Versions diffs - 4.1.0 → 4.2.1 - Mend

@rgrove/parse-xml 4.1.0 → 4.2.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (28)

package/README.md +32 -32
package/dist/browser.js +217 -237
package/dist/browser.js.map +3 -3
package/dist/global.min.js +9 -9
package/dist/global.min.js.map +3 -3
package/dist/index.js +2 -2
package/dist/index.js.map +1 -1
package/dist/lib/Parser.d.ts +10 -1
package/dist/lib/Parser.d.ts.map +1 -1
package/dist/lib/Parser.js +58 -42
package/dist/lib/Parser.js.map +1 -1
package/dist/lib/StringScanner.d.ts +10 -16
package/dist/lib/StringScanner.d.ts.map +1 -1
package/dist/lib/StringScanner.js +54 -77
package/dist/lib/StringScanner.js.map +1 -1
package/dist/lib/XmlDeclaration.js.map +1 -1
package/dist/lib/XmlDocument.js.map +1 -1
package/dist/lib/XmlDocumentType.js.map +1 -1
package/dist/lib/XmlElement.js.map +1 -1
package/dist/lib/XmlError.js.map +1 -1
package/dist/lib/XmlNode.js.map +1 -1
package/dist/lib/syntax.d.ts.map +1 -1
package/dist/lib/syntax.js +17 -22
package/dist/lib/syntax.js.map +1 -1
package/package.json +14 -19
package/src/lib/Parser.ts +62 -45
package/src/lib/StringScanner.ts +56 -93
package/src/lib/syntax.ts +11 -17

package/src/lib/StringScanner.ts CHANGED

@@ -81,54 +81,60 @@ export class StringScanner {
    *
    * If no characters could be consumed, an empty string will be returned.
    */
-  consume(count = 1): string {
-    let chars = this.peek(count);
-    this.advance(count);
+  consume(charCount = 1): string {
+    let chars = this.peek(charCount);
+    this.advance(charCount);
     return chars;
   }
   /**
-   * Consumes a match for the given sticky regex, advances the scanner, updates
-   * the `lastIndex` property of the regex, and returns the matching string.
+   * Consumes and returns the given number of bytes if possible, advancing the
+   * scanner and stopping if the end of the string is reached.
    *
-   * The regex must have a sticky flag ("y") so that its `lastIndex` prop can be
-   * used to anchor the match at the current scanner position.
+   * It's up to the caller to ensure that the given byte count doesn't split a
+   * multibyte character.
    *
-   * Returns the consumed string, or an empty string if nothing was consumed.
+   * If no bytes could be consumed, an empty string will be returned.
    */
-  consumeMatch(regex: RegExp): string {
-    if (!regex.sticky) {
-      throw new Error('`regex` must have a sticky flag ("y")');
-    }
-    regex.lastIndex = this.charIndexToByteIndex();
-    let result = regex.exec(this.string);
-    if (result === null || result.length === 0) {
-      return emptyString;
-    }
-    let match = result[0] as string;
-    this.advance(this.charLength(match));
-    return match;
+  consumeBytes(byteCount: number): string {
+    let byteIndex = this.charIndexToByteIndex();
+    let result = this.string.slice(byteIndex, byteIndex + byteCount);
+    this.advance(this.charLength(result));
+    return result;
   }
   /**
-   * Consumes and returns all characters for which the given function returns a
-   * truthy value, stopping on the first falsy return value or if the end of the
-   * input is reached.
+   * Consumes and returns all characters for which the given function returns
+   * `true`, stopping when `false` is returned or the end of the input is
+   * reached.
    */
   consumeMatchFn(fn: (char: string) => boolean): string {
-    let char;
-    let match = emptyString;
+    let { length, multiByteMode, string } = this;
+    let startByteIndex = this.charIndexToByteIndex();
+    let endByteIndex = startByteIndex;
+    if (multiByteMode) {
+      while (endByteIndex < length) {
+        let char = string[endByteIndex] as string;
+        let isSurrogatePair = char >= '\uD800' && char <= '\uDBFF';
-    while ((char = this.peek()) && fn(char)) {
-      match += char;
-      this.advance();
+        if (isSurrogatePair) {
+          char += string[endByteIndex + 1];
+        }
+        if (!fn(char)) {
+          break;
+        }
+        endByteIndex += isSurrogatePair ? 2 : 1;
+      }
+    } else {
+      while (endByteIndex < length && fn(string[endByteIndex] as string)) {
+        ++endByteIndex;
+      }
     }
-    return match;
+    return this.consumeBytes(endByteIndex - startByteIndex);
   }
   /**
@@ -139,35 +145,11 @@ export class StringScanner {
    * string will be returned and the scanner will not be advanced.
    */
   consumeString(stringToConsume: string): string {
-    if (this.consumeStringFast(stringToConsume)) {
-      return stringToConsume;
-    }
-    if (this.multiByteMode) {
-      let { length } = stringToConsume;
-      let charLengthToMatch = this.charLength(stringToConsume);
-      if (charLengthToMatch !== length
-          && stringToConsume === this.peek(charLengthToMatch)) {
-        this.advance(charLengthToMatch);
-        return stringToConsume;
-      }
-    }
-    return emptyString;
-  }
-  /**
-   * Does the same thing as `consumeString()`, but doesn't support consuming
-   * multibyte characters. This can be faster if you only need to match single
-   * byte characters.
-   */
-  consumeStringFast(stringToConsume: string): string {
     let { length } = stringToConsume;
+    let byteIndex = this.charIndexToByteIndex();
-    if (this.peek(length) === stringToConsume) {
-      this.advance(length);
+    if (stringToConsume === this.string.slice(byteIndex, byteIndex + length)) {
+      this.advance(length === 1 ? 1 : this.charLength(stringToConsume));
       return stringToConsume;
     }
@@ -182,16 +164,13 @@ export class StringScanner {
    * Returns the consumed string, or an empty string if nothing was consumed.
    */
   consumeUntilMatch(regex: RegExp): string {
-    let restOfString = this.string.slice(this.charIndexToByteIndex());
-    let matchByteIndex = restOfString.search(regex);
+    let matchByteIndex = this.string
+      .slice(this.charIndexToByteIndex())
+      .search(regex);
-    if (matchByteIndex <= 0) {
-      return emptyString;
-    }
-    let result = restOfString.slice(0, matchByteIndex);
-    this.advance(this.charLength(result));
-    return result;
+    return matchByteIndex > 0
+      ? this.consumeBytes(matchByteIndex)
+      : emptyString;
   }
   /**
@@ -202,17 +181,12 @@ export class StringScanner {
    * Returns the consumed string, or an empty string if nothing was consumed.
    */
   consumeUntilString(searchString: string): string {
-    let { string } = this;
     let byteIndex = this.charIndexToByteIndex();
-    let matchByteIndex = string.indexOf(searchString, byteIndex);
-    if (matchByteIndex <= 0) {
-      return emptyString;
-    }
+    let matchByteIndex = this.string.indexOf(searchString, byteIndex);
-    let result = string.slice(byteIndex, matchByteIndex);
-    this.advance(this.charLength(result));
-    return result;
+    return matchByteIndex > 0
+      ? this.consumeBytes(matchByteIndex - byteIndex)
+      : emptyString;
   }
   /**
@@ -221,22 +195,11 @@ export class StringScanner {
    * input string.
    */
   peek(count = 1): string {
-    let { charIndex, multiByteMode, string } = this;
+    let { charIndex, string } = this;
-    if (multiByteMode) {
-      // Inlining this comparison instead of checking `this.isEnd` improves perf
-      // slightly since `peek()` is called so frequently.
-      if (charIndex >= this.charCount) {
-        return emptyString;
-      }
-      return string.slice(
-        this.charIndexToByteIndex(charIndex),
-        this.charIndexToByteIndex(charIndex + count),
-      );
-    }
-    return string.slice(charIndex, charIndex + count);
+    return this.multiByteMode
+      ? string.slice(this.charIndexToByteIndex(charIndex), this.charIndexToByteIndex(charIndex + count))
+      : string.slice(charIndex, charIndex + count);
   }
   /**

package/src/lib/syntax.ts CHANGED

@@ -4,7 +4,7 @@
  *
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
  */
-export const attValueCharDoubleQuote = /[^"&<]+/y;
+export const attValueCharDoubleQuote = /["&<]/;
 /**
  * Regular expression that matches one or more `AttValue` characters in a
@@ -12,7 +12,7 @@ export const attValueCharDoubleQuote = /[^"&<]+/y;
  *
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
  */
-export const attValueCharSingleQuote = /[^'&<]+/y;
+export const attValueCharSingleQuote = /['&<]/;
 /**
  * Regular expression that matches a whitespace character that should be
@@ -49,7 +49,7 @@ export const predefinedEntities: Readonly<{[name: string]: string;}> = Object.fr
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
  */
 export function isNameChar(char: string): boolean {
-  let cp = getCodePoint(char);
+  let cp = char.codePointAt(0) as number;
   // Including the most common NameStartChars here improves performance
   // slightly.
@@ -60,7 +60,8 @@ export function isNameChar(char: string): boolean {
     || cp === 0x2E // .
     || cp === 0xB7
     || (cp >= 0x300 && cp <= 0x36F)
-    || (cp >= 0x203F && cp <= 0x2040)
+    || cp === 0x203F
+    || cp === 0x2040
     || isNameStartChar(char, cp);
 }
@@ -69,7 +70,7 @@ export function isNameChar(char: string): boolean {
  *
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
  */
-export function isNameStartChar(char: string, cp = getCodePoint(char)): boolean {
+export function isNameStartChar(char: string, cp = char.codePointAt(0) as number): boolean {
   return (cp >= 0x61 && cp <= 0x7A) // a-z
     || (cp >= 0x41 && cp <= 0x5A) // A-Z
     || cp === 0x3A // :
@@ -79,7 +80,8 @@ export function isNameStartChar(char: string, cp = getCodePoint(char)): boolean
     || (cp >= 0xF8 && cp <= 0x2FF)
     || (cp >= 0x370 && cp <= 0x37D)
     || (cp >= 0x37F && cp <= 0x1FFF)
-    || (cp >= 0x200C && cp <= 0x200D)
+    || cp === 0x200C
+    || cp === 0x200D
     || (cp >= 0x2070 && cp <= 0x218F)
     || (cp >= 0x2C00 && cp <= 0x2FEF)
     || (cp >= 0x3001 && cp <= 0xD7FF)
@@ -104,7 +106,7 @@ export function isReferenceChar(char: string): boolean {
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
  */
 export function isWhitespace(char: string): boolean {
-  let cp = getCodePoint(char);
+  let cp = char.codePointAt(0);
   return cp === 0x20
     || cp === 0x9
@@ -119,18 +121,10 @@ export function isWhitespace(char: string): boolean {
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
  */
 export function isXmlCodePoint(cp: number): boolean {
-  return cp === 0x9
+  return (cp >= 0x20 && cp <= 0xD7FF)
     || cp === 0xA
+    || cp === 0x9
     || cp === 0xD
-    || (cp >= 0x20 && cp <= 0xD7FF)
     || (cp >= 0xE000 && cp <= 0xFFFD)
     || (cp >= 0x10000 && cp <= 0x10FFFF);
 }
-/**
- * Returns the Unicode code point value of the given character, or `-1` if
- * _char_ is empty.
- */
-function getCodePoint(char: string): number {
-  return char.codePointAt(0) || -1;
-}