npm - @rgrove/parse-xml - Versions diffs - 2.0.4 → 4.0.0 - Mend

@rgrove/parse-xml 2.0.4 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/LICENSE +1 -1
package/README.md +84 -337
package/dist/browser.js +774 -0
package/dist/browser.js.map +7 -0
package/dist/global.min.js +10 -0
package/dist/global.min.js.map +7 -0
package/dist/index.d.ts +24 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +50 -0
package/dist/index.js.map +1 -0
package/dist/lib/Parser.d.ts +218 -0
package/dist/lib/Parser.d.ts.map +1 -0
package/dist/lib/Parser.js +638 -0
package/dist/lib/Parser.js.map +1 -0
package/dist/lib/StringScanner.d.ts +97 -0
package/dist/lib/StringScanner.d.ts.map +1 -0
package/dist/lib/StringScanner.js +210 -0
package/dist/lib/StringScanner.js.map +1 -0
package/dist/lib/XmlCdata.d.ts +8 -0
package/dist/lib/XmlCdata.d.ts.map +1 -0
package/dist/lib/XmlCdata.js +15 -0
package/dist/lib/XmlCdata.js.map +1 -0
package/dist/lib/XmlComment.d.ts +16 -0
package/dist/lib/XmlComment.d.ts.map +1 -0
package/dist/lib/XmlComment.js +23 -0
package/dist/lib/XmlComment.js.map +1 -0
package/dist/lib/XmlDocument.d.ts +29 -0
package/dist/lib/XmlDocument.d.ts.map +1 -0
package/dist/lib/XmlDocument.js +47 -0
package/dist/lib/XmlDocument.js.map +1 -0
package/dist/lib/XmlElement.d.ts +40 -0
package/dist/lib/XmlElement.d.ts.map +1 -0
package/dist/lib/XmlElement.js +51 -0
package/dist/lib/XmlElement.js.map +1 -0
package/dist/lib/XmlNode.d.ts +74 -0
package/dist/lib/XmlNode.d.ts.map +1 -0
package/dist/lib/XmlNode.js +96 -0
package/dist/lib/XmlNode.js.map +1 -0
package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
package/dist/lib/XmlProcessingInstruction.js +25 -0
package/dist/lib/XmlProcessingInstruction.js.map +1 -0
package/dist/lib/XmlText.d.ts +16 -0
package/dist/lib/XmlText.d.ts.map +1 -0
package/dist/lib/XmlText.js +23 -0
package/dist/lib/XmlText.js.map +1 -0
package/dist/lib/syntax.d.ts +69 -0
package/dist/lib/syntax.d.ts.map +1 -0
package/dist/lib/syntax.js +133 -0
package/dist/lib/syntax.js.map +1 -0
package/dist/lib/types.d.ts +5 -0
package/dist/lib/types.d.ts.map +1 -0
package/dist/lib/types.js +3 -0
package/dist/lib/types.js.map +1 -0
package/package.json +36 -22
package/src/index.ts +30 -0
package/src/lib/Parser.ts +819 -0
package/src/lib/StringScanner.ts +254 -0
package/src/lib/XmlCdata.ts +11 -0
package/src/lib/XmlComment.ts +26 -0
package/src/lib/XmlDocument.ts +57 -0
package/src/lib/XmlElement.ts +81 -0
package/src/lib/XmlNode.ts +107 -0
package/src/lib/XmlProcessingInstruction.ts +35 -0
package/src/lib/XmlText.ts +26 -0
package/src/lib/syntax.ts +136 -0
package/src/lib/types.ts +2 -0
package/CHANGELOG.md +0 -89
package/dist/commonjs/index.js +0 -434
package/dist/commonjs/lib/syntax.js +0 -262
package/dist/umd/parse-xml.min.js +0 -1
package/src/index.js +0 -451
package/src/lib/syntax.js +0 -263

package/src/lib/syntax.ts ADDED Viewed

@@ -0,0 +1,136 @@
+/**
+ * Regular expression that matches a run of one or more `AttValue` characters
+ * in a double-quoted attribute value.
+ *
+ * The character class excludes exactly three characters: the double quote
+ * (which would close the value), the ampersand (which starts a reference), and
+ * the less-than sign.
+ *
+ * The sticky (`y`) flag means a match is only attempted at the position given
+ * by the regex's `lastIndex`; it never scans ahead in the string. Because
+ * `lastIndex` is mutable state on this shared object, callers are expected to
+ * set it before each use.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+ */
+export const attValueCharDoubleQuote = /[^"&<]+/y;
+/**
+ * Regular expression that matches a run of one or more `AttValue` characters
+ * in a single-quoted attribute value.
+ *
+ * The character class excludes exactly three characters: the single quote
+ * (which would close the value), the ampersand (which starts a reference), and
+ * the less-than sign.
+ *
+ * The sticky (`y`) flag means a match is only attempted at the position given
+ * by the regex's `lastIndex`; it never scans ahead in the string. Because
+ * `lastIndex` is mutable state on this shared object, callers are expected to
+ * set it before each use.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+ */
+export const attValueCharSingleQuote = /[^'&<]+/y;
+/**
+ * Regular expression that matches a whitespace character that should be
+ * normalized to a space character in an attribute value.
+ *
+ * Only tab and line feed are matched. A literal space needs no replacement,
+ * and carriage return is not part of this pattern.
+ * NOTE(review): the spec's normalization list also includes #xD; presumably
+ * carriage returns are already converted to line feeds before this pattern is
+ * applied -- confirm against the parser.
+ *
+ * The global (`g`) flag lets a single `String.prototype.replace()` call
+ * substitute every occurrence rather than just the first.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
+ */
+export const attValueNormalizedWhitespace = /[\t\n]/g;
+/**
+ * Regular expression that matches any one of the delimiters that signal the
+ * end of XML `CharData` content: the start of markup (`<`), the start of a
+ * reference (`&`), or the CDATA section close delimiter (`]]>`).
+ *
+ * The pattern has no flags, so it is neither sticky nor global: a search
+ * reports the first delimiter found anywhere in the input and the regex holds
+ * no `lastIndex` state between uses.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
+ */
+export const endCharData = /<|&|]]>/;
+/**
+ * Mapping of predefined entity names to their replacement values.
+ *
+ * Keys are bare entity names (`amp`, not `&amp;`), so the `&` and `;`
+ * delimiters must be removed before a lookup.
+ *
+ * The map is built on a null-prototype object, which means a lookup for a name
+ * such as `constructor` or `toString` yields `undefined` instead of an
+ * inherited `Object.prototype` member. It is then frozen so that the five
+ * entries cannot be added to, removed, or reassigned at runtime.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent
+ */
+export const predefinedEntities: Readonly<{[name: string]: string;}> = Object.freeze(Object.assign(Object.create(null), {
+  amp: '&',
+  apos: "'",
+  gt: '>',
+  lt: '<',
+  quot: '"',
+}));
+/**
+ * Returns `true` if _char_ is an XML `NameChar`, `false` if it isn't.
+ *
+ * A `NameChar` is any `NameStartChar` plus the ASCII digits, `-`, `.`, the
+ * middle dot, the combining diacritical marks, and the two tie characters
+ * listed below.
+ *
+ * Only the first code point of _char_ is examined. An empty string decodes to
+ * `-1`, which matches none of the ranges, so the result is `false`.
+ *
+ * @param char String whose first code point is tested.
+ * @returns Whether that code point may appear in an XML name after the first
+ *   character.
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
+ */
+export function isNameChar(char: string): boolean {
+  let cp = getCodePoint(char);
+  // Including the most common NameStartChars here improves performance
+  // slightly.
+  return (cp >= 0x61 && cp <= 0x7A) // a-z
+    || (cp >= 0x41 && cp <= 0x5A) // A-Z
+    || (cp >= 0x30 && cp <= 0x39) // 0-9
+    || cp === 0x2D // -
+    || cp === 0x2E // .
+    || cp === 0xB7 // middle dot
+    || (cp >= 0x300 && cp <= 0x36F) // combining diacritical marks
+    || (cp >= 0x203F && cp <= 0x2040) // undertie, character tie
+    || isNameStartChar(char, cp); // reuse the decoded code point for the remaining ranges
+}
+/**
+ * Returns `true` if _char_ is an XML `NameStartChar`, `false` if it isn't.
+ *
+ * Digits, `-` and `.` are deliberately absent: they are allowed inside a name
+ * but not as its first character.
+ *
+ * @param char String whose first code point is tested. It is only read when
+ *   _cp_ is omitted.
+ * @param cp Code point to test. Defaults to the first code point of _char_;
+ *   callers that have already decoded it can pass it to skip a second decode.
+ * @returns Whether the code point may begin an XML name.
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
+ */
+export function isNameStartChar(char: string, cp = getCodePoint(char)): boolean {
+  return (cp >= 0x61 && cp <= 0x7A) // a-z
+    || (cp >= 0x41 && cp <= 0x5A) // A-Z
+    || cp === 0x3A // :
+    || cp === 0x5F // _
+    || (cp >= 0xC0 && cp <= 0xD6) // stops before 0xD7 (multiplication sign)
+    || (cp >= 0xD8 && cp <= 0xF6) // stops before 0xF7 (division sign)
+    || (cp >= 0xF8 && cp <= 0x2FF)
+    || (cp >= 0x370 && cp <= 0x37D) // stops before 0x37E (Greek question mark)
+    || (cp >= 0x37F && cp <= 0x1FFF)
+    || (cp >= 0x200C && cp <= 0x200D) // zero width non-joiner, zero width joiner
+    || (cp >= 0x2070 && cp <= 0x218F)
+    || (cp >= 0x2C00 && cp <= 0x2FEF)
+    || (cp >= 0x3001 && cp <= 0xD7FF) // ends just below the surrogate range
+    || (cp >= 0xF900 && cp <= 0xFDCF)
+    || (cp >= 0xFDF0 && cp <= 0xFFFD) // excludes 0xFFFE and 0xFFFF
+    || (cp >= 0x10000 && cp <= 0xEFFFF); // supplementary planes up to 0xEFFFF
+}
+/**
+ * Returns `true` if _char_ is a valid reference character (which may appear
+ * between `&` and `;` in a reference), `false` otherwise.
+ *
+ * A reference character is either `#`, which introduces a character
+ * reference, or any `NameChar`, which covers entity names as well as the
+ * decimal and hexadecimal digits of a character reference.
+ *
+ * @param char String to test. It is compared to `#` as a whole and otherwise
+ *   judged by its first code point.
+ * @returns Whether _char_ may appear inside a reference.
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-references
+ */
+export function isReferenceChar(char: string): boolean {
+  return char === '#' || isNameChar(char);
+}
+/**
+ * Returns `true` if _char_ is an XML whitespace character, `false` otherwise.
+ *
+ * Only the four characters below count. Other Unicode spaces, such as the
+ * no-break space, are not matched.
+ *
+ * @param char String whose first code point is tested. An empty string yields
+ *   `false`.
+ * @returns Whether that code point is space, tab, line feed or carriage
+ *   return.
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
+ */
+export function isWhitespace(char: string): boolean {
+  let cp = getCodePoint(char);
+  return cp === 0x20 // space
+    || cp === 0x9 // tab
+    || cp === 0xA // line feed
+    || cp === 0xD; // carriage return
+}
+/**
+ * Returns `true` if _cp_ is a valid XML `Char` code point, `false`
+ * otherwise.
+ *
+ * Unlike the other predicates in this file, this one takes a numeric code
+ * point rather than a string. The ranges reject control characters below
+ * 0x20 other than tab, line feed and carriage return, the surrogate range
+ * 0xD800-0xDFFF, the values 0xFFFE and 0xFFFF, and anything above 0x10FFFF.
+ *
+ * @param cp Code point to test.
+ * @returns Whether _cp_ falls inside one of the ranges allowed for `Char`.
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
+ */
+export function isXmlCodePoint(cp: number): boolean {
+  return cp === 0x9 // tab
+    || cp === 0xA // line feed
+    || cp === 0xD // carriage return
+    || (cp >= 0x20 && cp <= 0xD7FF) // from space up to just below the surrogates
+    || (cp >= 0xE000 && cp <= 0xFFFD) // resumes after the surrogates
+    || (cp >= 0x10000 && cp <= 0x10FFFF); // supplementary planes
+}
+/**
+ * Returns the Unicode code point value of the first character of _char_, or
+ * `-1` if _char_ is empty.
+ *
+ * Any characters after the first code point are ignored. A surrogate pair at
+ * the start of the string is decoded to its single supplementary code point.
+ *
+ * @param char String to decode.
+ * @returns The first code point, or `-1` when there is none.
+ */
+function getCodePoint(char: string): number {
+  // `codePointAt(0)` is `undefined` only for an empty string. `??` is used
+  // instead of `||` so that U+0000, whose code point is the falsy value `0`,
+  // is reported as 0 rather than being mistaken for "empty".
+  return char.codePointAt(0) ?? -1;
+}

package/src/lib/types.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export type JsonObject = {[key in string]?: JsonValue};
2	+ export type JsonValue = string | number | boolean | JsonObject | JsonValue[] | null;

package/CHANGELOG.md DELETED Viewed

@@ -1,89 +0,0 @@
-# parse-xml changelog
-All notable changes to parse-xml are documented in this file. The format is
-based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project
-adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## 2.0.4 (2020-05-01)
-### Fixed
--   Extremely long attribute values no longer cause the parser to throw a
-    "Maximum call stack size exceeded" `RangeError`. [#13] (@rossj)
-[#13]:https://github.com/rgrove/parse-xml/pull/13
-## 2.0.3 (2020-04-20)
-### Fixed
--   Attribute values with many consecutive character references (such as `&lt;`)
-    no longer cause the parser to hang. [#10] (@rossj)
-[#10]:https://github.com/rgrove/parse-xml/pull/10
-## 2.0.2 (2020-01-10)
-### Fixed
--   Whitespace in attribute values is now normalized correctly. [#7]
-    Previously, attribute values were normalized according to the rules for
-    non-CDATA attributes, but this was incorrect and based on a misreading of
-    the spec.
-    Attribute values are now correctly parsed as CDATA, meaning that whitespace
-    is not collapsed or trimmed and whitespace character entities are resolved
-    to their respective characters rather than being normalized to spaces (which
-    was incorrect even by the non-CDATA rules!).
-[#7]:https://github.com/rgrove/parse-xml/pull/7
-## 2.0.1 (2019-04-09)
-### Fixed
--   A carriage return (`\r`) character that isn't followed by a line feed (`\n`)
-    character is now [correctly normalized][xml-line-ends] to a line feed before
-    parsing.
-[xml-line-ends]:https://www.w3.org/TR/2008/REC-xml-20081126/#sec-line-ends
-## 2.0.0 (2019-01-20)
-### Added
--   There's a new minified UMD bundle at `dist/umd/parse-xml.min.js` in the npm
-    package. This may be useful if you want to load parse-xml directly in a
-    browser using a service like
-    [unpkg](https://unpkg.com/@rgrove/parse-xml/dist/umd/parse-xml.min.js) or
-    [jsDelivr](https://cdn.jsdelivr.net/npm/@rgrove/parse-xml/dist/umd/parse-xml.min.js).
-### Changed
--   parse-xml no longer depends on CoreJS polyfills or the Babel runtime, which
-    reduces the browser bundle size significantly. If you need to support older
-    browsers, you should provide your own polyfills for `Object.assign()`,
-    `Object.freeze()`, and `String.fromCodePoint()`.
--   The browser-friendly CommonJS build has moved from `dist/` to
-    `dist/commonjs/` in the npm package.
-## 1.1.1 (2017-09-20)
-### Fixed
--   Attribute values are no longer truncated at the first `=` character.
-## 1.1.0 (2017-09-10)
-### Added
--   New parsing option `resolveUndefinedEntity`. [#2]
-    ([@retorquere](https://github.com/retorquere))
-[#2]:https://github.com/rgrove/parse-xml/pull/2
-## 1.0.0 (2017-06-04)
--   Initial release.

package/dist/commonjs/index.js DELETED Viewed

@@ -1,434 +0,0 @@
-'use strict';
-var emptyArray = Object.freeze([]);
-var emptyObject = Object.freeze(Object.create(null));
-var namedEntities = Object.freeze({
-  '&amp;': '&',
-  '&apos;': "'",
-  '&gt;': '>',
-  '&lt;': '<',
-  '&quot;': '"'
-});
-var NODE_TYPE_CDATA = 'cdata';
-var NODE_TYPE_COMMENT = 'comment';
-var NODE_TYPE_DOCUMENT = 'document';
-var NODE_TYPE_ELEMENT = 'element';
-var NODE_TYPE_TEXT = 'text';
-var Syntax;
-module.exports = function parseXml(xml, options) {
-  if (options === void 0) {
-    options = emptyObject;
-  }
-  if (Syntax === void 0) {
-    // Lazy require to defer regex parsing until first use.
-    Syntax = require('./lib/syntax');
-  }
-  if (xml[0] === "\uFEFF") {
-    // Strip byte order mark.
-    xml = xml.slice(1);
-  }
-  xml = xml.replace(/\r\n?/g, '\n'); // Normalize CRLF and CR to LF.
-  var doc = {
-    type: NODE_TYPE_DOCUMENT,
-    children: [],
-    parent: null,
-    toJSON: nodeToJson
-  };
-  var state = {
-    length: xml.length,
-    options: options,
-    parent: doc,
-    pos: 0,
-    prevPos: 0,
-    xml: xml
-  };
-  state.replaceReference = replaceReference.bind(state);
-  consumeProlog(state);
-  if (!consumeElement(state)) {
-    error(state, 'Root element is missing or invalid');
-  }
-  while (consumeMisc(state)) {} // eslint-disable-line no-empty
-  if (!isEof(state)) {
-    error(state, "Extra content at the end of the document");
-  }
-  return doc;
-}; // -- Private Functions --------------------------------------------------------
-function addNode(state, node) {
-  node.parent = state.parent;
-  node.toJSON = nodeToJson;
-  state.parent.children.push(node);
-}
-function addText(state, text) {
-  var children = state.parent.children;
-  var prevNode = children[children.length - 1];
-  if (prevNode !== void 0 && prevNode.type === NODE_TYPE_TEXT) {
-    // The previous node is a text node, so we can append to it and avoid
-    // creating another node.
-    prevNode.text += text;
-  } else {
-    addNode(state, {
-      type: NODE_TYPE_TEXT,
-      text: text
-    });
-  }
-} // Each `consume*` function takes the current state as an argument and returns
-// `true` if `state.pos` was advanced (meaning some XML was consumed) or `false`
-// if nothing was consumed.
-function consumeCDSect(state) {
-  var _scan = scan(state, Syntax.Anchored.CDSect),
-      match = _scan[0],
-      text = _scan[1];
-  if (match === void 0) {
-    return false;
-  }
-  if (state.options.preserveCdata) {
-    addNode(state, {
-      type: NODE_TYPE_CDATA,
-      text: text
-    });
-  } else {
-    addText(state, text);
-  }
-  return true;
-}
-function consumeCharData(state) {
-  var _scan2 = scan(state, Syntax.Anchored.CharData),
-      text = _scan2[0];
-  if (text === void 0) {
-    return false;
-  }
-  var cdataCloseIndex = text.indexOf(']]>');
-  if (cdataCloseIndex !== -1) {
-    state.pos = state.prevPos + cdataCloseIndex;
-    error(state, 'Element content may not contain the CDATA section close delimiter `]]>`');
-  } // Note: XML 1.0 5th ed. says `CharData` is "any string of characters which
-  // does not contain the start-delimiter of any markup and does not include the
-  // CDATA-section-close delimiter", but the conformance test suite and
-  // well-established parsers like libxml seem to restrict `CharData` to
-  // characters that match the `Char` symbol, so that's what I've done here.
-  if (!Syntax.CharOnly.test(text)) {
-    state.pos = state.prevPos + text.search(new RegExp("(?!" + Syntax.Char.source + ")"));
-    error(state, 'Element content contains an invalid character');
-  }
-  addText(state, text);
-  return true;
-}
-function consumeComment(state) {
-  var _scan3 = scan(state, Syntax.Anchored.Comment),
-      content = _scan3[1];
-  if (content === void 0) {
-    return false;
-  }
-  if (state.options.preserveComments) {
-    addNode(state, {
-      type: NODE_TYPE_COMMENT,
-      content: content.trim()
-    });
-  }
-  return true;
-}
-function consumeDoctypeDecl(state) {
-  return scan(state, Syntax.Anchored.doctypedecl).length > 0;
-}
-function consumeElement(state) {
-  var _scan4 = scan(state, Syntax.Anchored.EmptyElemTag),
-      tag = _scan4[0],
-      name = _scan4[1],
-      attrs = _scan4[2];
-  var isEmpty = tag !== void 0;
-  if (!isEmpty) {
-    var _scan5 = scan(state, Syntax.Anchored.STag);
-    tag = _scan5[0];
-    name = _scan5[1];
-    attrs = _scan5[2];
-    if (tag === void 0) {
-      return false;
-    }
-  }
-  var parent = state.parent;
-  var parsedAttrs = parseAttrs(state, attrs);
-  var node = {
-    type: NODE_TYPE_ELEMENT,
-    name: name,
-    attributes: parsedAttrs,
-    children: []
-  };
-  var xmlSpace = parsedAttrs['xml:space'];
-  if (xmlSpace === 'preserve' || xmlSpace !== 'default' && parent.preserveWhitespace) {
-    node.preserveWhitespace = true;
-  }
-  if (!isEmpty) {
-    state.parent = node;
-    consumeCharData(state);
-    while (consumeElement(state) || consumeReference(state) || consumeCDSect(state) || consumePI(state) || consumeComment(state)) {
-      consumeCharData(state);
-    }
-    var _scan6 = scan(state, Syntax.Anchored.ETag),
-        endName = _scan6[1];
-    if (endName !== name) {
-      state.pos = state.prevPos;
-      error(state, "Missing end tag for element " + name);
-    }
-    state.parent = parent;
-  }
-  addNode(state, node);
-  return true;
-}
-function consumeMisc(state) {
-  return consumeComment(state) || consumePI(state) || consumeWhitespace(state);
-}
-function consumePI(state) {
-  var _scan7 = scan(state, Syntax.Anchored.PI),
-      match = _scan7[0],
-      target = _scan7[1];
-  if (match === void 0) {
-    return false;
-  }
-  if (target.toLowerCase() === 'xml') {
-    state.pos = state.prevPos;
-    error(state, 'XML declaration is only allowed at the start of the document');
-  }
-  return true;
-}
-function consumeProlog(state) {
-  var pos = state.pos;
-  scan(state, Syntax.Anchored.XMLDecl);
-  while (consumeMisc(state)) {} // eslint-disable-line no-empty
-  if (consumeDoctypeDecl(state)) {
-    while (consumeMisc(state)) {} // eslint-disable-line no-empty
-  }
-  return state.pos > pos;
-}
-function consumeReference(state) {
-  var _scan8 = scan(state, Syntax.Anchored.Reference),
-      ref = _scan8[0];
-  if (ref === void 0) {
-    return false;
-  }
-  addText(state, state.replaceReference(ref));
-  return true;
-}
-function consumeWhitespace(state) {
-  return scan(state, Syntax.Anchored.S).length > 0;
-}
-function error(state, message) {
-  var pos = state.pos,
-      xml = state.xml;
-  var column = 1;
-  var excerpt = '';
-  var line = 1; // Find the line and column where the error occurred.
-  for (var i = 0; i < pos; ++i) {
-    var _char = xml[i];
-    if (_char === '\n') {
-      column = 1;
-      excerpt = '';
-      line += 1;
-    } else {
-      column += 1;
-      excerpt += _char;
-    }
-  }
-  var eol = xml.indexOf('\n', pos);
-  excerpt += eol === -1 ? xml.slice(pos) : xml.slice(pos, eol);
-  var excerptStart = 0; // Keep the excerpt below 50 chars, but always keep the error position in
-  // view.
-  if (excerpt.length > 50) {
-    if (column < 40) {
-      excerpt = excerpt.slice(0, 50);
-    } else {
-      excerptStart = column - 20;
-      excerpt = excerpt.slice(excerptStart, column + 30);
-    }
-  }
-  var err = new Error(message + " (line " + line + ", column " + column + ")\n" + ("  " + excerpt + "\n") + ' '.repeat(column - excerptStart + 1) + '^\n');
-  err.column = column;
-  err.excerpt = excerpt;
-  err.line = line;
-  err.pos = pos;
-  throw err;
-}
-function isEof(state) {
-  return state.pos >= state.length - 1;
-}
-function nodeToJson() {
-  var json = Object.assign(Object.create(null), this); // eslint-disable-line no-invalid-this
-  delete json.parent;
-  return json;
-}
-function normalizeAttrValue(state, value) {
-  return value.replace(/[\x20\t\r\n]/g, ' ').replace(Syntax.Global.Reference, state.replaceReference);
-}
-function parseAttrs(state, attrs) {
-  var parsedAttrs = Object.create(null);
-  if (!attrs) {
-    return parsedAttrs;
-  }
-  var attrPairs = attrs.match(Syntax.Global.Attribute).sort();
-  for (var i = 0, len = attrPairs.length; i < len; ++i) {
-    var attrPair = attrPairs[i];
-    var eqMatch = attrPair.match(Syntax.Eq);
-    var name = attrPair.slice(0, eqMatch.index);
-    var value = attrPair.slice(eqMatch.index + eqMatch[0].length);
-    if (name in parsedAttrs) {
-      state.pos = state.prevPos;
-      error(state, "Attribute `" + name + "` redefined");
-    }
-    value = normalizeAttrValue(state, value.slice(1, -1));
-    if (name === 'xml:space') {
-      if (value !== 'default' && value !== 'preserve') {
-        state.pos = state.prevPos;
-        error(state, "Value of the `xml:space` attribute must be \"default\" or \"preserve\"");
-      }
-    }
-    parsedAttrs[name] = value;
-  }
-  return parsedAttrs;
-}
-function replaceReference(ref) {
-  var state = this; // eslint-disable-line no-invalid-this
-  if (ref[ref.length - 1] !== ';') {
-    error(state, "Invalid reference: `" + ref + "`");
-  }
-  if (ref[1] === '#') {
-    // This is a character entity.
-    var codePoint;
-    if (ref[2] === 'x') {
-      codePoint = parseInt(ref.slice(3, -1), 16);
-    } else {
-      codePoint = parseInt(ref.slice(2, -1), 10);
-    }
-    if (isNaN(codePoint)) {
-      state.pos = state.prevPos;
-      error(state, "Invalid character entity `" + ref + "`");
-    }
-    var _char2 = String.fromCodePoint(codePoint);
-    if (!Syntax.Char.test(_char2)) {
-      state.pos = state.prevPos;
-      error(state, "Invalid character entity `" + ref + "`");
-    }
-    return _char2;
-  } // This is a named entity.
-  var value = namedEntities[ref];
-  if (value !== void 0) {
-    return value;
-  }
-  if (state.options.resolveUndefinedEntity) {
-    var resolvedValue = state.options.resolveUndefinedEntity(ref);
-    if (resolvedValue !== null && resolvedValue !== void 0) {
-      return resolvedValue;
-    }
-  }
-  if (state.options.ignoreUndefinedEntities) {
-    return ref;
-  }
-  state.pos = state.prevPos;
-  error(state, "Named entity isn't defined: `" + ref + "`");
-}
-function scan(state, regex) {
-  var pos = state.pos,
-      xml = state.xml;
-  var xmlToScan = pos > 0 ? xml.slice(pos) : xml;
-  var matches = xmlToScan.match(regex);
-  if (matches === null) {
-    return emptyArray;
-  }
-  state.prevPos = state.pos;
-  state.pos += matches[0].length;
-  return matches;
-}