npm - @rgrove/parse-xml - Versions diffs - 2.0.4 → 4.0.0 - Mend

@rgrove/parse-xml 2.0.4 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

package/LICENSE +1 -1
package/README.md +84 -337
package/dist/browser.js +774 -0
package/dist/browser.js.map +7 -0
package/dist/global.min.js +10 -0
package/dist/global.min.js.map +7 -0
package/dist/index.d.ts +24 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +50 -0
package/dist/index.js.map +1 -0
package/dist/lib/Parser.d.ts +218 -0
package/dist/lib/Parser.d.ts.map +1 -0
package/dist/lib/Parser.js +638 -0
package/dist/lib/Parser.js.map +1 -0
package/dist/lib/StringScanner.d.ts +97 -0
package/dist/lib/StringScanner.d.ts.map +1 -0
package/dist/lib/StringScanner.js +210 -0
package/dist/lib/StringScanner.js.map +1 -0
package/dist/lib/XmlCdata.d.ts +8 -0
package/dist/lib/XmlCdata.d.ts.map +1 -0
package/dist/lib/XmlCdata.js +15 -0
package/dist/lib/XmlCdata.js.map +1 -0
package/dist/lib/XmlComment.d.ts +16 -0
package/dist/lib/XmlComment.d.ts.map +1 -0
package/dist/lib/XmlComment.js +23 -0
package/dist/lib/XmlComment.js.map +1 -0
package/dist/lib/XmlDocument.d.ts +29 -0
package/dist/lib/XmlDocument.d.ts.map +1 -0
package/dist/lib/XmlDocument.js +47 -0
package/dist/lib/XmlDocument.js.map +1 -0
package/dist/lib/XmlElement.d.ts +40 -0
package/dist/lib/XmlElement.d.ts.map +1 -0
package/dist/lib/XmlElement.js +51 -0
package/dist/lib/XmlElement.js.map +1 -0
package/dist/lib/XmlNode.d.ts +74 -0
package/dist/lib/XmlNode.d.ts.map +1 -0
package/dist/lib/XmlNode.js +96 -0
package/dist/lib/XmlNode.js.map +1 -0
package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
package/dist/lib/XmlProcessingInstruction.js +25 -0
package/dist/lib/XmlProcessingInstruction.js.map +1 -0
package/dist/lib/XmlText.d.ts +16 -0
package/dist/lib/XmlText.d.ts.map +1 -0
package/dist/lib/XmlText.js +23 -0
package/dist/lib/XmlText.js.map +1 -0
package/dist/lib/syntax.d.ts +69 -0
package/dist/lib/syntax.d.ts.map +1 -0
package/dist/lib/syntax.js +133 -0
package/dist/lib/syntax.js.map +1 -0
package/dist/lib/types.d.ts +5 -0
package/dist/lib/types.d.ts.map +1 -0
package/dist/lib/types.js +3 -0
package/dist/lib/types.js.map +1 -0
package/package.json +36 -22
package/src/index.ts +30 -0
package/src/lib/Parser.ts +819 -0
package/src/lib/StringScanner.ts +254 -0
package/src/lib/XmlCdata.ts +11 -0
package/src/lib/XmlComment.ts +26 -0
package/src/lib/XmlDocument.ts +57 -0
package/src/lib/XmlElement.ts +81 -0
package/src/lib/XmlNode.ts +107 -0
package/src/lib/XmlProcessingInstruction.ts +35 -0
package/src/lib/XmlText.ts +26 -0
package/src/lib/syntax.ts +136 -0
package/src/lib/types.ts +2 -0
package/CHANGELOG.md +0 -89
package/dist/commonjs/index.js +0 -434
package/dist/commonjs/lib/syntax.js +0 -262
package/dist/umd/parse-xml.min.js +0 -1
package/src/index.js +0 -451
package/src/lib/syntax.js +0 -263

package/src/index.js DELETED

@@ -1,451 +0,0 @@
-'use strict';
-const emptyArray = Object.freeze([]);
-const emptyObject = Object.freeze(Object.create(null));
-const namedEntities = Object.freeze({
-  '&amp;': '&',
-  '&apos;': "'",
-  '&gt;': '>',
-  '&lt;': '<',
-  '&quot;': '"'
-});
-const NODE_TYPE_CDATA = 'cdata';
-const NODE_TYPE_COMMENT = 'comment';
-const NODE_TYPE_DOCUMENT = 'document';
-const NODE_TYPE_ELEMENT = 'element';
-const NODE_TYPE_TEXT = 'text';
-let Syntax;
-module.exports = function parseXml(xml, options = emptyObject) {
-  if (Syntax === void 0) {
-    // Lazy require to defer regex parsing until first use.
-    Syntax = require('./lib/syntax');
-  }
-  if (xml[0] === '\uFEFF') {
-    // Strip byte order mark.
-    xml = xml.slice(1);
-  }
-  xml = xml.replace(/\r\n?/g, '\n'); // Normalize CRLF and CR to LF.
-  let doc = {
-    type: NODE_TYPE_DOCUMENT,
-    children: [],
-    parent: null,
-    toJSON: nodeToJson
-  };
-  let state = {
-    length: xml.length,
-    options,
-    parent: doc,
-    pos: 0,
-    prevPos: 0,
-    xml
-  };
-  state.replaceReference = replaceReference.bind(state);
-  consumeProlog(state);
-  if (!consumeElement(state)) {
-    error(state, 'Root element is missing or invalid');
-  }
-  while (consumeMisc(state)) {} // eslint-disable-line no-empty
-  if (!isEof(state)) {
-    error(state, `Extra content at the end of the document`);
-  }
-  return doc;
-};
-// -- Private Functions --------------------------------------------------------
-function addNode(state, node) {
-  node.parent = state.parent;
-  node.toJSON = nodeToJson;
-  state.parent.children.push(node);
-}
-function addText(state, text) {
-  let { children } = state.parent;
-  let prevNode = children[children.length - 1];
-  if (prevNode !== void 0 && prevNode.type === NODE_TYPE_TEXT) {
-    // The previous node is a text node, so we can append to it and avoid
-    // creating another node.
-    prevNode.text += text;
-  } else {
-    addNode(state, {
-      type: NODE_TYPE_TEXT,
-      text
-    });
-  }
-}
-// Each `consume*` function takes the current state as an argument and returns
-// `true` if `state.pos` was advanced (meaning some XML was consumed) or `false`
-// if nothing was consumed.
-function consumeCDSect(state) {
-  let [ match, text ] = scan(state, Syntax.Anchored.CDSect);
-  if (match === void 0) {
-    return false;
-  }
-  if (state.options.preserveCdata) {
-    addNode(state, {
-      type: NODE_TYPE_CDATA,
-      text
-    });
-  } else {
-    addText(state, text);
-  }
-  return true;
-}
-function consumeCharData(state) {
-  let [ text ] = scan(state, Syntax.Anchored.CharData);
-  if (text === void 0) {
-    return false;
-  }
-  let cdataCloseIndex = text.indexOf(']]>');
-  if (cdataCloseIndex !== -1) {
-    state.pos = state.prevPos + cdataCloseIndex;
-    error(state, 'Element content may not contain the CDATA section close delimiter `]]>`');
-  }
-  // Note: XML 1.0 5th ed. says `CharData` is "any string of characters which
-  // does not contain the start-delimiter of any markup and does not include the
-  // CDATA-section-close delimiter", but the conformance test suite and
-  // well-established parsers like libxml seem to restrict `CharData` to
-  // characters that match the `Char` symbol, so that's what I've done here.
-  if (!Syntax.CharOnly.test(text)) {
-    state.pos = state.prevPos + text.search(new RegExp(`(?!${Syntax.Char.source})`));
-    error(state, 'Element content contains an invalid character');
-  }
-  addText(state, text);
-  return true;
-}
-function consumeComment(state) {
-  let [ , content ] = scan(state, Syntax.Anchored.Comment);
-  if (content === void 0) {
-    return false;
-  }
-  if (state.options.preserveComments) {
-    addNode(state, {
-      type: NODE_TYPE_COMMENT,
-      content: content.trim()
-    });
-  }
-  return true;
-}
-function consumeDoctypeDecl(state) {
-  return scan(state, Syntax.Anchored.doctypedecl).length > 0;
-}
-function consumeElement(state) {
-  let [ tag, name, attrs ] = scan(state, Syntax.Anchored.EmptyElemTag);
-  let isEmpty = tag !== void 0;
-  if (!isEmpty) {
-    [ tag, name, attrs ] = scan(state, Syntax.Anchored.STag);
-    if (tag === void 0) {
-      return false;
-    }
-  }
-  let { parent } = state;
-  let parsedAttrs = parseAttrs(state, attrs);
-  let node = {
-    type: NODE_TYPE_ELEMENT,
-    name,
-    attributes: parsedAttrs,
-    children: []
-  };
-  let xmlSpace = parsedAttrs['xml:space'];
-  if (xmlSpace === 'preserve'
-      || (xmlSpace !== 'default' && parent.preserveWhitespace)) {
-    node.preserveWhitespace = true;
-  }
-  if (!isEmpty) {
-    state.parent = node;
-    consumeCharData(state);
-    while (
-      consumeElement(state)
-        || consumeReference(state)
-        || consumeCDSect(state)
-        || consumePI(state)
-        || consumeComment(state)
-    ) {
-      consumeCharData(state);
-    }
-    let [ , endName ] = scan(state, Syntax.Anchored.ETag);
-    if (endName !== name) {
-      state.pos = state.prevPos;
-      error(state, `Missing end tag for element ${name}`);
-    }
-    state.parent = parent;
-  }
-  addNode(state, node);
-  return true;
-}
-function consumeMisc(state) {
-  return consumeComment(state)
-    || consumePI(state)
-    || consumeWhitespace(state);
-}
-function consumePI(state) {
-  let [ match, target ] = scan(state, Syntax.Anchored.PI);
-  if (match === void 0) {
-    return false;
-  }
-  if (target.toLowerCase() === 'xml') {
-    state.pos = state.prevPos;
-    error(state, 'XML declaration is only allowed at the start of the document');
-  }
-  return true;
-}
-function consumeProlog(state) {
-  let { pos } = state;
-  scan(state, Syntax.Anchored.XMLDecl);
-  while (consumeMisc(state)) {}  // eslint-disable-line no-empty
-  if (consumeDoctypeDecl(state)) {
-    while (consumeMisc(state)) {}  // eslint-disable-line no-empty
-  }
-  return state.pos > pos;
-}
-function consumeReference(state) {
-  let [ ref ] = scan(state, Syntax.Anchored.Reference);
-  if (ref === void 0) {
-    return false;
-  }
-  addText(state, state.replaceReference(ref));
-  return true;
-}
-function consumeWhitespace(state) {
-  return scan(state, Syntax.Anchored.S).length > 0;
-}
-function error(state, message) {
-  let { pos, xml } = state;
-  let column = 1;
-  let excerpt = '';
-  let line = 1;
-  // Find the line and column where the error occurred.
-  for (let i = 0; i < pos; ++i) {
-    let char = xml[i];
-    if (char === '\n') {
-      column = 1;
-      excerpt = '';
-      line += 1;
-    } else {
-      column += 1;
-      excerpt += char;
-    }
-  }
-  let eol = xml.indexOf('\n', pos);
-  excerpt += eol === -1
-    ? xml.slice(pos)
-    : xml.slice(pos, eol);
-  let excerptStart = 0;
-  // Keep the excerpt below 50 chars, but always keep the error position in
-  // view.
-  if (excerpt.length > 50) {
-    if (column < 40) {
-      excerpt = excerpt.slice(0, 50);
-    } else {
-      excerptStart = column - 20;
-      excerpt = excerpt.slice(excerptStart, column + 30);
-    }
-  }
-  let err = new Error(
-    `${message} (line ${line}, column ${column})\n`
-      + `  ${excerpt}\n`
-      + ' '.repeat(column - excerptStart + 1) + '^\n'
-  );
-  err.column = column;
-  err.excerpt = excerpt;
-  err.line = line;
-  err.pos = pos;
-  throw err;
-}
-function isEof(state) {
-  return state.pos >= state.length - 1;
-}
-function nodeToJson() {
-  let json = Object.assign(Object.create(null), this); // eslint-disable-line no-invalid-this
-  delete json.parent;
-  return json;
-}
-function normalizeAttrValue(state, value) {
-  return value
-    .replace(/[\x20\t\r\n]/g, ' ')
-    .replace(Syntax.Global.Reference, state.replaceReference);
-}
-function parseAttrs(state, attrs) {
-  let parsedAttrs = Object.create(null);
-  if (!attrs) {
-    return parsedAttrs;
-  }
-  let attrPairs = attrs
-    .match(Syntax.Global.Attribute)
-    .sort();
-  for (let i = 0, len = attrPairs.length; i < len; ++i) {
-    let attrPair = attrPairs[i];
-    let eqMatch = attrPair.match(Syntax.Eq);
-    let name = attrPair.slice(0, eqMatch.index);
-    let value = attrPair.slice(eqMatch.index + eqMatch[0].length);
-    if (name in parsedAttrs) {
-      state.pos = state.prevPos;
-      error(state, `Attribute \`${name}\` redefined`);
-    }
-    value = normalizeAttrValue(state, value.slice(1, -1));
-    if (name === 'xml:space') {
-      if (value !== 'default' && value !== 'preserve') {
-        state.pos = state.prevPos;
-        error(state, `Value of the \`xml:space\` attribute must be "default" or "preserve"`);
-      }
-    }
-    parsedAttrs[name] = value;
-  }
-  return parsedAttrs;
-}
-function replaceReference(ref) {
-  let state = this; // eslint-disable-line no-invalid-this
-  if (ref[ref.length - 1] !== ';') {
-    error(state, `Invalid reference: \`${ref}\``);
-  }
-  if (ref[1] === '#') {
-    // This is a character entity.
-    let codePoint;
-    if (ref[2] === 'x') {
-      codePoint = parseInt(ref.slice(3, -1), 16);
-    } else {
-      codePoint = parseInt(ref.slice(2, -1), 10);
-    }
-    if (isNaN(codePoint)) {
-      state.pos = state.prevPos;
-      error(state, `Invalid character entity \`${ref}\``);
-    }
-    let char = String.fromCodePoint(codePoint);
-    if (!Syntax.Char.test(char)) {
-      state.pos = state.prevPos;
-      error(state, `Invalid character entity \`${ref}\``);
-    }
-    return char;
-  }
-  // This is a named entity.
-  let value = namedEntities[ref];
-  if (value !== void 0) {
-    return value;
-  }
-  if (state.options.resolveUndefinedEntity) {
-    let resolvedValue = state.options.resolveUndefinedEntity(ref);
-    if (resolvedValue !== null && resolvedValue !== void 0) {
-      return resolvedValue;
-    }
-  }
-  if (state.options.ignoreUndefinedEntities) {
-    return ref;
-  }
-  state.pos = state.prevPos;
-  error(state, `Named entity isn't defined: \`${ref}\``);
-}
-function scan(state, regex) {
-  let { pos, xml } = state;
-  let xmlToScan = pos > 0
-    ? xml.slice(pos)
-    : xml;
-  let matches = xmlToScan.match(regex);
-  if (matches === null) {
-    return emptyArray;
-  }
-  state.prevPos = state.pos;
-  state.pos += matches[0].length;
-  return matches;
-}

package/src/lib/syntax.js DELETED

@@ -1,263 +0,0 @@
-'use strict';
-// To improve readability, the regular expression patterns in this file are
-// written as tagged template literals. The `regex` tag function strips literal
-// whitespace characters and line comments beginning with `//` and returns a
-// RegExp instance.
-//
-// Escape sequences are preserved as-is in the resulting regex, so
-// double-escaping isn't necessary. A pattern may embed another pattern using
-// `${}` interpolation.
-// -- Common Symbols -----------------------------------------------------------
-exports.Char = regex`
-  (?:
-    [
-      \t
-      \n
-      \r
-      \x20-\uD7FF
-      \uE000-\uFFFD
-    ]
-    |
-    [\uD800-\uDBFF][\uDC00-\uDFFF]
-  )
-`;
-// Partial implementation.
-//
-// To be compliant, the matched text must result in an error if it contains the
-// string `]]>`, but that can't be easily represented here so we do it in the
-// parser.
-exports.CharData = regex`
-  [^<&]+
-`;
-exports.NameStartChar = regex`
-  (?:
-    [
-      :
-      A-Z
-      _
-      a-z
-      \xC0-\xD6
-      \xD8-\xF6
-      \xF8-\u02FF
-      \u0370-\u037D
-      \u037F-\u1FFF
-      \u200C-\u200D
-      \u2070-\u218F
-      \u2C00-\u2FEF
-      \u3001-\uD7FF
-      \uF900-\uFDCF
-      \uFDF0-\uFFFD
-    ]
-    |
-    [\uD800-\uDB7F][\uDC00-\uDFFF]
-  )
-`;
-exports.NameChar = regex`
-  (?:
-    ${exports.NameStartChar}
-    |
-    [
-      .
-      0-9
-      \xB7
-      \u0300-\u036F
-      \u203F-\u2040
-      -
-    ]
-  )
-`;
-exports.Name = regex`
-  ${exports.NameStartChar}
-  (?:${exports.NameChar})*
-`;
-// Loose implementation. The entity will be validated in the `replaceReference`
-// function.
-exports.Reference = regex`
-  &[^\s&;]*;?
-`;
-exports.S = regex`
-  [\x20\t\r\n]+
-`;
-// -- Attributes ---------------------------------------------------------------
-exports.Eq = regex`
-  (?:${exports.S})?
-  =
-  (?:${exports.S})?
-`;
-exports.Attribute = regex`
-  ${exports.Name}
-  ${exports.Eq}
-  (?:
-    "(?:
-      [^<"]
-    )*"
-    |
-    '(?:
-      [^<']
-    )*'
-  )
-`;
-// -- Elements -----------------------------------------------------------------
-exports.CDSect = regex`
-  <!\[CDATA\[
-    // Group 1: CData text content (optional)
-    (
-      (?:${exports.Char})*?
-    )
-  \]\]>
-`;
-exports.EmptyElemTag = regex`
-  <
-    // Group 1: Element name
-    (${exports.Name})
-    // Group 2: Attributes (optional)
-    (
-      (?:
-        ${exports.S}
-        ${exports.Attribute}
-      )*
-    )
-    (?:${exports.S})?
-  />
-`;
-exports.ETag = regex`
-  </
-    // Group 1: End tag name
-    (${exports.Name})
-    (?:${exports.S})?
-  >
-`;
-exports.STag = regex`
-  <
-    // Group 1: Start tag name
-    (${exports.Name})
-    // Group 2: Attributes (optional)
-    (
-      (?:
-        ${exports.S}
-        ${exports.Attribute}
-      )*
-    )
-    (?:${exports.S})?
-  >
-`;
-// -- Misc ---------------------------------------------------------------------
-// Special pattern that matches an entire string consisting only of `Char`
-// characters.
-exports.CharOnly = regex`
-  ^(?:${exports.Char})*$
-`;
-exports.Comment = regex`
-  <!--
-    // Group 1: Comment text (optional)
-    (
-      (?:
-        (?!-) ${exports.Char}
-        | - (?!-) ${exports.Char}
-      )*
-    )
-  -->
-`;
-// Loose implementation since doctype declarations are discarded.
-//
-// It's not possible to fully parse a doctype declaration with a regex, but
-// since we just discard them we can skip parsing the fiddly inner bits and use
-// a regex to speed things up.
-exports.doctypedecl = regex`
-  <!DOCTYPE
-    ${exports.S}
-    [^[>]*
-    (?:
-      \[ [\s\S]+? \]
-      (?:${exports.S})?
-    )?
-  >
-`;
-// Loose implementation since processing instructions are discarded.
-exports.PI = regex`
-  <\?
-    // Group 1: PITarget
-    (
-      ${exports.Name}
-    )
-    (?:
-      ${exports.S}
-      (?:${exports.Char})*?
-    )?
-  \?>
-`;
-// Loose implementation since XML declarations are discarded.
-exports.XMLDecl = regex`
-  <\?xml
-    ${exports.S}
-    [\s\S]+?
-  \?>
-`;
-// -- Helpers ------------------------------------------------------------------
-exports.Anchored = {};
-exports.Global = {};
-// Create anchored and global variations of each pattern.
-Object.keys(exports).forEach(name => {
-  if (name !== 'Anchored' && name !== 'CharOnly' && name !== 'Global') {
-    let pattern = exports[name];
-    exports.Anchored[name] = new RegExp('^' + pattern.source);
-    exports.Global[name] = new RegExp(pattern.source, 'g');
-  }
-});
-function regex(strings, ...embeddedPatterns) {
-  let { length, raw } = strings;
-  let lastIndex = length - 1;
-  let pattern = '';
-  for (let i = 0; i < length; ++i) {
-    pattern += raw[i]
-      .replace(/(^|[^\\])\/\/.*$/gm, '$1') // remove end-of-line comments
-      .replace(/\s+/g, ''); // remove all whitespace
-    if (i < lastIndex) {
-      pattern += embeddedPatterns[i].source;
-    }
-  }
-  return new RegExp(pattern);
-}