npm - @nodable/flexible-xml-parser - Versions diffs - 1.0.0 - Mend

@nodable/flexible-xml-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/CHANGELOG.md +0 -0
package/LICENSE +21 -0
package/README.md +284 -0
package/lib/fxp.d.cts +652 -0
package/package.json +80 -0
package/src/AttributeProcessor.js +107 -0
package/src/AutoCloseHandler.js +257 -0
package/src/CharsSymbol.js +16 -0
package/src/DocTypeReader.js +522 -0
package/src/InputSource/BufferSource.js +228 -0
package/src/InputSource/FeedableSource.js +340 -0
package/src/InputSource/StreamSource.js +49 -0
package/src/InputSource/StringSource.js +225 -0
package/src/OptionsBuilder.js +400 -0
package/src/ParseError.js +91 -0
package/src/StopNodeProcessor.js +573 -0
package/src/XMLParser.js +293 -0
package/src/Xml2JsParser.js +573 -0
package/src/XmlPartReader.js +183 -0
package/src/XmlSpecialTagsReader.js +82 -0
package/src/fxp.d.ts +619 -0
package/src/fxp.js +8 -0
package/src/util.js +58 -0

package/package.json ADDED Viewed

@@ -0,0 +1,80 @@
+{
+  "name": "@nodable/flexible-xml-parser",
+  "version": "1.0.0",
+  "description": "Fastest XML parser in pure JS with fully customizable output",
+  "main": "./lib/fxp.cjs",
+  "type": "module",
+  "sideEffects": false,
+  "module": "./src/fxp.js",
+  "types": "./src/fxp.d.ts",
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./src/fxp.d.ts",
+        "default": "./src/fxp.js"
+      },
+      "require": {
+        "types": "./lib/fxp.d.cts",
+        "default": "./lib/fxp.cjs"
+      }
+    }
+  },
+  "scripts": {
+    "test": "c8 --reporter=lcov --reporter=text jasmine specs/*spec.js",
+    "bundle": "webpack --config webpack.cjs.config.js"
+  },
+  "keywords": [
+    "xml",
+    "parser",
+    "fast",
+    "flexible",
+    "xml-parser",
+    "xml2js",
+    "xml2json",
+    "xml2xml",
+    "xml2yaml",
+    "stream",
+    "buffer",
+    "bytes",
+    "path-expression-matcher"
+  ],
+  "author": "Amit Gupta (https://solothought.com)",
+  "license": "MIT",
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {
+    "@nodable/base-output-builder": "^1.0.2",
+    "@nodable/compact-builder": "^1.0.2",
+    "path-expression-matcher": "^1.4.0",
+    "strnum": "^2.2.2"
+  },
+  "devDependencies": {
+    "@babel/core": "^7.29.0",
+    "@babel/plugin-transform-runtime": "^7.29.0",
+    "@babel/preset-env": "^7.29.2",
+    "@babel/register": "^7.28.6",
+    "@types/node": "^20.19.37",
+    "babel-loader": "^10.1.1",
+    "c8": "^11.0.0",
+    "jasmine": "^6.1.0",
+    "typescript": "^6.0.2",
+    "webpack": "^5.105.4",
+    "webpack-cli": "^7.0.2"
+  },
+  "files": [
+    "lib/fxp.d.cts",
+    "src",
+    "CHANGELOG.md"
+  ],
+  "funding": [
+    {
+      "type": "github",
+      "url": "https://github.com/sponsors/nodable"
+    }
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/nodable/flexible-xml-parser.git"
+  }
+}

package/src/AttributeProcessor.js ADDED Viewed

@@ -0,0 +1,107 @@
+'use strict';
+import { ParseError, ErrorCode } from './ParseError.js';
+/**
+ * AttributeProcessor — owns all attribute parsing logic.
+ *
+ * Two-pass attribute processing:
+ *
+ *   Pass 1 — collectRawAttributes()
+ *     Populates the rawAttributes map from the raw attribute expression string.
+ *     Called inside buildTagExpObj() (via XmlPartReader) so rawAttributes is
+ *     ready before readOpeningTag() calls matcher.updateCurrent(rawAttributes).
+ *     The matcher must reflect all raw attribute values before any value-parser
+ *     runs so that attribute-based path expressions (e.g. "div[class=code]")
+ *     resolve correctly during pass 2.
+ *
+ *   Pass 2 — flushAttributes()
+ *     Calls outputBuilder.addAttribute() for each attribute, running the full
+ *     value-parser chain. Called from readOpeningTag() AFTER
+ *     matcher.updateCurrent(), so the read-only matcher already carries the
+ *     complete attribute context when value parsers execute.
+ */
+// Module-level attribute matcher shared by both passes. It carries the 'g'
+// flag, so it is stateful through lastIndex; sharing it is safe because
+// getAllMatches() always resets lastIndex to 0 before iterating — see
+// getAllMatches() below.
+// Capture groups:
+//   1 — attribute name (a run of characters that are neither whitespace nor '=')
+//   2 — the optional "= quoted value" part as a whole
+//   3 — the opening quote character (' or "), back-referenced as the closing quote
+//   4 — the text between the quotes (lazy; may span lines)
+// Groups 2–4 are undefined for an attribute written without a value.
+const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
+/**
+ * Pass 1: extract raw (unparsed) attribute values into rawAttributes.
+ *
+ * @param {string} attrStr      - raw attribute expression substring
+ * @param {object} parser       - Xml2JsParser instance (for processAttrName)
+ * @param {object} tagExp - tagExp object to populate rawAttributes (Object.create(null))
+ */
+export function collectRawAttributes(attrStr, parser, tagExp) {
+  if (!attrStr || attrStr.length === 0) return;
+  const matches = getAllMatches(attrStr, attrsRegx);
+  const len = matches.length;
+  let count = 0;
+  for (let i = 0; i < len; i++) {
+    const attrName = parser.processAttrName(matches[i][1]);
+    if (attrName === false) continue;
+    count++;
+    const rawVal = matches[i][4];
+    tagExp.rawAttributes[matches[i][1]] = rawVal !== undefined ? rawVal : true;
+  }
+  tagExp.rawAttributesLen = count;
+}
+/**
+ * Pass 2: run value parsers and push each attribute to the output builder.
+ *
+ * @param {string} attrStr - raw attribute expression substring
+ * @param {object} parser  - Xml2JsParser instance
+ */
+export function flushAttributes(attrStr, parser) {
+  if (!attrStr || attrStr.length === 0) return;
+  const matches = getAllMatches(attrStr, attrsRegx);
+  const len = matches.length;
+  const maxAttrs = parser.options.limits?.maxAttributesPerTag;
+  if (maxAttrs !== undefined && maxAttrs !== null && len > maxAttrs) {
+    const tagName = parser.currentTagDetail?.name ?? '(unknown)';
+    throw new ParseError(
+      `Tag '${tagName}' has ${len} attributes, exceeding limit of ${maxAttrs}`,
+      ErrorCode.LIMIT_MAX_ATTRIBUTES,
+      { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
+    );
+  }
+  for (let i = 0; i < len; i++) {
+    const attrName = parser.processAttrName(matches[i][1]);
+    if (attrName === false) continue;
+    const rawVal = matches[i][4];
+    const attrVal = rawVal !== undefined ? rawVal : true;
+    parser.outputBuilder.addAttribute(attrName, attrVal, parser.readonlyMatcher);
+  }
+}
+/**
+ * Run the regex against the string and return all capture groups.
+ * lastIndex is always reset to 0 before iterating so the module-level
+ * stateful regex is safe to share across calls.
+ *
+ * @param {string} string
+ * @param {RegExp} regex
+ * @returns {Array}
+ */
+function getAllMatches(string, regex) {
+  regex.lastIndex = 0;
+  const matches = [];
+  let match = regex.exec(string);
+  while (match) {
+    const allmatches = [];
+    allmatches.startIndex = regex.lastIndex - match[0].length;
+    const len = match.length;
+    for (let index = 0; index < len; index++) {
+      allmatches.push(match[index]);
+    }
+    matches.push(allmatches);
+    match = regex.exec(string);
+  }
+  return matches;
+}

package/src/AutoCloseHandler.js ADDED Viewed

@@ -0,0 +1,257 @@
+import { ParseError, ErrorCode } from './ParseError.js';
+/**
+ * AutoCloseHandler
+ *
+ * Handles two distinct failure modes that arise when XML is malformed
+ * or a data stream is interrupted:
+ *
+ *   1. EOF with open tags   — `onEof` option
+ *   2. Mismatched close tag — `onMismatch` option
+ *
+ * The handler keeps no parser state of its own (only an optional log of
+ * recorded errors); it receives the parser's live state on each call and
+ * mutates it directly (matching how the parser normally works).
+ */
+/**
+ * Error types returned by getParseErrors() when `collectErrors` is true.
+ * Each value is the string stored in the `type` field of a recorded error
+ * entry. The object is frozen, so its members cannot be reassigned at runtime.
+ * @enum {string}
+ */
+export const AutoCloseErrorType = Object.freeze({
+  /** A tag was still open when the document ended. */
+  UNCLOSED_EOF: 'unclosed-eof',
+  /**
+   * A closing tag didn't match the current open tag.
+   * In 'recover' mode the handler pops up the stack to the nearest match;
+   * in 'discard' mode the closing tag is dropped.
+   */
+  MISMATCHED_CLOSE: 'mismatched-close',
+  /**
+   * A closing tag appeared whose opener doesn't exist anywhere in the stack.
+   * The tag is discarded.
+   */
+  PHANTOM_CLOSE: 'phantom-close',
+  /**
+   * The source ended mid-way through a tag — e.g. `<div><p` or `</di`.
+   * The partial tag is discarded; any already-open tags are closed by handleEof.
+   */
+  PARTIAL_TAG: 'partial-tag',
+});
+export default class AutoCloseHandler {
+  /**
+   * @param {object} autoCloseOptions - Resolved autoClose options
+   * @param {string} autoCloseOptions.onEof        - 'throw' | 'closeAll'
+   * @param {string} autoCloseOptions.onMismatch   - 'throw' | 'recover' | 'discard'
+   * @param {boolean} autoCloseOptions.collectErrors
+   */
+  constructor(autoCloseOptions) {
+    this.onEof = autoCloseOptions.onEof || 'throw';
+    this.onMismatch = autoCloseOptions.onMismatch || 'throw';
+    this.collectErrors = autoCloseOptions.collectErrors || false;
+    this.errors = [];
+  }
+  /**
+   * Called at end-of-document when `tagsStack` is non-empty.
+   *
+   * @param {object}   parserState
+   * @param {Array}    parserState.tagsStack       - Parser's open-tag stack
+   * @param {object}   parserState.currentTagDetail - The currently open TagDetail
+   * @param {object}   parserState.outputBuilder   - Live OutputBuilder instance
+   * @param {object}   parserState.readonlyMatcher - Read-only Matcher proxy
+   * @param {object}   parserState.source          - Current InputSource (for position)
+   * @param {Function} parserState.addTextNode     - Bound addTextNode on the parser
+   */
+  handleEof(parserState) {
+    if (this.onEof === 'throw') {
+      throw new ParseError('Unexpected data in the end of document', ErrorCode.UNEXPECTED_TRAILING_DATA);
+    }
+    // onEof === 'closeAll'
+    // Close from innermost outward using the parser's canonical popTag(),
+    // which keeps the parser stack and output builder in sync automatically.
+    const { addTextNode, popTag } = parserState;
+    let current = parserState.currentTagDetail;
+    while (current && !current.root) {
+      this._recordError(AutoCloseErrorType.UNCLOSED_EOF, {
+        tag: current.name,
+        expected: null,
+        line: current.line,
+        col: current.col,
+        index: current.index,
+      });
+      addTextNode();
+      popTag();
+      // popTag() already updated currentTagDetail via tagsStack.pop()
+      current = parserState.currentTagDetail;
+    }
+  }
+  /**
+   * Called when a closing tag name doesn't match `currentTagDetail.name`.
+   *
+   * Returns an object describing what the caller should do:
+   *   { action: 'close-matched' } — handler already closed intermediates;
+   *                                  caller should now close the matched tag normally
+   *   { action: 'discard' }       — caller should skip this closing tag entirely
+   *
+   * @param {string}   closingTagName   - The mismatched closing tag we just read
+   * @param {object}   parserState      - Same shape as handleEof
+   * @returns {{ action: string }}
+   */
+  handleMismatch(closingTagName, parserState) {
+    const { tagsStack, currentTagDetail, source, addTextNode } = parserState;
+    if (this.onMismatch === 'throw') {
+      throw new ParseError(
+        `Unexpected closing tag '${closingTagName}' expecting '${currentTagDetail.name}'`,
+        ErrorCode.MISMATCHED_CLOSE_TAG,
+        { line: source ? source.line : undefined, col: source ? source.cols : undefined, index: source ? source.startIndex : undefined }
+      );
+    }
+    if (this.onMismatch === 'discard') {
+      this._recordError(AutoCloseErrorType.MISMATCHED_CLOSE, {
+        tag: closingTagName,
+        expected: currentTagDetail.name,
+        line: source ? source.line : null,
+        col: source ? source.cols : null,
+        index: source ? source.startIndex : null,
+      });
+      return { action: 'discard' };
+    }
+    // onMismatch === 'recover'
+    // Scan the stack (top → bottom) for the closest matching opener.
+    // tagsStack holds ancestors with index 0 = root, last = parent of current.
+    // currentTagDetail is the open tag at the top that didn't match.
+    // Build a unified view: [root...ancestors, current] — we check current first
+    // (it's the top), then walk down toward the root.
+    const stackSnapshot = [...tagsStack, currentTagDetail];
+    let matchIndex = -1;
+    const stackSnapshotLength = stackSnapshot.length;
+    for (let i = stackSnapshotLength - 1; i >= 0; i--) {
+      if (stackSnapshot[i].name === closingTagName) {
+        matchIndex = i;
+        break;
+      }
+    }
+    if (matchIndex === -1) {
+      // No match anywhere — phantom closing tag
+      this._recordError(AutoCloseErrorType.PHANTOM_CLOSE, {
+        tag: closingTagName,
+        expected: currentTagDetail.name,
+        line: source ? source.line : null,
+        col: source ? source.cols : null,
+        index: source ? source.startIndex : null,
+      });
+      return { action: 'discard' };
+    }
+    // Close everything above the match (innermost first), then signal the
+    // caller to close the matched tag itself in the normal path.
+    const levelsToClose = stackSnapshotLength - 1 - matchIndex;
+    for (let i = 0; i < levelsToClose; i++) {
+      const tag = stackSnapshot[stackSnapshotLength - 1 - i];
+      this._recordError(AutoCloseErrorType.MISMATCHED_CLOSE, {
+        tag: tag.name,
+        expected: closingTagName,
+        line: tag.line,
+        col: tag.col,
+        index: tag.index,
+      });
+      addTextNode();
+      parserState.popTag();
+    }
+    // Update currentTagDetail to the matched one so the normal close path works.
+    // popTag() has already walked the stack up by levelsToClose steps; the next
+    // currentTagDetail is the one we want to match against.
+    parserState.currentTagDetail = stackSnapshot[matchIndex];
+    return { action: 'close-matched' };
+  }
+  /**
+   * Called when the source ended mid-way through a tag token.
+   * Records the partial-tag error and delegates remaining open tags to handleEof.
+   *
+   * @param {Error}  originalError  - The error thrown by the read function
+   * @param {object} parserState    - Same shape as handleEof
+   */
+  handlePartialTag(originalError, parserState) {
+    this._recordError(AutoCloseErrorType.PARTIAL_TAG, {
+      tag: _extractPartialTagName(originalError),
+      expected: null,
+      line: parserState.source ? parserState.source.line : null,
+      col: parserState.source ? parserState.source.cols : null,
+      index: parserState.source ? parserState.source.startIndex : null,
+    });
+    // Discard any partially-accumulated text from the broken tag
+    parserState.tagTextData = '';
+    // Close whatever was legitimately open before this truncation
+    this.handleEof(parserState);
+  }
+  /**
+   * Return a copy of the collected error list.
+   * Empty array when collectErrors is false or no errors occurred.
+   * @returns {Array}
+   */
+  getErrors() {
+    return this.errors.slice();
+  }
+  /**
+   * Reset error log (useful if the same handler instance is reused).
+   */
+  reset() {
+    this.errors = [];
+  }
+  // ── Private ──────────────────────────────────────────────────────────────
+  _recordError(type, detail) {
+    if (!this.collectErrors) return;
+    this.errors.push({ type, ...detail });
+  }
+}
+/**
+ * Best-effort extraction of a partial tag name from a source-exhausted error.
+ * Accepts the full error object so it can inspect both message and code.
+ *
+ * ParseError from readClosingTagName (new format):
+ *   message: "Unexpected end of source reading closing tag '</di'"
+ *
+ * Legacy plain Error (old format, kept for safety):
+ *   message: "Unexpected end of source. Reading closing tag '</di'"
+ *
+ * ParseError from readTagExp / readPiExp — opening tag truncated before '>':
+ *   No tag name is embedded; returns null.
+ */
+function _extractPartialTagName(err) {
+  if (!err) return null;
+  const message = typeof err.message === 'string' ? err.message : String(err);
+  // Match both "reading closing tag" (new, lowercase) and
+  // "Reading closing tag"  (old, capitalised, period-separated)
+  const closeMatch = message.match(/[Rr]eading closing tag '<\/([^']*)/);
+  if (closeMatch) return closeMatch[1] || null;
+  return null;
+}

package/src/CharsSymbol.js ADDED Viewed

@@ -0,0 +1,16 @@
+// Lookup table of XML markup tokens. Every key maps to the identical string,
+// so a token can be referenced by name instead of repeating the literal.
+export default {
+  "<" : "<", //tag start
+  ">" : ">", //tag end
+  "/" : "/", //close tag
+  "!" : "!", //comment or doctype
+  "!--" : "!--", //comment
+  "-->" : "-->", //comment end
+  "?" : "?", //pi
+  "?>" : "?>", //pi end
+  "?xml" : "?xml", //xml declaration start
+  "![" : "![", //cdata
+  "]]>" : "]]>", //cdata end
+  "[" : "[",
+  "-" : "-",
+  "D" : "D", // NOTE(review): presumably the first letter of DOCTYPE — confirm against the reader
+}