@nodable/flexible-xml-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,80 @@
1
+ {
2
+ "name": "@nodable/flexible-xml-parser",
3
+ "version": "1.0.0",
4
+ "description": "Fastest XML parser in pure JS with fully customizable output",
5
+ "main": "./lib/fxp.cjs",
6
+ "type": "module",
7
+ "sideEffects": false,
8
+ "module": "./src/fxp.js",
9
+ "types": "./src/fxp.d.ts",
10
+ "exports": {
11
+ ".": {
12
+ "import": {
13
+ "types": "./src/fxp.d.ts",
14
+ "default": "./src/fxp.js"
15
+ },
16
+ "require": {
17
+ "types": "./lib/fxp.d.cts",
18
+ "default": "./lib/fxp.cjs"
19
+ }
20
+ }
21
+ },
22
+ "scripts": {
23
+ "test": "c8 --reporter=lcov --reporter=text jasmine specs/*spec.js",
24
+ "bundle": "webpack --config webpack.cjs.config.js"
25
+ },
26
+ "keywords": [
27
+ "xml",
28
+ "parser",
29
+ "fast",
30
+ "flexible",
31
+ "xml-parser",
32
+ "xml2js",
33
+ "xml2json",
34
+ "xml2xml",
35
+ "xml2yaml",
36
+ "stream",
37
+ "buffer",
38
+ "bytes",
39
+ "path-expression-matcher"
40
+ ],
41
+ "author": "Amit Gupta (https://solothought.com)",
42
+ "license": "MIT",
43
+ "publishConfig": {
44
+ "access": "public"
45
+ },
46
+ "dependencies": {
47
+ "@nodable/base-output-builder": "^1.0.2",
48
+ "@nodable/compact-builder": "^1.0.2",
49
+ "path-expression-matcher": "^1.4.0",
50
+ "strnum": "^2.2.2"
51
+ },
52
+ "devDependencies": {
53
+ "@babel/core": "^7.29.0",
54
+ "@babel/plugin-transform-runtime": "^7.29.0",
55
+ "@babel/preset-env": "^7.29.2",
56
+ "@babel/register": "^7.28.6",
57
+ "@types/node": "^20.19.37",
58
+ "babel-loader": "^10.1.1",
59
+ "c8": "^11.0.0",
60
+ "jasmine": "^6.1.0",
61
+ "typescript": "^6.0.2",
62
+ "webpack": "^5.105.4",
63
+ "webpack-cli": "^7.0.2"
64
+ },
65
+ "files": [
66
+ "lib/fxp.d.cts",
67
+ "src",
68
+ "CHANGELOG.md"
69
+ ],
70
+ "funding": [
71
+ {
72
+ "type": "github",
73
+ "url": "https://github.com/sponsors/nodable"
74
+ }
75
+ ],
76
+ "repository": {
77
+ "type": "git",
78
+ "url": "git+https://github.com/nodable/flexible-xml-parser.git"
79
+ }
80
+ }
@@ -0,0 +1,107 @@
1
+ 'use strict';
2
+ import { ParseError, ErrorCode } from './ParseError.js';
3
+
4
+ /**
5
+ * AttributeProcessor — owns all attribute parsing logic.
6
+ *
7
+ * Two-pass attribute processing:
8
+ *
9
+ * Pass 1 — collectRawAttributes()
10
+ * Populates the rawAttributes map from the raw attribute expression string.
11
+ * Called inside buildTagExpObj() (via XmlPartReader) so rawAttributes is
12
+ * ready before readOpeningTag() calls matcher.updateCurrent(rawAttributes).
13
+ * The matcher must reflect all raw attribute values before any value-parser
14
+ * runs so that attribute-based path expressions (e.g. "div[class=code]")
15
+ * resolve correctly during pass 2.
16
+ *
17
+ * Pass 2 — flushAttributes()
18
+ * Calls outputBuilder.addAttribute() for each attribute, running the full
19
+ * value-parser chain. Called from readOpeningTag() AFTER
20
+ * matcher.updateCurrent(), so the read-only matcher already carries the
21
+ * complete attribute context when value parsers execute.
22
+ */
23
+
24
// Shared attribute matcher, compiled once when the module loads. It carries the
// `g` flag and therefore a mutable lastIndex; getAllMatches() zeroes lastIndex
// before every scan, so no position leaks from one call into the next.
// Capture groups: 1 = attribute name, 2 = the whole `= "value"` part (optional),
// 3 = the quote character, 4 = the text between the quotes.
const attrsRegx = /([^\s=]+)\s*(=\s*(['"])([\s\S]*?)\3)?/gm;
27
+
28
/**
 * Pass 1 of attribute handling: copy the unparsed attribute values of one tag
 * into `tagExp.rawAttributes`.
 *
 * Every attribute matched in `attrStr` is offered to `parser.processAttrName()`;
 * when that call returns `false` the attribute is left out. Kept attributes are
 * stored under their name exactly as written in the source (not the processed
 * name), with the quoted text as the value, or `true` for a value-less
 * attribute. `tagExp.rawAttributesLen` receives the number of kept attributes.
 *
 * When `attrStr` is empty or missing the function returns immediately and
 * `tagExp` is not touched at all.
 *
 * @param {string} attrStr - raw attribute expression substring
 * @param {object} parser  - object exposing processAttrName(name)
 * @param {object} tagExp  - object whose rawAttributes map is filled in
 */
export function collectRawAttributes(attrStr, parser, tagExp) {
  if (!attrStr || attrStr.length === 0) return;

  let kept = 0;
  for (const groups of getAllMatches(attrStr, attrsRegx)) {
    const rawName = groups[1];
    // Only the `false` sentinel matters here; the processed name is unused.
    if (parser.processAttrName(rawName) === false) continue;

    kept += 1;
    const rawVal = groups[4];
    tagExp.rawAttributes[rawName] = rawVal === undefined ? true : rawVal;
  }
  tagExp.rawAttributesLen = kept;
}
50
+
51
/**
 * Pass 2 of attribute handling: hand every attribute of one tag to the output
 * builder.
 *
 * The attribute string is re-scanned. If `parser.options.limits.maxAttributesPerTag`
 * is set (neither undefined nor null) and the number of matched attributes is
 * larger, a ParseError with code LIMIT_MAX_ATTRIBUTES is thrown before anything
 * is emitted. The comparison uses the total number of matches, i.e. it also
 * counts attributes that processAttrName() would go on to reject.
 *
 * Each remaining attribute is passed to
 * `parser.outputBuilder.addAttribute(processedName, value, parser.readonlyMatcher)`,
 * where value is the quoted text, or `true` for a value-less attribute.
 *
 * @param {string} attrStr - raw attribute expression substring
 * @param {object} parser  - parser instance (options, source, outputBuilder, ...)
 * @throws {ParseError} when the attribute count exceeds the configured limit
 */
export function flushAttributes(attrStr, parser) {
  if (!attrStr || attrStr.length === 0) return;

  const found = getAllMatches(attrStr, attrsRegx);
  const total = found.length;

  const limit = parser.options.limits?.maxAttributesPerTag;
  if (limit != null && total > limit) {
    const tagName = parser.currentTagDetail?.name ?? '(unknown)';
    throw new ParseError(
      `Tag '${tagName}' has ${total} attributes, exceeding limit of ${limit}`,
      ErrorCode.LIMIT_MAX_ATTRIBUTES,
      { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
    );
  }

  for (const groups of found) {
    const attrName = parser.processAttrName(groups[1]);
    if (attrName === false) continue; // attribute rejected by the name processor

    const rawVal = groups[4];
    parser.outputBuilder.addAttribute(
      attrName,
      rawVal === undefined ? true : rawVal,
      parser.readonlyMatcher
    );
  }
}
82
+
83
/**
 * Run `regex` over `string` and collect every match.
 *
 * Each entry of the returned array is a plain array holding the full match at
 * index 0 followed by the capture groups (unmatched groups are `undefined`),
 * plus a `startIndex` property with the offset of the match in `string`.
 *
 * `regex.lastIndex` is reset to 0 before scanning, so a shared, stateful
 * (global or sticky) regex can safely be reused across calls.
 *
 * The loop is guaranteed to terminate:
 *   - a regex without the `g` or `y` flag never advances lastIndex, so it
 *     contributes at most its first match;
 *   - a zero-length match does not advance lastIndex either, so the scan
 *     position is stepped forward by one manually.
 *
 * @param {string} string
 * @param {RegExp} regex
 * @returns {Array} list of match arrays, each with a `startIndex` property
 */
function getAllMatches(string, regex) {
  const advances = regex.global || regex.sticky;
  regex.lastIndex = 0;

  const matches = [];
  let match = regex.exec(string);
  while (match !== null) {
    const groups = [...match];
    groups.startIndex = match.index;
    matches.push(groups);

    // Without g/y exec() would return this same match forever.
    if (!advances) break;

    // An empty match leaves lastIndex where it was; move past it ourselves.
    if (match[0].length === 0) regex.lastIndex += 1;

    match = regex.exec(string);
  }
  return matches;
}
@@ -0,0 +1,257 @@
1
+ import { ParseError, ErrorCode } from './ParseError.js';
2
+
3
+ /**
4
+ * AutoCloseHandler
5
+ *
6
+ * Handles two distinct failure modes that arise when XML is malformed
7
+ * or a data stream is interrupted:
8
+ *
9
+ * 1. EOF with open tags — `onEof` option
10
+ * 2. Mismatched close tag — `onMismatch` option
11
+ *
12
+ * The handler is stateless; it receives the parser's live state on each
13
+ * call and mutates it directly (matching how the parser normally works).
14
+ */
15
+
16
/**
 * `type` values that AutoCloseHandler attaches to the entries of its error
 * log (entries are only recorded when `collectErrors` is enabled).
 * @enum {string}
 */
export const AutoCloseErrorType = Object.freeze({
  // Recorded by handleEof(), once per tag that was still open when it ran.
  UNCLOSED_EOF: 'unclosed-eof',

  // Recorded by handleMismatch(): either for the closing tag itself in
  // 'discard' mode, or for each open tag that is closed early in 'recover'
  // mode while walking up to the matching opener.
  MISMATCHED_CLOSE: 'mismatched-close',

  // Recorded by handleMismatch() in 'recover' mode when no open tag has the
  // closing tag's name; the closing tag is then discarded.
  PHANTOM_CLOSE: 'phantom-close',

  // Recorded by handlePartialTag() when the input stopped in the middle of a
  // tag token, e.g. `<div><p` or `</di`.
  PARTIAL_TAG: 'partial-tag',
});
42
+
43
/**
 * Recovery policy for malformed or truncated XML: decides what happens when
 * the input ends with tags still open, when a closing tag does not match the
 * current open tag, and when the input stops in the middle of a tag.
 *
 * The handler keeps no parser state of its own. Every method receives the
 * parser's live state object and works through it (addTextNode, popTag,
 * currentTagDetail, tagTextData). The only thing stored here is the optional
 * error log.
 */
export default class AutoCloseHandler {
  /**
   * @param {object}  autoCloseOptions
   * @param {string}  autoCloseOptions.onEof      - 'throw' | 'closeAll'; falsy values fall back to 'throw'
   * @param {string}  autoCloseOptions.onMismatch - 'throw' | 'recover' | 'discard'; falsy values fall back to 'throw'
   * @param {boolean} autoCloseOptions.collectErrors - record recoveries in the error log
   */
  constructor(autoCloseOptions) {
    const { onEof, onMismatch, collectErrors } = autoCloseOptions;
    this.onEof = onEof || 'throw';
    this.onMismatch = onMismatch || 'throw';
    this.collectErrors = collectErrors || false;
    this.errors = [];
  }

  /**
   * End-of-input handling while tags are still open.
   *
   * With onEof 'throw' a ParseError (UNEXPECTED_TRAILING_DATA) is raised.
   * Otherwise every open tag is closed, innermost first: an UNCLOSED_EOF entry
   * is recorded, then addTextNode() and popTag() are invoked. After each
   * popTag() the current tag is re-read from parserState, and the loop stops
   * at the first tag flagged `root` (or when there is no current tag).
   *
   * @param {object}   parserState
   * @param {object}   parserState.currentTagDetail - currently open tag (re-read after every popTag)
   * @param {Function} parserState.addTextNode      - called without a receiver
   * @param {Function} parserState.popTag           - called without a receiver
   * @throws {ParseError} when onEof is 'throw'
   */
  handleEof(parserState) {
    if (this.onEof === 'throw') {
      throw new ParseError('Unexpected data in the end of document', ErrorCode.UNEXPECTED_TRAILING_DATA);
    }

    const { addTextNode, popTag } = parserState;

    for (
      let open = parserState.currentTagDetail;
      open && !open.root;
      open = parserState.currentTagDetail // popTag() is expected to have moved this up
    ) {
      this._recordError(AutoCloseErrorType.UNCLOSED_EOF, {
        tag: open.name,
        expected: null,
        line: open.line,
        col: open.col,
        index: open.index,
      });

      addTextNode();
      popTag();
    }
  }

  /**
   * Handling of a closing tag whose name differs from the current open tag.
   *
   * - 'throw'   : raises ParseError (MISMATCHED_CLOSE_TAG).
   * - 'discard' : records MISMATCHED_CLOSE and returns { action: 'discard' }.
   * - otherwise ('recover'): looks for the nearest open tag with that name,
   *   searching from the current tag down through tagsStack. If none exists,
   *   records PHANTOM_CLOSE and returns { action: 'discard' }. If one exists,
   *   every tag above it is closed (MISMATCHED_CLOSE entry, addTextNode(),
   *   parserState.popTag()), parserState.currentTagDetail is set to the
   *   matched tag, and { action: 'close-matched' } is returned so the caller
   *   can close the matched tag through its normal path.
   *
   * @param {string} closingTagName - name read from the mismatching closing tag
   * @param {object} parserState    - tagsStack, currentTagDetail, source, addTextNode, popTag
   * @returns {{ action: string }} 'discard' or 'close-matched'
   * @throws {ParseError} when onMismatch is 'throw'
   */
  handleMismatch(closingTagName, parserState) {
    const { tagsStack, currentTagDetail, source, addTextNode } = parserState;
    const mode = this.onMismatch;

    if (mode === 'throw') {
      throw new ParseError(
        `Unexpected closing tag '${closingTagName}' expecting '${currentTagDetail.name}'`,
        ErrorCode.MISMATCHED_CLOSE_TAG,
        { line: source?.line, col: source?.cols, index: source?.startIndex }
      );
    }

    // Where the offending closing tag was read; all-null when no source is attached.
    const closeTagPosition = () => (source
      ? { line: source.line, col: source.cols, index: source.startIndex }
      : { line: null, col: null, index: null });

    if (mode === 'discard') {
      this._recordError(AutoCloseErrorType.MISMATCHED_CLOSE, {
        tag: closingTagName,
        expected: currentTagDetail.name,
        ...closeTagPosition(),
      });
      return { action: 'discard' };
    }

    // 'recover': view the ancestors plus the current tag as one stack whose
    // last element is the innermost open tag.
    const openTags = [...tagsStack, currentTagDetail];
    const top = openTags.length - 1;

    // Walk from the innermost tag toward index 0; ends at -1 when nothing matches.
    let matchIndex = top;
    while (matchIndex >= 0 && openTags[matchIndex].name !== closingTagName) {
      matchIndex -= 1;
    }

    if (matchIndex < 0) {
      this._recordError(AutoCloseErrorType.PHANTOM_CLOSE, {
        tag: closingTagName,
        expected: currentTagDetail.name,
        ...closeTagPosition(),
      });
      return { action: 'discard' };
    }

    // Close every tag that sits above the matched opener, innermost first.
    for (let depth = top; depth > matchIndex; depth -= 1) {
      const unclosed = openTags[depth];

      this._recordError(AutoCloseErrorType.MISMATCHED_CLOSE, {
        tag: unclosed.name,
        expected: closingTagName,
        line: unclosed.line,
        col: unclosed.col,
        index: unclosed.index,
      });

      addTextNode();
      parserState.popTag();
    }

    // Make the matched opener the current tag so the caller's regular
    // close path now sees matching names.
    parserState.currentTagDetail = openTags[matchIndex];

    return { action: 'close-matched' };
  }

  /**
   * Handling of input that stopped in the middle of a tag token.
   *
   * Records a PARTIAL_TAG entry (tag name taken from the error message when
   * available), clears parserState.tagTextData, then runs handleEof() — which
   * throws when onEof is 'throw'.
   *
   * @param {Error}  originalError - error raised while reading the truncated tag
   * @param {object} parserState   - same object handleEof() receives
   */
  handlePartialTag(originalError, parserState) {
    const partialName = _extractPartialTagName(originalError);
    const { source } = parserState;

    this._recordError(AutoCloseErrorType.PARTIAL_TAG, {
      tag: partialName,
      expected: null,
      line: source ? source.line : null,
      col: source ? source.cols : null,
      index: source ? source.startIndex : null,
    });

    // Drop text accumulated for the broken tag before closing what was open.
    parserState.tagTextData = '';

    this.handleEof(parserState);
  }

  /**
   * @returns {Array} shallow copy of the error log (empty when nothing was
   *   recorded or collectErrors is off)
   */
  getErrors() {
    return [...this.errors];
  }

  /** Start a fresh, empty error log. */
  reset() {
    this.errors = [];
  }

  // ── Private ──────────────────────────────────────────────────────────────

  /** Append `{ type, ...detail }` to the log, but only when collectErrors is on. */
  _recordError(type, detail) {
    if (this.collectErrors) {
      this.errors.push({ type, ...detail });
    }
  }
}
235
+
236
/**
 * Pull the partial tag name out of an "unexpected end of source" error, if the
 * message carries one.
 *
 * The text examined is `err.message` when that is a string, otherwise
 * `String(err)`. Two wordings are recognised:
 *
 *   "Unexpected end of source reading closing tag '</di'"   (lowercase "reading")
 *   "Unexpected end of source. Reading closing tag '</di'"  (capitalised "Reading")
 *
 * Everything after `'</` up to the next apostrophe (or the end of the text) is
 * the name. Messages without that phrase — e.g. errors for a truncated
 * opening tag — yield null, as does an empty name.
 *
 * @param {*} err - error object (or anything else; falsy values give null)
 * @returns {string|null} the partial tag name, or null when none can be found
 */
function _extractPartialTagName(err) {
  if (!err) return null;

  const text = typeof err.message === 'string' ? err.message : String(err);
  const found = /[Rr]eading closing tag '<\/([^']*)/.exec(text);

  // No match, or a match with an empty name, both collapse to null.
  return found?.[1] || null;
}
@@ -0,0 +1,16 @@
1
// Table of XML marker strings; every key maps to the identical string.
export default {
  '<': '<',       // tag start
  '>': '>',       // tag end
  '/': '/',       // close tag
  '!': '!',       // comment or doctype
  '!--': '!--',   // comment start
  '-->': '-->',   // comment end
  '?': '?',       // processing instruction (pi) start
  '?>': '?>',     // pi end
  '?xml': '?xml', // xml declaration
  '![': '![',     // cdata start
  ']]>': ']]>',   // cdata end
  // NOTE(review): the three single characters below carry no comment in the
  // original; presumably they are the look-ahead characters used to tell
  // `![CDATA[`, `!--` and `!DOCTYPE` apart — confirm against the reader code.
  '[': '[',
  '-': '-',
  'D': 'D',
};