@depup/fast-xml-parser 5.5.6-depup.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +752 -0
  2. package/LICENSE +21 -0
  3. package/README.md +31 -0
  4. package/changes.json +10 -0
  5. package/lib/fxbuilder.min.js +2 -0
  6. package/lib/fxbuilder.min.js.map +1 -0
  7. package/lib/fxp.cjs +1 -0
  8. package/lib/fxp.d.cts +595 -0
  9. package/lib/fxp.min.js +2 -0
  10. package/lib/fxp.min.js.map +1 -0
  11. package/lib/fxparser.min.js +2 -0
  12. package/lib/fxparser.min.js.map +1 -0
  13. package/lib/fxvalidator.min.js +2 -0
  14. package/lib/fxvalidator.min.js.map +1 -0
  15. package/package.json +112 -0
  16. package/src/cli/cli.js +97 -0
  17. package/src/cli/man.js +17 -0
  18. package/src/cli/read.js +43 -0
  19. package/src/fxp.d.ts +577 -0
  20. package/src/fxp.js +14 -0
  21. package/src/ignoreAttributes.js +18 -0
  22. package/src/util.js +61 -0
  23. package/src/v6/CharsSymbol.js +16 -0
  24. package/src/v6/EntitiesParser.js +106 -0
  25. package/src/v6/OptionsBuilder.js +61 -0
  26. package/src/v6/OutputBuilders/BaseOutputBuilder.js +69 -0
  27. package/src/v6/OutputBuilders/JsArrBuilder.js +103 -0
  28. package/src/v6/OutputBuilders/JsMinArrBuilder.js +100 -0
  29. package/src/v6/OutputBuilders/JsObjBuilder.js +154 -0
  30. package/src/v6/OutputBuilders/ParserOptionsBuilder.js +94 -0
  31. package/src/v6/Report.js +0 -0
  32. package/src/v6/TagPath.js +81 -0
  33. package/src/v6/TagPathMatcher.js +13 -0
  34. package/src/v6/XMLParser.js +83 -0
  35. package/src/v6/Xml2JsParser.js +235 -0
  36. package/src/v6/XmlPartReader.js +210 -0
  37. package/src/v6/XmlSpecialTagsReader.js +111 -0
  38. package/src/v6/inputSource/BufferSource.js +116 -0
  39. package/src/v6/inputSource/StringSource.js +121 -0
  40. package/src/v6/valueParsers/EntitiesParser.js +105 -0
  41. package/src/v6/valueParsers/booleanParser.js +22 -0
  42. package/src/v6/valueParsers/booleanParserExt.js +19 -0
  43. package/src/v6/valueParsers/currency.js +38 -0
  44. package/src/v6/valueParsers/join.js +13 -0
  45. package/src/v6/valueParsers/number.js +14 -0
  46. package/src/v6/valueParsers/trim.js +6 -0
  47. package/src/validator.js +425 -0
  48. package/src/xmlbuilder/json2xml.js +6 -0
  49. package/src/xmlparser/DocTypeReader.js +401 -0
  50. package/src/xmlparser/OptionsBuilder.js +159 -0
  51. package/src/xmlparser/OrderedObjParser.js +905 -0
  52. package/src/xmlparser/XMLParser.js +71 -0
  53. package/src/xmlparser/node2json.js +174 -0
  54. package/src/xmlparser/xmlNode.js +40 -0
@@ -0,0 +1,905 @@
1
+ 'use strict';
2
+ ///@ts-check
3
+
4
+ import { getAllMatches, isExist, DANGEROUS_PROPERTY_NAMES, criticalProperties } from '../util.js';
5
+ import xmlNode from './xmlNode.js';
6
+ import DocTypeReader from './DocTypeReader.js';
7
+ import toNumber from "strnum";
8
+ import getIgnoreAttributesFn from "../ignoreAttributes.js";
9
+ import { Expression, Matcher } from 'path-expression-matcher';
10
+
11
+ // const regx =
12
+ // '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
13
+ // .replace(/NAME/g, util.nameRegexp);
14
+
15
+ //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
16
+ //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
17
+
18
+ // Helper functions for attribute and namespace handling
19
+
20
/**
 * Produce a copy of an attribute map whose keys have the configured
 * attribute-name prefix stripped.
 *
 * When `options.attributesGroupName` is set, the attributes are read from
 * that property of `prefixedAttrs` instead of from `prefixedAttrs` itself.
 *
 * @param {object} prefixedAttrs - Attribute map as produced by buildAttributesMap
 * @param {object} options - Parser options; reads `attributesGroupName` and `attributeNamePrefix`
 * @returns {object} New object keyed by un-prefixed attribute names (empty when there is nothing to read)
 */
function extractRawAttributes(prefixedAttrs, options) {
  const stripped = {};
  if (!prefixedAttrs) return stripped;

  // Attributes may be nested one level down under the group name.
  let source = prefixedAttrs;
  if (options.attributesGroupName) {
    source = prefixedAttrs[options.attributesGroupName];
  }
  if (!source) return stripped;

  for (const name in source) {
    // Keys that do not carry the prefix are copied through unchanged.
    const bareName = name.startsWith(options.attributeNamePrefix)
      ? name.substring(options.attributeNamePrefix.length)
      : name;
    stripped[bareName] = source[name];
  }
  return stripped;
}
49
+
50
/**
 * Return the namespace prefix of a raw tag name, i.e. the text before the
 * first colon.
 *
 * @param {string} rawTagName - Tag name possibly with namespace (e.g., "soap:Envelope")
 * @returns {string|undefined} The prefix, or undefined when the input is not a
 *   non-empty string, has no colon, starts with a colon, or the prefix is "xmlns"
 */
function extractNamespace(rawTagName) {
  if (typeof rawTagName !== 'string' || !rawTagName) return undefined;

  const separatorAt = rawTagName.indexOf(':');
  // -1 means no colon at all, 0 means an empty prefix: neither is a namespace.
  if (separatorAt <= 0) return undefined;

  const candidate = rawTagName.substring(0, separatorAt);
  return candidate === 'xmlns' ? undefined : candidate;
}
68
+
69
/**
 * Holds the options, entity tables, counters and path matcher used while
 * parsing one XML document. The parsing routines themselves are module-level
 * functions that the constructor attaches to the instance so they run with
 * `this` bound to the parser.
 */
export default class OrderedObjParser {
  /**
   * @param {object} options - Parser options. The constructor itself reads
   *   `ignoreAttributes` and `stopNodes`; the attached functions read the rest.
   */
  constructor(options) {
    // Every row of an entity table has the same { regex, val } shape.
    const entity = (regex, val) => ({ regex, val });

    this.options = options;
    this.currentNode = null;
    this.tagsNodeStack = [];
    this.docTypeEntities = {};

    // Entities that are always replaced when entity processing is enabled.
    // Key order is the replacement order.
    this.lastEntities = {
      apos: entity(/&(apos|#39|#x27);/g, "'"),
      gt: entity(/&(gt|#62|#x3E);/g, ">"),
      lt: entity(/&(lt|#60|#x3C);/g, "<"),
      quot: entity(/&(quot|#34|#x22);/g, "\""),
    };

    // Kept separate so it can be applied after all other tables.
    this.ampEntity = entity(/&(amp|#38|#x26);/g, "&");

    // Replaced only when options.htmlEntities is set. Key order is the
    // replacement order; the numeric fallbacks come last.
    this.htmlEntities = {
      space: entity(/&(nbsp|#160);/g, " "),
      cent: entity(/&(cent|#162);/g, "¢"),
      pound: entity(/&(pound|#163);/g, "£"),
      yen: entity(/&(yen|#165);/g, "¥"),
      euro: entity(/&(euro|#8364);/g, "€"),
      copyright: entity(/&(copy|#169);/g, "©"),
      reg: entity(/&(reg|#174);/g, "®"),
      inr: entity(/&(inr|#8377);/g, "₹"),
      num_dec: entity(/&#([0-9]{1,7});/g, (_, digits) => fromCodePoint(digits, 10, "&#")),
      num_hex: entity(/&#x([0-9a-fA-F]{1,6});/g, (_, digits) => fromCodePoint(digits, 16, "&#x")),
    };

    // Attach the module-level routines as instance members.
    this.addExternalEntities = addExternalEntities;
    this.parseXml = parseXml;
    this.parseTextData = parseTextData;
    this.resolveNameSpace = resolveNameSpace;
    this.buildAttributesMap = buildAttributesMap;
    this.isItStopNode = isItStopNode;
    this.replaceEntitiesValue = replaceEntitiesValue;
    this.readStopNodeData = readStopNodeData;
    this.saveTextToParentTag = saveTextToParentTag;
    this.addChild = addChild;
    this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes);

    // Running totals used by the entity expansion limits.
    this.entityExpansionCount = 0;
    this.currentExpandedLength = 0;

    // Tracks the current tag path for path-expression-matcher.
    this.matcher = new Matcher();

    // True while the tag being opened matched a stop-node expression.
    this.isCurrentNodeStopNode = false;

    // Compile stopNodes once up front. Strings become Expression objects,
    // existing Expression objects are reused, anything else is skipped.
    // stopNodeExpressions stays undefined when no stop nodes are configured.
    const configuredStopNodes = this.options.stopNodes;
    if (configuredStopNodes && configuredStopNodes.length > 0) {
      this.stopNodeExpressions = [];
      for (const candidate of configuredStopNodes) {
        if (typeof candidate === 'string') {
          this.stopNodeExpressions.push(new Expression(candidate));
        } else if (candidate instanceof Expression) {
          this.stopNodeExpressions.push(candidate);
        }
      }
    }
  }

}
136
+
137
/**
 * Register caller-supplied entities so they are replaced together with the
 * built-in ones in `this.lastEntities`.
 *
 * Each entity name is matched literally: regular-expression metacharacters in
 * the name are escaped as themselves, so `a.b` matches only `&a.b;` and
 * `a-b` matches `&a-b;`.
 *
 * @param {Object<string, string>} externalEntities - Map of entity name to replacement value
 */
function addExternalEntities(externalEntities) {
  for (const [entityName, replacement] of Object.entries(externalEntities)) {
    // "\\$&" re-inserts the matched character behind a backslash. "-" and ":"
    // have no special meaning outside a character class and need no escaping.
    const escapedName = entityName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    this.lastEntities[entityName] = {
      regex: new RegExp(`&${escapedName};`, 'g'),
      val: replacement,
    };
  }
}
148
+
149
/**
 * Turn raw text found inside a tag into the value stored in the tree.
 *
 * Steps, in order: optional trim, optional entity replacement, the
 * user-supplied `tagValueProcessor`, then `parseValue` when the processor
 * returned the text unchanged.
 *
 * @param {string} val - Raw text
 * @param {string} tagName - Name of the tag that contains the text
 * @param {string|Matcher} jPath - jPath string or Matcher instance based on options.jPath
 * @param {boolean} dontTrim - Skip trimming even when options.trimValues is set
 * @param {boolean} hasAttributes - Forwarded to tagValueProcessor
 * @param {boolean} isLeafNode - Forwarded to tagValueProcessor
 * @param {boolean} escapeEntities - When truthy, entity replacement is skipped
 * @returns {*} The processed value, or undefined when `val` is undefined or
 *   has no length after trimming
 */
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode, escapeEntities) {
  if (val === undefined) return undefined;

  let text = val;
  if (this.options.trimValues && !dontTrim) {
    text = text.trim();
  }
  if (!(text.length > 0)) return undefined;

  if (!escapeEntities) {
    text = this.replaceEntitiesValue(text, tagName, jPath);
  }

  // The processor receives a path string or the matcher, depending on options.jPath.
  const pathArg = this.options.jPath ? jPath.toString() : jPath;
  const processed = this.options.tagValueProcessor(tagName, text, pathArg, hasAttributes, isLeafNode);

  // null/undefined from the processor means "keep the text, do not parse it".
  if (processed === null || processed === undefined) return text;

  // Any different value (including a different type) replaces the text as is.
  if (processed !== text) return processed;

  // Unchanged text is parsed, except when trimming is off and the text has
  // surrounding whitespace: then it is returned untouched.
  const canParse = this.options.trimValues || text.trim() === text;
  return canParse
    ? parseValue(text, this.options.parseTagValue, this.options.numberParseOptions)
    : text;
}
188
+
189
/**
 * Strip a namespace prefix from a name when `options.removeNSPrefix` is set.
 *
 * @param {string} tagname - Tag or attribute name, possibly prefixed ("ns:name")
 * @returns {string} '' when the part before the first colon is "xmlns";
 *   the local part for names with exactly one colon (keeping a leading "/");
 *   otherwise the name unchanged
 */
function resolveNameSpace(tagname) {
  if (!this.options.removeNSPrefix) return tagname;

  const parts = tagname.split(':');
  if (parts[0] === 'xmlns') return '';

  // Only "prefix:local" is rewritten; zero or several colons are left alone.
  if (parts.length !== 2) return tagname;

  const leadingSlash = tagname.charAt(0) === '/' ? '/' : '';
  return leadingSlash + parts[1];
}
202
+
203
//TODO: change regex to capture NS
// Groups: 1 = attribute name, 3 = quote character, 4 = value (undefined for a
// bare attribute with no "=value" part).
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');

/**
 * Parse the attribute portion of a tag into an object keyed by
 * `options.attributeNamePrefix + name`.
 *
 * Works in two passes. The first pass resolves every attribute value and, when
 * `jPath` is a matcher object, hands the un-prefixed name/value map to
 * `jPath.updateCurrent` so user callbacks invoked in the second pass can see
 * the attributes of the current tag. The second pass applies
 * `ignoreAttributesFn`, `transformAttributeName`, name sanitizing,
 * `attributeValueProcessor` and value parsing.
 *
 * The first pass is only a preview: the entity expansion counters are restored
 * after it, so each attribute entity is counted once against the configured
 * limits instead of twice.
 *
 * @param {string} attrStr - Attribute expression of the tag
 * @param {string|Matcher} jPath - jPath string or Matcher instance
 * @param {string} tagName - Name of the tag owning the attributes
 * @returns {object|undefined} The attribute map (wrapped under
 *   `options.attributesGroupName` when set), or undefined when attributes are
 *   ignored, `attrStr` is not a string, or no attribute was kept
 */
function buildAttributesMap(attrStr, jPath, tagName) {
  if (this.options.ignoreAttributes === true || typeof attrStr !== 'string') {
    return undefined;
  }

  const matches = getAllMatches(attrStr, attrsRegx);
  const len = matches.length; //don't make it inline
  const attrs = {};

  // First pass: resolve raw values for the matcher. Remember the counters so
  // this preview does not consume the entity expansion budget.
  const expansionCountBefore = this.entityExpansionCount;
  const expandedLengthBefore = this.currentExpandedLength;
  const rawAttrsForMatcher = {};
  for (let i = 0; i < len; i++) {
    const attrName = this.resolveNameSpace(matches[i][1]);
    const rawVal = matches[i][4];

    if (attrName.length && rawVal !== undefined) {
      let previewVal = rawVal;
      if (this.options.trimValues) {
        previewVal = previewVal.trim();
      }
      rawAttrsForMatcher[attrName] = this.replaceEntitiesValue(previewVal, tagName, jPath);
    }
  }
  this.entityExpansionCount = expansionCountBefore;
  this.currentExpandedLength = expandedLengthBefore;

  // Update matcher with raw attribute values BEFORE running processors
  if (Object.keys(rawAttrsForMatcher).length > 0 && typeof jPath === 'object' && jPath.updateCurrent) {
    jPath.updateCurrent(rawAttrsForMatcher);
  }

  // Second pass: build the result with the matcher holding full attribute context
  for (let i = 0; i < len; i++) {
    const attrName = this.resolveNameSpace(matches[i][1]);

    // Callbacks receive a path string or the matcher, depending on options.jPath
    const jPathOrMatcher = this.options.jPath ? jPath.toString() : jPath;
    if (this.ignoreAttributesFn(attrName, jPathOrMatcher)) {
      continue;
    }
    if (!attrName.length) {
      continue;
    }

    let aName = this.options.attributeNamePrefix + attrName;
    if (this.options.transformAttributeName) {
      aName = this.options.transformAttributeName(aName);
    }
    aName = sanitizeName(aName, this.options);

    let oldVal = matches[i][4];
    if (oldVal === undefined) {
      // Bare attribute without a value
      if (this.options.allowBooleanAttributes) {
        attrs[aName] = true;
      }
      continue;
    }

    if (this.options.trimValues) {
      oldVal = oldVal.trim();
    }
    oldVal = this.replaceEntitiesValue(oldVal, tagName, jPath);

    const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPathOrMatcher);
    if (newVal === null || newVal === undefined) {
      //don't parse
      attrs[aName] = oldVal;
    } else if (newVal !== oldVal) {
      //overwrite
      attrs[aName] = newVal;
    } else {
      //parse
      attrs[aName] = parseValue(
        oldVal,
        this.options.parseAttributeValue,
        this.options.numberParseOptions
      );
    }
  }

  if (!Object.keys(attrs).length) {
    return undefined;
  }
  if (this.options.attributesGroupName) {
    return { [this.options.attributesGroupName]: attrs };
  }
  return attrs;
}
298
+
299
/**
 * Parse an XML string into the ordered node tree.
 *
 * Scans the input one character at a time. Text is buffered in `textData`;
 * every `<` dispatches on what follows it: closing tag, processing
 * instruction, comment, DOCTYPE, CDATA, or opening tag. Open elements are
 * tracked on `this.tagsNodeStack` (no recursion) and mirrored in
 * `this.matcher`, which user callbacks receive as the current path.
 *
 * Resets `this.matcher` and the entity expansion counters before scanning.
 *
 * @param {string} xmlData - XML document text
 * @returns {Array} Children of the synthetic `!xml` root node
 * @throws {Error} When a closing tag, comment, CDATA section, processing
 *   instruction or stop node is not terminated, when an unpaired tag is used
 *   as a closing tag, when a reserved name is used as a tag name under
 *   `strictReservedNames`, or when `maxNestedTags` is exceeded
 */
const parseXml = function (xmlData) {
  xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
  const xmlObj = new xmlNode('!xml');
  let currentNode = xmlObj;
  let textData = "";

  // Reset matcher for new document
  this.matcher.reset();

  // Reset entity expansion counters for this document
  this.entityExpansionCount = 0;
  this.currentExpandedLength = 0;

  const docTypeReader = new DocTypeReader(this.options.processEntities);
  for (let i = 0; i < xmlData.length; i++) {//for each char in XML data
    const ch = xmlData[i];
    if (ch === '<') {
      if (xmlData[i + 1] === '/') {//Closing Tag
        const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.");
        let tagName = xmlData.substring(i + 2, closeIndex).trim();

        if (this.options.removeNSPrefix) {
          const colonIndex = tagName.indexOf(":");
          if (colonIndex !== -1) {
            tagName = tagName.slice(colonIndex + 1);
          }
        }

        tagName = transformTagName(this.options.transformTagName, tagName, "", this.options).tagName;

        if (currentNode) {
          textData = this.saveTextToParentTag(textData, currentNode, this.matcher);
        }

        //check if last tag of nested tag was unpaired tag
        const lastTagName = this.matcher.getCurrentTag();
        if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) {
          throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
        }
        if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) {
          // Pop the unpaired tag
          this.matcher.pop();
          this.tagsNodeStack.pop();
        }
        // Pop the closing tag
        this.matcher.pop();
        this.isCurrentNodeStopNode = false; // Reset flag when closing tag

        currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
        textData = "";
        i = closeIndex;
      } else if (xmlData[i + 1] === '?') {//Processing instruction

        const tagData = readTagExp(xmlData, i, false, "?>");
        if (!tagData) throw new Error("Pi Tag is not closed.");

        textData = this.saveTextToParentTag(textData, currentNode, this.matcher);
        if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) {
          //do nothing
        } else {

          const childNode = new xmlNode(tagData.tagName);
          childNode.add(this.options.textNodeName, "");

          if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
            childNode[":@"] = this.buildAttributesMap(tagData.tagExp, this.matcher, tagData.tagName);
          }
          this.addChild(currentNode, childNode, this.matcher, i);
        }

        // closeIndex points at the "?" of "?>", so step over it as well
        i = tagData.closeIndex + 1;
      } else if (xmlData.startsWith('!--', i + 1)) {//Comment
        const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.");
        if (this.options.commentPropName) {
          const comment = xmlData.substring(i + 4, endIndex - 2);

          textData = this.saveTextToParentTag(textData, currentNode, this.matcher);

          currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]);
        }
        i = endIndex;
      } else if (xmlData.startsWith('!D', i + 1)) {//DOCTYPE
        const result = docTypeReader.readDocType(xmlData, i);
        this.docTypeEntities = result.entities;
        i = result.i;
      } else if (xmlData.startsWith('![', i + 1)) {//CDATA
        // findClosingIndex returns the index of the final ">"; back up to the first "]"
        const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
        const tagExp = xmlData.substring(i + 9, closeIndex);

        textData = this.saveTextToParentTag(textData, currentNode, this.matcher);

        let val = this.parseTextData(tagExp, currentNode.tagname, this.matcher, true, false, true, true);
        if (val == undefined) val = "";

        //cdata should be set even if it is 0 length string
        if (this.options.cdataPropName) {
          currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]);
        } else {
          currentNode.add(this.options.textNodeName, val);
        }

        i = closeIndex + 2;
      } else {//Opening tag
        const result = readTagExp(xmlData, i, this.options.removeNSPrefix);

        // Safety check: readTagExp can return undefined
        if (!result) {
          // Include surrounding input in the message for debugging
          const context = xmlData.substring(Math.max(0, i - 50), Math.min(xmlData.length, i + 50));
          throw new Error(`readTagExp returned undefined at position ${i}. Context: "${context}"`);
        }

        let { tagName, tagExp, attrExpPresent } = result;
        const { rawTagName, closeIndex } = result;

        ({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));

        if (this.options.strictReservedNames &&
          (tagName === this.options.commentPropName
            || tagName === this.options.cdataPropName
          )) {
          throw new Error(`Invalid tag name: ${tagName}`);
        }

        //save text as child node
        if (currentNode && textData) {
          if (currentNode.tagname !== '!xml') {
            //when nested tag is found
            textData = this.saveTextToParentTag(textData, currentNode, this.matcher, false);
          }
        }

        //check if last tag was unpaired tag
        const lastTag = currentNode;
        if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) {
          currentNode = this.tagsNodeStack.pop();
          this.matcher.pop();
        }

        // Clean up self-closing syntax BEFORE processing attributes
        // This is where tagExp gets the trailing / removed
        let isSelfClosing = false;
        if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
          isSelfClosing = true;
          if (tagName[tagName.length - 1] === "/") {
            // "<a/>": the slash is part of the name and there are no attributes
            tagName = tagName.slice(0, -1);
            tagExp = tagName;
          } else {
            tagExp = tagExp.slice(0, -1);
          }

          // Re-check attrExpPresent after cleaning
          attrExpPresent = (tagName !== tagExp);
        }

        // Extract namespace from rawTagName
        const namespace = extractNamespace(rawTagName);

        // Push tag to matcher FIRST (with empty attrs for now) so callbacks see correct path
        if (tagName !== xmlObj.tagname) {
          this.matcher.push(tagName, {}, namespace);
        }

        // Now build attributes with CLEAN tagExp (no trailing /).
        // buildAttributesMap also feeds the raw attribute values to the matcher.
        let prefixedAttrs = null;
        if (tagName !== tagExp && attrExpPresent) {
          prefixedAttrs = this.buildAttributesMap(tagExp, this.matcher, tagName);
        }

        // Now check if this is a stop node (after attributes are set)
        if (tagName !== xmlObj.tagname) {
          this.isCurrentNodeStopNode = this.isItStopNode(this.stopNodeExpressions, this.matcher);
        }

        const startIndex = i;
        if (this.isCurrentNodeStopNode) {
          let tagContent = "";

          if (isSelfClosing || this.options.unpairedTags.indexOf(tagName) !== -1) {
            // Self-closing and unpaired tags have no content
            i = closeIndex;
          } else {
            //normal tag: read until the matching closing tag is found
            const stopNodeData = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1);
            if (!stopNodeData) throw new Error(`Unexpected end of ${rawTagName}`);
            i = stopNodeData.i;
            tagContent = stopNodeData.tagContent;
          }

          const childNode = new xmlNode(tagName);

          if (prefixedAttrs) {
            childNode[":@"] = prefixedAttrs;
          }

          // For stop nodes, store raw content as-is without any processing
          childNode.add(this.options.textNodeName, tagContent);

          this.matcher.pop(); // Pop the stop node tag
          this.isCurrentNodeStopNode = false; // Reset flag

          this.addChild(currentNode, childNode, this.matcher, startIndex);
        } else {
          //selfClosing tag
          if (isSelfClosing) {
            // NOTE(review): transformTagName already ran on this tag above, so a
            // non-idempotent options.transformTagName is applied twice to
            // self-closing tags. Kept as is to preserve existing output - confirm intent.
            ({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));

            const childNode = new xmlNode(tagName);
            if (prefixedAttrs) {
              childNode[":@"] = prefixedAttrs;
            }
            this.addChild(currentNode, childNode, this.matcher, startIndex);
            this.matcher.pop(); // Pop self-closing tag
            this.isCurrentNodeStopNode = false; // Reset flag
          }
          else if (this.options.unpairedTags.indexOf(tagName) !== -1) {//unpaired tag
            const childNode = new xmlNode(tagName);
            if (prefixedAttrs) {
              childNode[":@"] = prefixedAttrs;
            }
            this.addChild(currentNode, childNode, this.matcher, startIndex);
            this.matcher.pop(); // Pop unpaired tag
            this.isCurrentNodeStopNode = false; // Reset flag
            i = closeIndex;
            // Continue to next iteration without changing currentNode
            continue;
          }
          //opening tag
          else {
            const childNode = new xmlNode(tagName);
            if (this.tagsNodeStack.length > this.options.maxNestedTags) {
              throw new Error("Maximum nested tags exceeded");
            }
            this.tagsNodeStack.push(currentNode);

            if (prefixedAttrs) {
              childNode[":@"] = prefixedAttrs;
            }
            this.addChild(currentNode, childNode, this.matcher, startIndex);
            currentNode = childNode;
          }
          textData = "";
          i = closeIndex;
        }
      }
    } else {
      textData += ch;
    }
  }
  return xmlObj.child;
}
573
+
574
/**
 * Attach `childNode` to `currentNode` after giving `options.updateTag` the
 * chance to rename or drop it.
 *
 * @param {object} currentNode - Parent node; its `addChild` method is called
 * @param {object} childNode - Node to attach; `tagname` may be overwritten
 * @param {Matcher} matcher - Current path; passed to updateTag as a string when options.jPath is set
 * @param {number} startIndex - Source position; forwarded only when options.captureMetaData is set
 */
function addChild(currentNode, childNode, matcher, startIndex) {
  // Position metadata is dropped unless explicitly requested.
  const position = this.options.captureMetaData ? startIndex : undefined;

  const pathArg = this.options.jPath ? matcher.toString() : matcher;
  const verdict = this.options.updateTag(childNode.tagname, pathArg, childNode[":@"]);

  // Exactly `false` drops the node; a string renames it; anything else keeps it as is.
  if (verdict === false) return;
  if (typeof verdict === "string") {
    childNode.tagname = verdict;
  }
  currentNode.addChild(childNode, position);
}
590
+
591
/**
 * Replace entity references in `val` and enforce the configured expansion limits.
 *
 * Replacement order: DOCTYPE entities, `this.lastEntities`, then (only if an
 * `&` is still present) the HTML entities when `options.htmlEntities` is set,
 * and finally the ampersand entity. Every match except the ampersand ones
 * adds to `this.entityExpansionCount`; the growth caused by DOCTYPE entities
 * adds to `this.currentExpandedLength` when `maxExpandedLength` is configured.
 *
 * @param {string} val - Text that may contain entity references
 * @param {string} tagName - Tag name
 * @param {string|Matcher} jPath - jPath string or Matcher instance based on options.jPath
 * @returns {string} The text with entities replaced, or `val` unchanged when
 *   processing is disabled or the tag is excluded by `allowedTags` / `tagFilter`
 * @throws {Error} When `maxTotalExpansions` or `maxExpandedLength` is exceeded
 */
function replaceEntitiesValue(val, tagName, jPath) {
  const entityConfig = this.options.processEntities;
  if (!entityConfig || !entityConfig.enabled) return val;

  // Check if tag is allowed to contain entities (list of names or predicate)
  if (entityConfig.allowedTags) {
    const pathArg = this.options.jPath ? jPath.toString() : jPath;
    const permitted = Array.isArray(entityConfig.allowedTags)
      ? entityConfig.allowedTags.includes(tagName)
      : entityConfig.allowedTags(tagName, pathArg);
    if (!permitted) return val;
  }

  // Apply custom tag filter if provided
  if (entityConfig.tagFilter) {
    const pathArg = this.options.jPath ? jPath.toString() : jPath;
    if (!entityConfig.tagFilter(tagName, pathArg)) return val;
  }

  // Shared bookkeeping: add the matches to the running total and enforce the cap.
  const recordExpansions = (hits) => {
    this.entityExpansionCount += hits.length;
    if (entityConfig.maxTotalExpansions &&
      this.entityExpansionCount > entityConfig.maxTotalExpansions) {
      throw new Error(
        `Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
      );
    }
  };

  let text = val;

  // DOCTYPE entities: the only table whose size growth is tracked
  for (const entity of Object.values(this.docTypeEntities)) {
    const hits = text.match(entity.regx);
    if (!hits) continue;

    recordExpansions(hits);

    const sizeBefore = text.length;
    text = text.replace(entity.regx, entity.val);

    if (entityConfig.maxExpandedLength) {
      this.currentExpandedLength += (text.length - sizeBefore);
      if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
        throw new Error(
          `Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
        );
      }
    }
  }

  // Standard entities
  for (const entity of Object.values(this.lastEntities)) {
    const hits = text.match(entity.regex);
    if (hits) recordExpansions(hits);
    text = text.replace(entity.regex, entity.val);
  }

  // Nothing left that could be an entity reference
  if (text.indexOf('&') === -1) return text;

  // HTML entities if enabled
  if (this.options.htmlEntities) {
    for (const entity of Object.values(this.htmlEntities)) {
      const hits = text.match(entity.regex);
      if (hits) recordExpansions(hits);
      text = text.replace(entity.regex, entity.val);
    }
  }

  // Ampersand entity goes last so its output is not re-interpreted
  return text.replace(this.ampEntity.regex, this.ampEntity.val);
}
697
+
698
+
699
/**
 * Flush buffered text into `parentNode` as a text node.
 *
 * @param {string} textData - Text collected since the last tag
 * @param {object} parentNode - Node that receives the text
 * @param {Matcher} matcher - Current path, forwarded to parseTextData
 * @param {boolean} [isLeafNode] - When omitted, derived from whether parentNode has no children yet
 * @returns {string} "" after a flush; the original falsy `textData` when there was nothing to flush
 */
function saveTextToParentTag(textData, parentNode, matcher, isLeafNode) {
  if (!textData) return textData;

  const leaf = isLeafNode === undefined ? parentNode.child.length === 0 : isLeafNode;
  const attrMap = parentNode[":@"];
  const hasAttributes = attrMap ? Object.keys(attrMap).length !== 0 : false;

  const parsed = this.parseTextData(textData, parentNode.tagname, matcher, false, hasAttributes, leaf);

  // Skip values the text parser discarded (undefined) or emptied.
  if (parsed !== undefined && parsed !== "") {
    parentNode.add(this.options.textNodeName, parsed);
  }
  return "";
}
716
+
717
//TODO: use jPath to simplify the logic
/**
 * Tell whether the matcher's current path matches any stop-node expression.
 *
 * @param {Array<Expression>} stopNodeExpressions - Array of compiled Expression objects (may be undefined)
 * @param {Matcher} matcher - Current path matcher
 * @returns {boolean} true on the first expression for which `matcher.matches` is truthy
 */
function isItStopNode(stopNodeExpressions, matcher) {
  const hasExpressions = Boolean(stopNodeExpressions) && stopNodeExpressions.length !== 0;
  if (!hasExpressions) return false;

  for (const expression of stopNodeExpressions) {
    if (matcher.matches(expression)) return true;
  }
  return false;
}
732
+
733
/**
 * Collect the text of a tag up to its closing sequence, ignoring closing
 * characters that appear inside single- or double-quoted attribute values.
 * Tabs outside quotes are collected as spaces.
 *
 * @param {string} xmlData
 * @param {number} i starting index (first character after "<")
 * @param {string} [closingChar=">"] one- or two-character closing sequence
 * @returns {{data: string, index: number}|undefined} collected text and the
 *   index of the first closing character, or undefined when the input ends first
 */
function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
  const closerFirst = closingChar[0];
  const closerSecond = closingChar[1];

  let openQuote = "";
  const collected = [];

  for (let cursor = i; cursor < xmlData.length; cursor++) {
    const current = xmlData[cursor];
    let emitted = current;

    if (openQuote) {
      // Inside a quoted value: only the matching quote ends it.
      if (current === openQuote) openQuote = "";
    } else if (current === '"' || current === "'") {
      openQuote = current;
    } else if (current === closerFirst) {
      // A two-character closer also needs the next character to match;
      // otherwise this character is ordinary content.
      const closesHere = closerSecond ? xmlData[cursor + 1] === closerSecond : true;
      if (closesHere) {
        return { data: collected.join(""), index: cursor };
      }
    } else if (current === '\t') {
      emitted = " ";
    }

    collected.push(emitted);
  }
  return undefined;
}
768
+
769
/**
 * Locate `str` in `xmlData` at or after position `i`.
 *
 * @param {string} xmlData - Text to search
 * @param {string} str - Closing sequence to look for
 * @param {number} i - Index to start searching from
 * @param {string} errMsg - Message of the Error thrown when `str` is absent
 * @returns {number} Index of the LAST character of the found sequence
 * @throws {Error} With `errMsg` when the sequence does not occur
 */
function findClosingIndex(xmlData, str, i, errMsg) {
  const foundAt = xmlData.indexOf(str, i);
  if (foundAt === -1) {
    throw new Error(errMsg);
  }
  return foundAt + str.length - 1;
}
777
+
778
/**
 * Read the tag that starts at `xmlData[i]` (the "<") and split it into its
 * name and the remaining expression.
 *
 * @param {string} xmlData - Document text
 * @param {number} i - Index of the "<" that opens the tag
 * @param {boolean} removeNSPrefix - Drop everything up to the first ":" from the tag name
 * @param {string} [closingChar=">"] - Closing sequence passed on to tagExpWithClosingIndex
 * @returns {{tagName: string, tagExp: string, closeIndex: number, attrExpPresent: boolean, rawTagName: string}|undefined}
 *   undefined when the tag is never closed. `tagExp` is the text after the
 *   first whitespace (leading whitespace removed), or the whole tag text when
 *   there is no whitespace. `rawTagName` is the name before prefix removal.
 */
function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
  const scanned = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
  if (!scanned) return undefined;

  const { data: wholeExp, index: closeIndex } = scanned;

  // Name and attribute expression are separated by the first whitespace.
  const whitespaceAt = wholeExp.search(/\s/);
  const hasSeparator = whitespaceAt !== -1;
  const rawTagName = hasSeparator ? wholeExp.substring(0, whitespaceAt) : wholeExp;
  const tagExp = hasSeparator ? wholeExp.substring(whitespaceAt + 1).trimStart() : wholeExp;

  let tagName = rawTagName;
  let attrExpPresent = true;
  if (removeNSPrefix) {
    const colonAt = rawTagName.indexOf(":");
    if (colonAt !== -1) {
      tagName = rawTagName.slice(colonAt + 1);
      // Nothing but the name after the prefix means there is no attribute expression.
      attrExpPresent = tagName !== wholeExp.slice(colonAt + 1);
    }
  }

  return {
    tagName: tagName,
    tagExp: tagExp,
    closeIndex: closeIndex,
    attrExpPresent: attrExpPresent,
    rawTagName: rawTagName,
  };
}
808
/**
 * Find the closing tag that pairs with an already-opened stop node and return
 * everything in between as raw text.
 *
 * Nested tags with the same name are counted so that the matching closing tag
 * is used rather than the first one. Processing instructions, comments and
 * CDATA sections are skipped, so tag-like text inside them is not counted.
 * Names are compared exactly as written in the document (namespace prefix
 * included), for opening and closing tags alike.
 *
 * @param {string} xmlData - Document text
 * @param {string} tagName - Stop node name exactly as written in its opening tag
 * @param {number} i - Index of the first character after the opening tag
 * @returns {{tagContent: string, i: number}|undefined} Raw content and the
 *   index of the ">" of the matching closing tag; undefined when the input
 *   ends before the stop node is closed
 * @throws {Error} When a closing tag, processing instruction, comment or
 *   CDATA section inside the stop node is not terminated
 */
function readStopNodeData(xmlData, tagName, i) {
  const startIndex = i;
  // Starting at 1 since we already have an open tag
  let openTagCount = 1;

  for (; i < xmlData.length; i++) {
    if (xmlData[i] !== "<") continue;

    if (xmlData[i + 1] === "/") {//close tag
      const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
      const closeTagName = xmlData.substring(i + 2, closeIndex).trim();
      if (closeTagName === tagName) {
        openTagCount--;
        if (openTagCount === 0) {
          return {
            tagContent: xmlData.substring(startIndex, i),
            i: closeIndex
          };
        }
      }
      i = closeIndex;
    } else if (xmlData[i + 1] === '?') {
      i = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.");
    } else if (xmlData.startsWith('!--', i + 1)) {
      i = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.");
    } else if (xmlData.startsWith('![', i + 1)) {
      i = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
    } else {
      // removeNSPrefix must be false here: tagName is the raw name, so the
      // nested tag name has to keep its prefix to compare equal.
      const tagData = readTagExp(xmlData, i, false, '>');

      if (tagData) {
        const isSelfClosing = tagData.tagExp[tagData.tagExp.length - 1] === "/";
        if (tagData.tagName === tagName && !isSelfClosing) {
          openTagCount++;
        }
        i = tagData.closeIndex;
      }
    }
  }//end for loop
  return undefined;
}
857
+
858
/**
 * Convert a textual value to a boolean or number when parsing is requested.
 *
 * @param {*} val - Value to convert
 * @param {boolean} shouldParse - Whether conversion is enabled
 * @param {object} options - Passed through to `toNumber`
 * @returns {*} For strings with parsing enabled: true / false when the trimmed
 *   text is "true" / "false", otherwise the result of `toNumber`. In every
 *   other case: `val` itself when `isExist(val)`, else ''.
 */
function parseValue(val, shouldParse, options) {
  if (!shouldParse || typeof val !== 'string') {
    return isExist(val) ? val : '';
  }

  // Only the boolean check uses the trimmed text; toNumber gets the original.
  switch (val.trim()) {
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      return toNumber(val, options);
  }
}
873
+
874
/**
 * Decode a numeric character reference.
 *
 * @param {string} str - Digits of the reference (without "&#" / "&#x" and ";")
 * @param {number} base - Radix of `str` (10 or 16 as used by the entity tables)
 * @param {string} prefix - Original reference prefix, used to rebuild the text on failure
 * @returns {string} The character for the code point, or the reference text
 *   `prefix + str + ";"` when the number is not within 0..0x10FFFF
 */
function fromCodePoint(str, base, prefix) {
  const codePoint = Number.parseInt(str, base);

  // NaN fails both comparisons, so unparsable digits fall through to the rebuilt text.
  const isInUnicodeRange = codePoint >= 0 && codePoint <= 0x10FFFF;
  return isInUnicodeRange
    ? String.fromCodePoint(codePoint)
    : prefix + str + ";";
}
883
+
884
/**
 * Apply the optional user transform to a tag name and sanitize the result.
 *
 * When the tag has no attribute expression (`tagExp` equals `tagName`), the
 * expression is replaced by the transformed name so the two stay equal. The
 * sanitized name is returned as `tagName`; `tagExp` is not sanitized.
 *
 * @param {Function|undefined} fn - options.transformTagName, if any
 * @param {string} tagName - Tag name as read
 * @param {string} tagExp - Tag expression as read
 * @param {object} options - Parser options, forwarded to sanitizeName
 * @returns {{tagName: string, tagExp: string}}
 */
function transformTagName(fn, tagName, tagExp, options) {
  let resolvedName = tagName;
  let resolvedExp = tagExp;

  if (fn) {
    resolvedName = fn(tagName);
    if (tagExp === tagName) {
      resolvedExp = resolvedName;
    }
  }

  return { tagName: sanitizeName(resolvedName, options), tagExp: resolvedExp };
}
895
+
896
+
897
+
898
/**
 * Guard a tag or attribute name before it is used as an object key.
 *
 * @param {string} name - Candidate name
 * @param {object} options - Parser options; `onDangerousProperty` is called for names in DANGEROUS_PROPERTY_NAMES
 * @returns {string} `name` unchanged, or whatever `options.onDangerousProperty(name)` returns
 * @throws {Error} When `name` is listed in criticalProperties
 */
function sanitizeName(name, options) {
  // Critical names are rejected outright; they are checked before the dangerous list.
  if (criticalProperties.includes(name)) {
    throw new Error(`[SECURITY] Invalid name: "${name}" is a reserved JavaScript keyword that could cause prototype pollution`);
  }

  const isDangerous = DANGEROUS_PROPERTY_NAMES.includes(name);
  return isDangerous ? options.onDangerousProperty(name) : name;
}