fast-xml-parser 4.5.2 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
 
4
4
  const util = require('../util');
5
5
  const xmlNode = require('./xmlNode');
6
- const readDocType = require("./DocTypeReader");
6
+ const DocTypeReader = require('./DocTypeReader');
7
7
  const toNumber = require("strnum");
8
8
  const getIgnoreAttributesFn = require('../ignoreAttributes')
9
9
 
@@ -14,19 +14,19 @@ const getIgnoreAttributesFn = require('../ignoreAttributes')
14
14
  //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
15
15
  //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
16
16
 
17
- class OrderedObjParser{
18
- constructor(options){
17
+ class OrderedObjParser {
18
+ constructor(options) {
19
19
  this.options = options;
20
20
  this.currentNode = null;
21
21
  this.tagsNodeStack = [];
22
22
  this.docTypeEntities = {};
23
23
  this.lastEntities = {
24
- "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
25
- "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
26
- "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
27
- "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
24
+ "apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
25
+ "gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
26
+ "lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
27
+ "quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
28
28
  };
29
- this.ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
29
+ this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" };
30
30
  this.htmlEntities = {
31
31
  "space": { regex: /&(nbsp|#160);/g, val: " " },
32
32
  // "lt" : { regex: /&(lt|#60);/g, val: "<" },
@@ -34,15 +34,15 @@ class OrderedObjParser{
34
34
  // "amp" : { regex: /&(amp|#38);/g, val: "&" },
35
35
  // "quot" : { regex: /&(quot|#34);/g, val: "\"" },
36
36
  // "apos" : { regex: /&(apos|#39);/g, val: "'" },
37
- "cent" : { regex: /&(cent|#162);/g, val: "¢" },
38
- "pound" : { regex: /&(pound|#163);/g, val: "£" },
39
- "yen" : { regex: /&(yen|#165);/g, val: "¥" },
40
- "euro" : { regex: /&(euro|#8364);/g, val: "€" },
41
- "copyright" : { regex: /&(copy|#169);/g, val: "©" },
42
- "reg" : { regex: /&(reg|#174);/g, val: "®" },
43
- "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
44
- "num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 10)) },
45
- "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 16)) },
37
+ "cent": { regex: /&(cent|#162);/g, val: "¢" },
38
+ "pound": { regex: /&(pound|#163);/g, val: "£" },
39
+ "yen": { regex: /&(yen|#165);/g, val: "¥" },
40
+ "euro": { regex: /&(euro|#8364);/g, val: "€" },
41
+ "copyright": { regex: /&(copy|#169);/g, val: "©" },
42
+ "reg": { regex: /&(reg|#174);/g, val: "®" },
43
+ "inr": { regex: /&(inr|#8377);/g, val: "₹" },
44
+ "num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") },
45
+ "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") },
46
46
  };
47
47
  this.addExternalEntities = addExternalEntities;
48
48
  this.parseXml = parseXml;
@@ -55,17 +55,34 @@ class OrderedObjParser{
55
55
  this.saveTextToParentTag = saveTextToParentTag;
56
56
  this.addChild = addChild;
57
57
  this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes)
58
+ this.entityExpansionCount = 0;
59
+ this.currentExpandedLength = 0;
60
+
61
+ if (this.options.stopNodes && this.options.stopNodes.length > 0) {
62
+ this.stopNodesExact = new Set();
63
+ this.stopNodesWildcard = new Set();
64
+ for (let i = 0; i < this.options.stopNodes.length; i++) {
65
+ const stopNodeExp = this.options.stopNodes[i];
66
+ if (typeof stopNodeExp !== 'string') continue;
67
+ if (stopNodeExp.startsWith("*.")) {
68
+ this.stopNodesWildcard.add(stopNodeExp.substring(2));
69
+ } else {
70
+ this.stopNodesExact.add(stopNodeExp);
71
+ }
72
+ }
73
+ }
58
74
  }
59
75
 
60
76
  }
61
77
 
62
- function addExternalEntities(externalEntities){
78
+ function addExternalEntities(externalEntities) {
63
79
  const entKeys = Object.keys(externalEntities);
64
80
  for (let i = 0; i < entKeys.length; i++) {
65
81
  const ent = entKeys[i];
82
+ const escaped = ent.replace(/[.\-+*:]/g, '\\.');
66
83
  this.lastEntities[ent] = {
67
- regex: new RegExp("&"+ent+";","g"),
68
- val : externalEntities[ent]
84
+ regex: new RegExp("&" + escaped + ";", "g"),
85
+ val: externalEntities[ent]
69
86
  }
70
87
  }
71
88
  }
@@ -84,23 +101,23 @@ function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode,
84
101
  if (this.options.trimValues && !dontTrim) {
85
102
  val = val.trim();
86
103
  }
87
- if(val.length > 0){
88
- if(!escapeEntities) val = this.replaceEntitiesValue(val);
89
-
104
+ if (val.length > 0) {
105
+ if (!escapeEntities) val = this.replaceEntitiesValue(val, tagName, jPath);
106
+
90
107
  const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
91
- if(newval === null || newval === undefined){
108
+ if (newval === null || newval === undefined) {
92
109
  //don't parse
93
110
  return val;
94
- }else if(typeof newval !== typeof val || newval !== val){
111
+ } else if (typeof newval !== typeof val || newval !== val) {
95
112
  //overwrite
96
113
  return newval;
97
- }else if(this.options.trimValues){
114
+ } else if (this.options.trimValues) {
98
115
  return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
99
- }else{
116
+ } else {
100
117
  const trimmedVal = val.trim();
101
- if(trimmedVal === val){
118
+ if (trimmedVal === val) {
102
119
  return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
103
- }else{
120
+ } else {
104
121
  return val;
105
122
  }
106
123
  }
@@ -145,20 +162,20 @@ function buildAttributesMap(attrStr, jPath, tagName) {
145
162
  if (this.options.transformAttributeName) {
146
163
  aName = this.options.transformAttributeName(aName);
147
164
  }
148
- if(aName === "__proto__") aName = "#__proto__";
165
+ if (aName === "__proto__") aName = "#__proto__";
149
166
  if (oldVal !== undefined) {
150
167
  if (this.options.trimValues) {
151
168
  oldVal = oldVal.trim();
152
169
  }
153
- oldVal = this.replaceEntitiesValue(oldVal);
170
+ oldVal = this.replaceEntitiesValue(oldVal, tagName, jPath);
154
171
  const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath);
155
- if(newVal === null || newVal === undefined){
172
+ if (newVal === null || newVal === undefined) {
156
173
  //don't parse
157
174
  attrs[aName] = oldVal;
158
- }else if(typeof newVal !== typeof oldVal || newVal !== oldVal){
175
+ } else if (typeof newVal !== typeof oldVal || newVal !== oldVal) {
159
176
  //overwrite
160
177
  attrs[aName] = newVal;
161
- }else{
178
+ } else {
162
179
  //parse
163
180
  attrs[aName] = parseValue(
164
181
  oldVal,
@@ -183,46 +200,52 @@ function buildAttributesMap(attrStr, jPath, tagName) {
183
200
  }
184
201
  }
185
202
 
186
- const parseXml = function(xmlData) {
203
+ const parseXml = function (xmlData) {
187
204
  xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
188
205
  const xmlObj = new xmlNode('!xml');
189
206
  let currentNode = xmlObj;
190
207
  let textData = "";
191
208
  let jPath = "";
192
- for(let i=0; i< xmlData.length; i++){//for each char in XML data
209
+
210
+ // Reset entity expansion counters for this document
211
+ this.entityExpansionCount = 0;
212
+ this.currentExpandedLength = 0;
213
+
214
+ const docTypeReader = new DocTypeReader(this.options.processEntities);
215
+ for (let i = 0; i < xmlData.length; i++) {//for each char in XML data
193
216
  const ch = xmlData[i];
194
- if(ch === '<'){
217
+ if (ch === '<') {
195
218
  // const nextIndex = i+1;
196
219
  // const _2ndChar = xmlData[nextIndex];
197
- if( xmlData[i+1] === '/') {//Closing Tag
220
+ if (xmlData[i + 1] === '/') {//Closing Tag
198
221
  const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
199
- let tagName = xmlData.substring(i+2,closeIndex).trim();
222
+ let tagName = xmlData.substring(i + 2, closeIndex).trim();
200
223
 
201
- if(this.options.removeNSPrefix){
224
+ if (this.options.removeNSPrefix) {
202
225
  const colonIndex = tagName.indexOf(":");
203
- if(colonIndex !== -1){
204
- tagName = tagName.substr(colonIndex+1);
226
+ if (colonIndex !== -1) {
227
+ tagName = tagName.substr(colonIndex + 1);
205
228
  }
206
229
  }
207
230
 
208
- if(this.options.transformTagName) {
231
+ if (this.options.transformTagName) {
209
232
  tagName = this.options.transformTagName(tagName);
210
233
  }
211
234
 
212
- if(currentNode){
235
+ if (currentNode) {
213
236
  textData = this.saveTextToParentTag(textData, currentNode, jPath);
214
237
  }
215
238
 
216
239
  //check if last tag of nested tag was unpaired tag
217
- const lastTagName = jPath.substring(jPath.lastIndexOf(".")+1);
218
- if(tagName && this.options.unpairedTags.indexOf(tagName) !== -1 ){
240
+ const lastTagName = jPath.substring(jPath.lastIndexOf(".") + 1);
241
+ if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) {
219
242
  throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
220
243
  }
221
244
  let propIndex = 0
222
- if(lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1 ){
223
- propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.')-1)
245
+ if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) {
246
+ propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.') - 1)
224
247
  this.tagsNodeStack.pop();
225
- }else{
248
+ } else {
226
249
  propIndex = jPath.lastIndexOf(".");
227
250
  }
228
251
  jPath = jPath.substring(0, propIndex);
@@ -230,74 +253,85 @@ const parseXml = function(xmlData) {
230
253
  currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
231
254
  textData = "";
232
255
  i = closeIndex;
233
- } else if( xmlData[i+1] === '?') {
256
+ } else if (xmlData[i + 1] === '?') {
234
257
 
235
- let tagData = readTagExp(xmlData,i, false, "?>");
236
- if(!tagData) throw new Error("Pi Tag is not closed.");
258
+ let tagData = readTagExp(xmlData, i, false, "?>");
259
+ if (!tagData) throw new Error("Pi Tag is not closed.");
237
260
 
238
261
  textData = this.saveTextToParentTag(textData, currentNode, jPath);
239
- if( (this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags){
262
+ if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) {
263
+ //do nothing
264
+ } else {
240
265
 
241
- }else{
242
-
243
266
  const childNode = new xmlNode(tagData.tagName);
244
267
  childNode.add(this.options.textNodeName, "");
245
-
246
- if(tagData.tagName !== tagData.tagExp && tagData.attrExpPresent){
268
+
269
+ if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
247
270
  childNode[":@"] = this.buildAttributesMap(tagData.tagExp, jPath, tagData.tagName);
248
271
  }
249
- this.addChild(currentNode, childNode, jPath)
250
-
272
+ this.addChild(currentNode, childNode, jPath, i);
251
273
  }
252
274
 
253
275
 
254
276
  i = tagData.closeIndex + 1;
255
- } else if(xmlData.substr(i + 1, 3) === '!--') {
256
- const endIndex = findClosingIndex(xmlData, "-->", i+4, "Comment is not closed.")
257
- if(this.options.commentPropName){
277
+ } else if (xmlData.substr(i + 1, 3) === '!--') {
278
+ const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.")
279
+ if (this.options.commentPropName) {
258
280
  const comment = xmlData.substring(i + 4, endIndex - 2);
259
281
 
260
282
  textData = this.saveTextToParentTag(textData, currentNode, jPath);
261
283
 
262
- currentNode.add(this.options.commentPropName, [ { [this.options.textNodeName] : comment } ]);
284
+ currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]);
263
285
  }
264
286
  i = endIndex;
265
- } else if( xmlData.substr(i + 1, 2) === '!D') {
266
- const result = readDocType(xmlData, i);
287
+ } else if (xmlData.substr(i + 1, 2) === '!D') {
288
+ const result = docTypeReader.readDocType(xmlData, i);
267
289
  this.docTypeEntities = result.entities;
268
290
  i = result.i;
269
- }else if(xmlData.substr(i + 1, 2) === '![') {
291
+ } else if (xmlData.substr(i + 1, 2) === '![') {
270
292
  const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
271
- const tagExp = xmlData.substring(i + 9,closeIndex);
293
+ const tagExp = xmlData.substring(i + 9, closeIndex);
272
294
 
273
295
  textData = this.saveTextToParentTag(textData, currentNode, jPath);
274
296
 
275
297
  let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true, true);
276
- if(val == undefined) val = "";
298
+ if (val == undefined) val = "";
277
299
 
278
300
  //cdata should be set even if it is 0 length string
279
- if(this.options.cdataPropName){
280
- currentNode.add(this.options.cdataPropName, [ { [this.options.textNodeName] : tagExp } ]);
281
- }else{
301
+ if (this.options.cdataPropName) {
302
+ currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]);
303
+ } else {
282
304
  currentNode.add(this.options.textNodeName, val);
283
305
  }
284
-
306
+
285
307
  i = closeIndex + 2;
286
- }else {//Opening tag
287
- let result = readTagExp(xmlData,i, this.options.removeNSPrefix);
288
- let tagName= result.tagName;
308
+ } else {//Opening tag
309
+ let result = readTagExp(xmlData, i, this.options.removeNSPrefix);
310
+ let tagName = result.tagName;
289
311
  const rawTagName = result.rawTagName;
290
312
  let tagExp = result.tagExp;
291
313
  let attrExpPresent = result.attrExpPresent;
292
314
  let closeIndex = result.closeIndex;
293
315
 
294
316
  if (this.options.transformTagName) {
295
- tagName = this.options.transformTagName(tagName);
317
+ //console.log(tagExp, tagName)
318
+ const newTagName = this.options.transformTagName(tagName);
319
+ if (tagExp === tagName) {
320
+ tagExp = newTagName
321
+ }
322
+ tagName = newTagName;
323
+ }
324
+
325
+ if (this.options.strictReservedNames &&
326
+ (tagName === this.options.commentPropName
327
+ || tagName === this.options.cdataPropName
328
+ )) {
329
+ throw new Error(`Invalid tag name: ${tagName}`);
296
330
  }
297
-
331
+
298
332
  //save text as child node
299
333
  if (currentNode && textData) {
300
- if(currentNode.tagname !== '!xml'){
334
+ if (currentNode.tagname !== '!xml') {
301
335
  //when nested tag is found
302
336
  textData = this.saveTextToParentTag(textData, currentNode, jPath, false);
303
337
  }
@@ -305,80 +339,99 @@ const parseXml = function(xmlData) {
305
339
 
306
340
  //check if last tag was unpaired tag
307
341
  const lastTag = currentNode;
308
- if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1 ){
342
+ if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) {
309
343
  currentNode = this.tagsNodeStack.pop();
310
344
  jPath = jPath.substring(0, jPath.lastIndexOf("."));
311
345
  }
312
- if(tagName !== xmlObj.tagname){
346
+ if (tagName !== xmlObj.tagname) {
313
347
  jPath += jPath ? "." + tagName : tagName;
314
348
  }
315
- if (this.isItStopNode(this.options.stopNodes, jPath, tagName)) {
349
+ const startIndex = i;
350
+ if (this.isItStopNode(this.stopNodesExact, this.stopNodesWildcard, jPath, tagName)) {
316
351
  let tagContent = "";
317
352
  //self-closing tag
318
- if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
319
- if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
353
+ if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
354
+ if (tagName[tagName.length - 1] === "/") { //remove trailing '/'
320
355
  tagName = tagName.substr(0, tagName.length - 1);
321
356
  jPath = jPath.substr(0, jPath.length - 1);
322
357
  tagExp = tagName;
323
- }else{
358
+ } else {
324
359
  tagExp = tagExp.substr(0, tagExp.length - 1);
325
360
  }
326
361
  i = result.closeIndex;
327
362
  }
328
363
  //unpaired tag
329
- else if(this.options.unpairedTags.indexOf(tagName) !== -1){
330
-
364
+ else if (this.options.unpairedTags.indexOf(tagName) !== -1) {
365
+
331
366
  i = result.closeIndex;
332
367
  }
333
368
  //normal tag
334
- else{
369
+ else {
335
370
  //read until closing tag is found
336
371
  const result = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1);
337
- if(!result) throw new Error(`Unexpected end of ${rawTagName}`);
372
+ if (!result) throw new Error(`Unexpected end of ${rawTagName}`);
338
373
  i = result.i;
339
374
  tagContent = result.tagContent;
340
375
  }
341
376
 
342
377
  const childNode = new xmlNode(tagName);
343
- if(tagName !== tagExp && attrExpPresent){
378
+ if (tagName !== tagExp && attrExpPresent) {
344
379
  childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
345
380
  }
346
- if(tagContent) {
381
+ if (tagContent) {
347
382
  tagContent = this.parseTextData(tagContent, tagName, jPath, true, attrExpPresent, true, true);
348
383
  }
349
-
384
+
350
385
  jPath = jPath.substr(0, jPath.lastIndexOf("."));
351
386
  childNode.add(this.options.textNodeName, tagContent);
352
-
353
- this.addChild(currentNode, childNode, jPath)
354
- }else{
355
- //selfClosing tag
356
- if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
357
- if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
387
+
388
+ this.addChild(currentNode, childNode, jPath, startIndex);
389
+ } else {
390
+ //selfClosing tag
391
+ if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
392
+ if (tagName[tagName.length - 1] === "/") { //remove trailing '/'
358
393
  tagName = tagName.substr(0, tagName.length - 1);
359
394
  jPath = jPath.substr(0, jPath.length - 1);
360
395
  tagExp = tagName;
361
- }else{
396
+ } else {
362
397
  tagExp = tagExp.substr(0, tagExp.length - 1);
363
398
  }
364
-
365
- if(this.options.transformTagName) {
366
- tagName = this.options.transformTagName(tagName);
399
+
400
+ if (this.options.transformTagName) {
401
+ const newTagName = this.options.transformTagName(tagName);
402
+ if (tagExp === tagName) {
403
+ tagExp = newTagName
404
+ }
405
+ tagName = newTagName;
367
406
  }
368
407
 
369
408
  const childNode = new xmlNode(tagName);
370
- if(tagName !== tagExp && attrExpPresent){
409
+ if (tagName !== tagExp && attrExpPresent) {
371
410
  childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
372
411
  }
373
- this.addChild(currentNode, childNode, jPath)
412
+ this.addChild(currentNode, childNode, jPath, startIndex);
413
+ jPath = jPath.substr(0, jPath.lastIndexOf("."));
414
+ }
415
+ else if (this.options.unpairedTags.indexOf(tagName) !== -1) {//unpaired tag
416
+ const childNode = new xmlNode(tagName);
417
+ if (tagName !== tagExp && attrExpPresent) {
418
+ childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
419
+ }
420
+ this.addChild(currentNode, childNode, jPath, startIndex);
374
421
  jPath = jPath.substr(0, jPath.lastIndexOf("."));
422
+ i = result.closeIndex;
423
+ // Continue to next iteration without changing currentNode
424
+ continue;
375
425
  }
376
- //opening tag
377
- else{
378
- const childNode = new xmlNode( tagName);
426
+ //opening tag
427
+ else {
428
+ const childNode = new xmlNode(tagName);
429
+ if (this.tagsNodeStack.length > this.options.maxNestedTags) {
430
+ throw new Error("Maximum nested tags exceeded");
431
+ }
379
432
  this.tagsNodeStack.push(currentNode);
380
-
381
- if(tagName !== tagExp && attrExpPresent){
433
+
434
+ if (tagName !== tagExp && attrExpPresent) {
382
435
  childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
383
436
  }
384
437
  this.addChild(currentNode, childNode, jPath)
@@ -388,58 +441,121 @@ const parseXml = function(xmlData) {
388
441
  i = closeIndex;
389
442
  }
390
443
  }
391
- }else{
444
+ } else {
392
445
  textData += xmlData[i];
393
446
  }
394
447
  }
395
448
  return xmlObj.child;
396
449
  }
397
450
 
398
- function addChild(currentNode, childNode, jPath){
451
+ function addChild(currentNode, childNode, jPath, startIndex) {
452
+ // unset startIndex if not requested
453
+ if (!this.options.captureMetaData) startIndex = undefined;
399
454
  const result = this.options.updateTag(childNode.tagname, jPath, childNode[":@"])
400
- if(result === false){
401
- }else if(typeof result === "string"){
455
+ if (result === false) {
456
+ //do nothing
457
+ } else if (typeof result === "string") {
402
458
  childNode.tagname = result
403
- currentNode.addChild(childNode);
404
- }else{
405
- currentNode.addChild(childNode);
459
+ currentNode.addChild(childNode, startIndex);
460
+ } else {
461
+ currentNode.addChild(childNode, startIndex);
406
462
  }
407
463
  }
408
464
 
409
- const replaceEntitiesValue = function(val){
465
+ const replaceEntitiesValue = function (val, tagName, jPath) {
466
+ // Performance optimization: Early return if no entities to replace
467
+ if (val.indexOf('&') === -1) {
468
+ return val;
469
+ }
470
+
471
+ const entityConfig = this.options.processEntities;
410
472
 
411
- if(this.options.processEntities){
412
- for(let entityName in this.docTypeEntities){
413
- const entity = this.docTypeEntities[entityName];
414
- val = val.replace( entity.regx, entity.val);
473
+ if (!entityConfig.enabled) {
474
+ return val;
475
+ }
476
+
477
+ // Check tag-specific filtering
478
+ if (entityConfig.allowedTags) {
479
+ if (!entityConfig.allowedTags.includes(tagName)) {
480
+ return val; // Skip entity replacement for current tag as not set
415
481
  }
416
- for(let entityName in this.lastEntities){
417
- const entity = this.lastEntities[entityName];
418
- val = val.replace( entity.regex, entity.val);
482
+ }
483
+
484
+ if (entityConfig.tagFilter) {
485
+ if (!entityConfig.tagFilter(tagName, jPath)) {
486
+ return val; // Skip based on custom filter
419
487
  }
420
- if(this.options.htmlEntities){
421
- for(let entityName in this.htmlEntities){
422
- const entity = this.htmlEntities[entityName];
423
- val = val.replace( entity.regex, entity.val);
488
+ }
489
+
490
+ // Replace DOCTYPE entities
491
+ for (let entityName in this.docTypeEntities) {
492
+ const entity = this.docTypeEntities[entityName];
493
+ const matches = val.match(entity.regx);
494
+
495
+ if (matches) {
496
+ // Track expansions
497
+ this.entityExpansionCount += matches.length;
498
+
499
+ // Check expansion limit
500
+ if (entityConfig.maxTotalExpansions &&
501
+ this.entityExpansionCount > entityConfig.maxTotalExpansions) {
502
+ throw new Error(
503
+ `Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
504
+ );
505
+ }
506
+
507
+ // Store length before replacement
508
+ const lengthBefore = val.length;
509
+ val = val.replace(entity.regx, entity.val);
510
+
511
+ // Check expanded length immediately after replacement
512
+ if (entityConfig.maxExpandedLength) {
513
+ this.currentExpandedLength += (val.length - lengthBefore);
514
+
515
+ if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
516
+ throw new Error(
517
+ `Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
518
+ );
519
+ }
424
520
  }
425
521
  }
426
- val = val.replace( this.ampEntity.regex, this.ampEntity.val);
427
522
  }
523
+ if (val.indexOf('&') === -1) return val; // Early exit
524
+
525
+ // Replace standard entities
526
+ for (let entityName in this.lastEntities) {
527
+ const entity = this.lastEntities[entityName];
528
+ val = val.replace(entity.regex, entity.val);
529
+ }
530
+ if (val.indexOf('&') === -1) return val; // Early exit
531
+
532
+ // Replace HTML entities if enabled
533
+ if (this.options.htmlEntities) {
534
+ for (let entityName in this.htmlEntities) {
535
+ const entity = this.htmlEntities[entityName];
536
+ val = val.replace(entity.regex, entity.val);
537
+ }
538
+ }
539
+
540
+ // Replace ampersand entity last
541
+ val = val.replace(this.ampEntity.regex, this.ampEntity.val);
542
+
428
543
  return val;
429
544
  }
430
- function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) {
545
+
546
+ function saveTextToParentTag(textData, parentNode, jPath, isLeafNode) {
431
547
  if (textData) { //store previously collected data as textNode
432
- if(isLeafNode === undefined) isLeafNode = currentNode.child.length === 0
433
-
548
+ if (isLeafNode === undefined) isLeafNode = parentNode.child.length === 0
549
+
434
550
  textData = this.parseTextData(textData,
435
- currentNode.tagname,
551
+ parentNode.tagname,
436
552
  jPath,
437
553
  false,
438
- currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false,
554
+ parentNode[":@"] ? Object.keys(parentNode[":@"]).length !== 0 : false,
439
555
  isLeafNode);
440
556
 
441
557
  if (textData !== undefined && textData !== "")
442
- currentNode.add(this.options.textNodeName, textData);
558
+ parentNode.add(this.options.textNodeName, textData);
443
559
  textData = "";
444
560
  }
445
561
  return textData;
@@ -447,17 +563,14 @@ function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) {
447
563
 
448
564
  //TODO: use jPath to simplify the logic
449
565
  /**
450
- *
451
- * @param {string[]} stopNodes
566
+ * @param {Set} stopNodesExact
567
+ * @param {Set} stopNodesWildcard
452
568
  * @param {string} jPath
453
- * @param {string} currentTagName
569
+ * @param {string} currentTagName
454
570
  */
455
- function isItStopNode(stopNodes, jPath, currentTagName){
456
- const allNodesExp = "*." + currentTagName;
457
- for (const stopNodePath in stopNodes) {
458
- const stopNodeExp = stopNodes[stopNodePath];
459
- if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true;
460
- }
571
+ function isItStopNode(stopNodesExact, stopNodesWildcard, jPath, currentTagName) {
572
+ if (stopNodesWildcard && stopNodesWildcard.has(currentTagName)) return true;
573
+ if (stopNodesExact && stopNodesExact.has(jPath)) return true;
461
574
  return false;
462
575
  }
463
576
 
@@ -467,24 +580,24 @@ function isItStopNode(stopNodes, jPath, currentTagName){
467
580
  * @param {number} i starting index
468
581
  * @returns
469
582
  */
470
- function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){
583
+ function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
471
584
  let attrBoundary;
472
585
  let tagExp = "";
473
586
  for (let index = i; index < xmlData.length; index++) {
474
587
  let ch = xmlData[index];
475
588
  if (attrBoundary) {
476
- if (ch === attrBoundary) attrBoundary = "";//reset
589
+ if (ch === attrBoundary) attrBoundary = "";//reset
477
590
  } else if (ch === '"' || ch === "'") {
478
- attrBoundary = ch;
591
+ attrBoundary = ch;
479
592
  } else if (ch === closingChar[0]) {
480
- if(closingChar[1]){
481
- if(xmlData[index + 1] === closingChar[1]){
593
+ if (closingChar[1]) {
594
+ if (xmlData[index + 1] === closingChar[1]) {
482
595
  return {
483
596
  data: tagExp,
484
597
  index: index
485
598
  }
486
599
  }
487
- }else{
600
+ } else {
488
601
  return {
489
602
  data: tagExp,
490
603
  index: index
@@ -497,33 +610,33 @@ function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){
497
610
  }
498
611
  }
499
612
 
500
- function findClosingIndex(xmlData, str, i, errMsg){
613
+ function findClosingIndex(xmlData, str, i, errMsg) {
501
614
  const closingIndex = xmlData.indexOf(str, i);
502
- if(closingIndex === -1){
615
+ if (closingIndex === -1) {
503
616
  throw new Error(errMsg)
504
- }else{
617
+ } else {
505
618
  return closingIndex + str.length - 1;
506
619
  }
507
620
  }
508
621
 
509
- function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){
510
- const result = tagExpWithClosingIndex(xmlData, i+1, closingChar);
511
- if(!result) return;
622
+ function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
623
+ const result = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
624
+ if (!result) return;
512
625
  let tagExp = result.data;
513
626
  const closeIndex = result.index;
514
627
  const separatorIndex = tagExp.search(/\s/);
515
628
  let tagName = tagExp;
516
629
  let attrExpPresent = true;
517
- if(separatorIndex !== -1){//separate tag name and attributes expression
630
+ if (separatorIndex !== -1) {//separate tag name and attributes expression
518
631
  tagName = tagExp.substring(0, separatorIndex);
519
632
  tagExp = tagExp.substring(separatorIndex + 1).trimStart();
520
633
  }
521
634
 
522
635
  const rawTagName = tagName;
523
- if(removeNSPrefix){
636
+ if (removeNSPrefix) {
524
637
  const colonIndex = tagName.indexOf(":");
525
- if(colonIndex !== -1){
526
- tagName = tagName.substr(colonIndex+1);
638
+ if (colonIndex !== -1) {
639
+ tagName = tagName.substr(colonIndex + 1);
527
640
  attrExpPresent = tagName !== result.data.substr(colonIndex + 1);
528
641
  }
529
642
  }
@@ -542,47 +655,47 @@ function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){
542
655
  * @param {string} tagName
543
656
  * @param {number} i
544
657
  */
545
- function readStopNodeData(xmlData, tagName, i){
658
+ function readStopNodeData(xmlData, tagName, i) {
546
659
  const startIndex = i;
547
660
  // Starting at 1 since we already have an open tag
548
661
  let openTagCount = 1;
549
662
 
550
663
  for (; i < xmlData.length; i++) {
551
- if( xmlData[i] === "<"){
552
- if (xmlData[i+1] === "/") {//close tag
553
- const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
554
- let closeTagName = xmlData.substring(i+2,closeIndex).trim();
555
- if(closeTagName === tagName){
556
- openTagCount--;
557
- if (openTagCount === 0) {
558
- return {
559
- tagContent: xmlData.substring(startIndex, i),
560
- i : closeIndex
561
- }
664
+ if (xmlData[i] === "<") {
665
+ if (xmlData[i + 1] === "/") {//close tag
666
+ const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
667
+ let closeTagName = xmlData.substring(i + 2, closeIndex).trim();
668
+ if (closeTagName === tagName) {
669
+ openTagCount--;
670
+ if (openTagCount === 0) {
671
+ return {
672
+ tagContent: xmlData.substring(startIndex, i),
673
+ i: closeIndex
562
674
  }
563
675
  }
564
- i=closeIndex;
565
- } else if(xmlData[i+1] === '?') {
566
- const closeIndex = findClosingIndex(xmlData, "?>", i+1, "StopNode is not closed.")
567
- i=closeIndex;
568
- } else if(xmlData.substr(i + 1, 3) === '!--') {
569
- const closeIndex = findClosingIndex(xmlData, "-->", i+3, "StopNode is not closed.")
570
- i=closeIndex;
571
- } else if(xmlData.substr(i + 1, 2) === '![') {
572
- const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
573
- i=closeIndex;
574
- } else {
575
- const tagData = readTagExp(xmlData, i, '>')
676
+ }
677
+ i = closeIndex;
678
+ } else if (xmlData[i + 1] === '?') {
679
+ const closeIndex = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.")
680
+ i = closeIndex;
681
+ } else if (xmlData.substr(i + 1, 3) === '!--') {
682
+ const closeIndex = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.")
683
+ i = closeIndex;
684
+ } else if (xmlData.substr(i + 1, 2) === '![') {
685
+ const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
686
+ i = closeIndex;
687
+ } else {
688
+ const tagData = readTagExp(xmlData, i, '>')
576
689
 
577
- if (tagData) {
578
- const openTagName = tagData && tagData.tagName;
579
- if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length-1] !== "/") {
580
- openTagCount++;
581
- }
582
- i=tagData.closeIndex;
690
+ if (tagData) {
691
+ const openTagName = tagData && tagData.tagName;
692
+ if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length - 1] !== "/") {
693
+ openTagCount++;
583
694
  }
695
+ i = tagData.closeIndex;
584
696
  }
585
697
  }
698
+ }
586
699
  }//end for loop
587
700
  }
588
701
 
@@ -590,8 +703,8 @@ function parseValue(val, shouldParse, options) {
590
703
  if (shouldParse && typeof val === 'string') {
591
704
  //console.log(options)
592
705
  const newval = val.trim();
593
- if(newval === 'true' ) return true;
594
- else if(newval === 'false' ) return false;
706
+ if (newval === 'true') return true;
707
+ else if (newval === 'false') return false;
595
708
  else return toNumber(val, options);
596
709
  } else {
597
710
  if (util.isExist(val)) {
@@ -602,5 +715,14 @@ function parseValue(val, shouldParse, options) {
602
715
  }
603
716
  }
604
717
 
718
+ function fromCodePoint(str, base, prefix) {
719
+ const codePoint = Number.parseInt(str, base);
720
+
721
+ if (codePoint >= 0 && codePoint <= 0x10FFFF) {
722
+ return String.fromCodePoint(codePoint);
723
+ } else {
724
+ return prefix + str + ";";
725
+ }
726
+ }
605
727
 
606
728
  module.exports = OrderedObjParser;