fast-xml-parser 3.15.1 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -135,7 +135,6 @@ var options = {
135
135
  trimValues: true,
136
136
  cdataTagName: "__cdata", //default is 'false'
137
137
  cdataPositionChar: "\\c",
138
- localeRange: "", //To support non english character in tag/attribute values.
139
138
  parseTrueNumberOnly: false,
140
139
  arrayMode: false, //"strict"
141
140
  attrValueProcessor: (val, attrName) => he.decode(val, {isAttributeValue: true}),//default is a=>a
@@ -162,7 +161,7 @@ try{
162
161
  }
163
162
  ```
164
163
 
165
- Validator reurns the following object in case of error;
164
+ Validator returns the following object in case of error;
166
165
  ```js
167
166
  {
168
167
  err: {
@@ -190,7 +189,6 @@ Validator reurns the following object in case of error;
190
189
  * **decodeHTMLchar** : This option has been removed since 3.3.4. Instead, use tagValueProcessor and attrValueProcessor. See the example above.
191
190
  * **cdataTagName** : If specified, the parser parses CDATA as a nested tag instead of adding its value to the parent tag.
192
191
  * **cdataPositionChar** : It'll help to convert JSON back to XML without losing CDATA position.
193
- * **localeRange**: Parser will accept non-English character in tag or attribute name. Check #87 for more detail. Eg `localeRange: "a-zA-Zа-яёА-ЯЁ"`
194
192
  * **parseTrueNumberOnly**: if true then values like "+123", or "0123" will not be parsed as number.
195
193
  * **arrayMode** : When `false`, a tag with a single occurrence is parsed as an object, but as an array in case of multiple occurrences. When `true`, a tag will always be parsed as an array, excluding leaf nodes. When `strict`, all tags will be parsed as arrays only.
196
194
  * **tagValueProcessor** : Process tag value during transformation. Like HTML decoding, word capitalization, etc. Applicable in case of string only.
@@ -303,7 +301,7 @@ With the correct options, you can get the almost original XML without losing any
303
301
  </details>
304
302
 
305
303
  ### Limitations
306
- Currently FXP fails to parse XML with attributes has ">" in the value. This problem is left open as change in regex for its fix is degrading the performance. And the parser become very slow in case of long attrbute names.
304
+ Currently FXP fails to parse XML whose attributes have ">" in the value. This problem is left open because the regex change needed to fix it degrades performance, and the parser becomes very slow in the case of long attribute names. However, it is not ignored and we're working on the fix.
307
305
 
308
306
  ### Worth to mention
309
307
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fast-xml-parser",
3
- "version": "3.15.1",
3
+ "version": "3.16.0",
4
4
  "description": "Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries",
5
5
  "main": "./src/parser.js",
6
6
  "scripts": {
package/src/parser.d.ts CHANGED
@@ -11,7 +11,6 @@ type X2jOptions = {
11
11
  trimValues: boolean;
12
12
  cdataTagName: false | string;
13
13
  cdataPositionChar: string;
14
- localeRange: string;
15
14
  parseTrueNumberOnly: boolean;
16
15
  tagValueProcessor: (tagValue: string, tagName: string) => string;
17
16
  attrValueProcessor: (attrValue: string, attrName: string) => string;
@@ -20,7 +19,6 @@ type X2jOptions = {
20
19
  type X2jOptionsOptional = Partial<X2jOptions>;
21
20
  type validationOptions = {
22
21
  allowBooleanAttributes: boolean;
23
- localeRange: string;
24
22
  };
25
23
  type validationOptionsOptional = Partial<validationOptions>;
26
24
  type J2xOptions = {
@@ -41,7 +39,7 @@ type J2xOptionsOptional = Partial<J2xOptions>;
41
39
  type ESchema = string | object | Array<string|object>;
42
40
 
43
41
  type ValidationError = {
44
- err: { code: string; msg: string };
42
+ err: { code: string; msg: string, line: number };
45
43
  };
46
44
 
47
45
  export function parse(xmlData: string, options?: X2jOptionsOptional, validationOptions?: validationOptionsOptional | boolean): any;
package/src/util.js CHANGED
@@ -1,5 +1,10 @@
1
1
  'use strict';
2
2
 
3
+ const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';
4
+ const nameChar = nameStartChar + '\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040';
5
+ const nameRegexp = '[' + nameStartChar + '][' + nameChar + ']*'
6
+ const regexName = new RegExp('^' + nameRegexp + '$');
7
+
3
8
  const getAllMatches = function(string, regex) {
4
9
  const matches = [];
5
10
  let match = regex.exec(string);
@@ -15,15 +20,11 @@ const getAllMatches = function(string, regex) {
15
20
  return matches;
16
21
  };
17
22
 
18
- const doesMatch = function(string, regex) {
19
- const match = regex.exec(string);
23
+ const isName = function(string) {
24
+ const match = regexName.exec(string);
20
25
  return !(match === null || typeof match === 'undefined');
21
26
  };
22
27
 
23
- const doesNotMatch = function(string, regex) {
24
- return !doesMatch(string, regex);
25
- };
26
-
27
28
  exports.isExist = function(v) {
28
29
  return typeof v !== 'undefined';
29
30
  };
@@ -81,6 +82,6 @@ exports.buildOptions = function(options, defaultOptions, props) {
81
82
  return newOptions;
82
83
  };
83
84
 
84
- exports.doesMatch = doesMatch;
85
- exports.doesNotMatch = doesNotMatch;
85
+ exports.isName = isName;
86
86
  exports.getAllMatches = getAllMatches;
87
+ exports.nameRegexp = nameRegexp;
package/src/validator.js CHANGED
@@ -4,10 +4,9 @@ const util = require('./util');
4
4
 
5
5
  const defaultOptions = {
6
6
  allowBooleanAttributes: false, //A tag can have attributes without any value
7
- localeRange: 'a-zA-Z',
8
7
  };
9
8
 
10
- const props = ['allowBooleanAttributes', 'localeRange'];
9
+ const props = ['allowBooleanAttributes'];
11
10
 
12
11
  //const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
13
12
  exports.validate = function (xmlData, options) {
@@ -16,12 +15,6 @@ exports.validate = function (xmlData, options) {
16
15
  //xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
17
16
  //xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
18
17
  //xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
19
- const localRangeRegex = new RegExp(`[${options.localeRange}]`);
20
-
21
- if (localRangeRegex.test("<#$'\"\\\/:0")) {
22
- return getErrorObject('InvalidOptions', 'Invalid localeRange', 1);
23
- }
24
-
25
18
  const tags = [];
26
19
  let tagFound = false;
27
20
 
@@ -32,8 +25,7 @@ exports.validate = function (xmlData, options) {
32
25
  // check for byte order mark (BOM)
33
26
  xmlData = xmlData.substr(1);
34
27
  }
35
- const regxAttrName = new RegExp(`^[${options.localeRange}_][${options.localeRange}0-9\\-\\.:]*$`);
36
- const regxTagName = new RegExp(`^([${options.localeRange}_])[${options.localeRange}0-9\\.\\-_:]*$`);
28
+
37
29
  for (let i = 0; i < xmlData.length; i++) {
38
30
  if (xmlData[i] === '<') {
39
31
  //starting of tag
@@ -78,7 +70,7 @@ exports.validate = function (xmlData, options) {
78
70
  //continue;
79
71
  i--;
80
72
  }
81
- if (!validateTagName(tagName, regxTagName)) {
73
+ if (!validateTagName(tagName)) {
82
74
  let msg;
83
75
  if(tagName.trim().length === 0) {
84
76
  msg = "There is an unnecessary space between tag name and backward slash '</ ..'.";
@@ -98,7 +90,7 @@ exports.validate = function (xmlData, options) {
98
90
  if (attrStr[attrStr.length - 1] === '/') {
99
91
  //self closing tag
100
92
  attrStr = attrStr.substring(0, attrStr.length - 1);
101
- const isValid = validateAttributeString(attrStr, options, regxAttrName);
93
+ const isValid = validateAttributeString(attrStr, options);
102
94
  if (isValid === true) {
103
95
  tagFound = true;
104
96
  //continue; //text may presents after self closing tag
@@ -126,7 +118,7 @@ exports.validate = function (xmlData, options) {
126
118
  }
127
119
  }
128
120
  } else {
129
- const isValid = validateAttributeString(attrStr, options, regxAttrName);
121
+ const isValid = validateAttributeString(attrStr, options);
130
122
  if (isValid !== true) {
131
123
  //the result from the nested function returns the position of the error within the attribute
132
124
  //in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
@@ -155,6 +147,11 @@ exports.validate = function (xmlData, options) {
155
147
  } else {
156
148
  break;
157
149
  }
150
+ } else if (xmlData[i] === '&') {
151
+ const afterAmp = validateAmpersand(xmlData, i);
152
+ if (afterAmp == -1)
153
+ return getErrorObject('InvalidChar', `char '&' is not expected.`, getLineNumberForPosition(xmlData, i));
154
+ i = afterAmp;
158
155
  }
159
156
  } //end of reading tag text value
160
157
  if (xmlData[i] === '<') {
@@ -298,7 +295,7 @@ const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s
298
295
 
299
296
  //attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
300
297
 
301
- function validateAttributeString(attrStr, options, regxAttrName) {
298
+ function validateAttributeString(attrStr, options) {
302
299
  //console.log("start:"+attrStr+":end");
303
300
 
304
301
  //if(attrStr.trim().length === 0) return true; //empty string
@@ -318,7 +315,7 @@ function validateAttributeString(attrStr, options, regxAttrName) {
318
315
  return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}};
319
316
  } */
320
317
  const attrName = matches[i][2];
321
- if (!validateAttrName(attrName, regxAttrName)) {
318
+ if (!validateAttrName(attrName)) {
322
319
  return getErrorObject('InvalidAttr', `Attribute '${attrName}' is an invalid name.`, getPositionFromMatch(attrStr, matches[i][0]));
323
320
  }
324
321
  if (!attrNames.hasOwnProperty(attrName)) {
@@ -332,6 +329,41 @@ function validateAttributeString(attrStr, options, regxAttrName) {
332
329
  return true;
333
330
  }
334
331
 
332
+ function validateNumberAmpersand(xmlData, i) {
333
+ let re = /\d/;
334
+ if (xmlData[i] === 'x') {
335
+ i++;
336
+ re = /[\da-fA-F]/;
337
+ }
338
+ for (; i < xmlData.length; i++) {
339
+ if (xmlData[i] === ';')
340
+ return i;
341
+ if (!xmlData[i].match(re))
342
+ break;
343
+ }
344
+ return -1;
345
+ }
346
+
347
+ function validateAmpersand(xmlData, i) {
348
+ // https://www.w3.org/TR/xml/#dt-charref
349
+ i++;
350
+ if (xmlData[i] === ';')
351
+ return -1;
352
+ if (xmlData[i] === '#') {
353
+ i++;
354
+ return validateNumberAmpersand(xmlData, i);
355
+ }
356
+ let count = 0;
357
+ for (; i < xmlData.length; i++, count++) {
358
+ if (xmlData[i].match(/\w/) && count < 20)
359
+ continue;
360
+ if (xmlData[i] === ';')
361
+ break;
362
+ return -1;
363
+ }
364
+ return i;
365
+ }
366
+
335
367
  function getErrorObject(code, message, lineNumber) {
336
368
  return {
337
369
  err: {
@@ -342,19 +374,18 @@ function getErrorObject(code, message, lineNumber) {
342
374
  };
343
375
  }
344
376
 
345
- function validateAttrName(attrName, regxAttrName) {
346
- // const validAttrRegxp = new RegExp(regxAttrName);
347
- return util.doesMatch(attrName, regxAttrName);
377
+ function validateAttrName(attrName) {
378
+ return util.isName(attrName);
348
379
  }
349
380
 
350
381
  //const startsWithXML = new RegExp("^[Xx][Mm][Ll]");
351
382
  // startsWith = /^([a-zA-Z]|_)[\w.\-_:]*/;
352
383
 
353
- function validateTagName(tagname, regxTagName) {
384
+ function validateTagName(tagname) {
354
385
  /*if(util.doesMatch(tagname,startsWithXML)) return false;
355
386
  else*/
356
387
  //return !tagname.toLowerCase().startsWith("xml") || !util.doesNotMatch(tagname, regxTagName);
357
- return !util.doesNotMatch(tagname, regxTagName);
388
+ return util.isName(tagname);
358
389
  }
359
390
 
360
391
  //this function returns the line number for the character at the given index
@@ -4,8 +4,9 @@ const util = require('./util');
4
4
  const buildOptions = require('./util').buildOptions;
5
5
  const xmlNode = require('./xmlNode');
6
6
  const TagType = {OPENING: 1, CLOSING: 2, SELF: 3, CDATA: 4};
7
- let regx =
8
- '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|(([\\w:\\-._]*:)?([\\w:\\-._]+))([^>]*)>|((\\/)(([\\w:\\-._]*:)?([\\w:\\-._]+))\\s*>))([^<]*)';
7
+ const regx =
8
+ '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
9
+ .replace(/NAME/g, util.nameRegexp);
9
10
 
10
11
  //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
11
12
  //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
@@ -32,7 +33,6 @@ const defaultOptions = {
32
33
  trimValues: true, //Trim string values of tag and attributes
33
34
  cdataTagName: false,
34
35
  cdataPositionChar: '\\c',
35
- localeRange: '',
36
36
  tagValueProcessor: function(a, tagName) {
37
37
  return a;
38
38
  },
@@ -58,7 +58,6 @@ const props = [
58
58
  'trimValues',
59
59
  'cdataTagName',
60
60
  'cdataPositionChar',
61
- 'localeRange',
62
61
  'tagValueProcessor',
63
62
  'attrValueProcessor',
64
63
  'parseTrueNumberOnly',
@@ -74,7 +73,6 @@ const getTraversalObj = function(xmlData, options) {
74
73
  const xmlObj = new xmlNode('!xml');
75
74
  let currentNode = xmlObj;
76
75
 
77
- regx = regx.replace(/\[\\w/g, '[' + options.localeRange + '\\w');
78
76
  const tagsRegx = new RegExp(regx, 'g');
79
77
  let tag = tagsRegx.exec(xmlData);
80
78
  let nextTag = tagsRegx.exec(xmlData);
@@ -83,7 +81,7 @@ const getTraversalObj = function(xmlData, options) {
83
81
 
84
82
  if (tagType === TagType.CLOSING) {
85
83
  //add parsed data to parent node
86
- if (currentNode.parent && tag[14]) {
84
+ if (currentNode.parent && tag[12]) {
87
85
  currentNode.parent.val = util.getValue(currentNode.parent.val) + '' + processTagValue(tag, options, currentNode.parent.tagname);
88
86
  }
89
87
  if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
@@ -101,14 +99,14 @@ const getTraversalObj = function(xmlData, options) {
101
99
  //for backtracking
102
100
  currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
103
101
  //add rest value to parent node
104
- if (tag[14]) {
102
+ if (tag[12]) {
105
103
  currentNode.val += processTagValue(tag, options);
106
104
  }
107
105
  } else {
108
106
  currentNode.val = (currentNode.val || '') + (tag[3] || '') + processTagValue(tag, options);
109
107
  }
110
108
  } else if (tagType === TagType.SELF) {
111
- if (currentNode && tag[14]) {
109
+ if (currentNode && tag[12]) {
112
110
  currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tag, options);
113
111
  }
114
112
 
@@ -142,7 +140,7 @@ const getTraversalObj = function(xmlData, options) {
142
140
 
143
141
  function processTagValue(parsedTags, options, parentTagName) {
144
142
  const tagName = parsedTags[7] || parentTagName;
145
- let val = parsedTags[14];
143
+ let val = parsedTags[12];
146
144
  if (val) {
147
145
  if (options.trimValues) {
148
146
  val = val.trim();