fast-xml-parser 3.15.1 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -4
- package/package.json +1 -1
- package/src/parser.d.ts +1 -3
- package/src/util.js +9 -8
- package/src/validator.js +51 -20
- package/src/xmlstr2xmlnode.js +7 -9
package/README.md
CHANGED
|
@@ -135,7 +135,6 @@ var options = {
|
|
|
135
135
|
trimValues: true,
|
|
136
136
|
cdataTagName: "__cdata", //default is 'false'
|
|
137
137
|
cdataPositionChar: "\\c",
|
|
138
|
-
localeRange: "", //To support non english character in tag/attribute values.
|
|
139
138
|
parseTrueNumberOnly: false,
|
|
140
139
|
arrayMode: false, //"strict"
|
|
141
140
|
attrValueProcessor: (val, attrName) => he.decode(val, {isAttributeValue: true}),//default is a=>a
|
|
@@ -162,7 +161,7 @@ try{
|
|
|
162
161
|
}
|
|
163
162
|
```
|
|
164
163
|
|
|
165
|
-
Validator reurns the following object in case of error;
|
|
164
|
+
Validator returns the following object in case of error;
|
|
166
165
|
```js
|
|
167
166
|
{
|
|
168
167
|
err: {
|
|
@@ -190,7 +189,6 @@ Validator reurns the following object in case of error;
|
|
|
190
189
|
* **decodeHTMLchar** : This options has been removed from 3.3.4. Instead, use tagValueProcessor, and attrValueProcessor. See above example.
|
|
191
190
|
* **cdataTagName** : If specified, parser parse CDATA as nested tag instead of adding it's value to parent tag.
|
|
192
191
|
* **cdataPositionChar** : It'll help to covert JSON back to XML without losing CDATA position.
|
|
193
|
-
* **localeRange**: Parser will accept non-English character in tag or attribute name. Check #87 for more detail. Eg `localeRange: "a-zA-Zа-яёА-ЯЁ"`
|
|
194
192
|
* **parseTrueNumberOnly**: if true then values like "+123", or "0123" will not be parsed as number.
|
|
195
193
|
* **arrayMode** : When `false`, a tag with single occurence is parsed as an object but as an array in case of multiple occurences. When `true`, a tag will be parsed as an array always excluding leaf nodes. When `strict`, all the tags will be parsed as array only.
|
|
196
194
|
* **tagValueProcessor** : Process tag value during transformation. Like HTML decoding, word capitalization, etc. Applicable in case of string only.
|
|
@@ -303,7 +301,7 @@ With the correct options, you can get the almost original XML without losing any
|
|
|
303
301
|
</details>
|
|
304
302
|
|
|
305
303
|
### Limitations
|
|
306
|
-
Currently FXP fails to parse XML with attributes has ">" in the value. This problem is left open as change in regex for its fix is degrading the performance. And the parser become very slow in case of long attrbute names.
|
|
304
|
+
Currently FXP fails to parse XML with attributes has ">" in the value. This problem is left open as change in regex for its fix is degrading the performance. And the parser become very slow in case of long attrbute names. Hoever, It is not ignored and we're working on the fix.
|
|
307
305
|
|
|
308
306
|
### Worth to mention
|
|
309
307
|
|
package/package.json
CHANGED
package/src/parser.d.ts
CHANGED
|
@@ -11,7 +11,6 @@ type X2jOptions = {
|
|
|
11
11
|
trimValues: boolean;
|
|
12
12
|
cdataTagName: false | string;
|
|
13
13
|
cdataPositionChar: string;
|
|
14
|
-
localeRange: string;
|
|
15
14
|
parseTrueNumberOnly: boolean;
|
|
16
15
|
tagValueProcessor: (tagValue: string, tagName: string) => string;
|
|
17
16
|
attrValueProcessor: (attrValue: string, attrName: string) => string;
|
|
@@ -20,7 +19,6 @@ type X2jOptions = {
|
|
|
20
19
|
type X2jOptionsOptional = Partial<X2jOptions>;
|
|
21
20
|
type validationOptions = {
|
|
22
21
|
allowBooleanAttributes: boolean;
|
|
23
|
-
localeRange: string;
|
|
24
22
|
};
|
|
25
23
|
type validationOptionsOptional = Partial<validationOptions>;
|
|
26
24
|
type J2xOptions = {
|
|
@@ -41,7 +39,7 @@ type J2xOptionsOptional = Partial<J2xOptions>;
|
|
|
41
39
|
type ESchema = string | object | Array<string|object>;
|
|
42
40
|
|
|
43
41
|
type ValidationError = {
|
|
44
|
-
err: { code: string; msg: string };
|
|
42
|
+
err: { code: string; msg: string, line: number };
|
|
45
43
|
};
|
|
46
44
|
|
|
47
45
|
export function parse(xmlData: string, options?: X2jOptionsOptional, validationOptions?: validationOptionsOptional | boolean): any;
|
package/src/util.js
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';
|
|
4
|
+
const nameChar = nameStartChar + '\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040';
|
|
5
|
+
const nameRegexp = '[' + nameStartChar + '][' + nameChar + ']*'
|
|
6
|
+
const regexName = new RegExp('^' + nameRegexp + '$');
|
|
7
|
+
|
|
3
8
|
const getAllMatches = function(string, regex) {
|
|
4
9
|
const matches = [];
|
|
5
10
|
let match = regex.exec(string);
|
|
@@ -15,15 +20,11 @@ const getAllMatches = function(string, regex) {
|
|
|
15
20
|
return matches;
|
|
16
21
|
};
|
|
17
22
|
|
|
18
|
-
const doesMatch = function(string, regex) {
|
|
19
|
-
const match = regex.exec(string);
|
|
23
|
+
const isName = function(string) {
|
|
24
|
+
const match = regexName.exec(string);
|
|
20
25
|
return !(match === null || typeof match === 'undefined');
|
|
21
26
|
};
|
|
22
27
|
|
|
23
|
-
const doesNotMatch = function(string, regex) {
|
|
24
|
-
return !doesMatch(string, regex);
|
|
25
|
-
};
|
|
26
|
-
|
|
27
28
|
exports.isExist = function(v) {
|
|
28
29
|
return typeof v !== 'undefined';
|
|
29
30
|
};
|
|
@@ -81,6 +82,6 @@ exports.buildOptions = function(options, defaultOptions, props) {
|
|
|
81
82
|
return newOptions;
|
|
82
83
|
};
|
|
83
84
|
|
|
84
|
-
exports.doesMatch = doesMatch;
|
|
85
|
-
exports.doesNotMatch = doesNotMatch;
|
|
85
|
+
exports.isName = isName;
|
|
86
86
|
exports.getAllMatches = getAllMatches;
|
|
87
|
+
exports.nameRegexp = nameRegexp;
|
package/src/validator.js
CHANGED
|
@@ -4,10 +4,9 @@ const util = require('./util');
|
|
|
4
4
|
|
|
5
5
|
const defaultOptions = {
|
|
6
6
|
allowBooleanAttributes: false, //A tag can have attributes without any value
|
|
7
|
-
localeRange: 'a-zA-Z',
|
|
8
7
|
};
|
|
9
8
|
|
|
10
|
-
const props = ['allowBooleanAttributes', 'localeRange'];
|
|
9
|
+
const props = ['allowBooleanAttributes'];
|
|
11
10
|
|
|
12
11
|
//const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
|
|
13
12
|
exports.validate = function (xmlData, options) {
|
|
@@ -16,12 +15,6 @@ exports.validate = function (xmlData, options) {
|
|
|
16
15
|
//xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
|
|
17
16
|
//xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
|
|
18
17
|
//xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
|
|
19
|
-
const localRangeRegex = new RegExp(`[${options.localeRange}]`);
|
|
20
|
-
|
|
21
|
-
if (localRangeRegex.test("<#$'\"\\\/:0")) {
|
|
22
|
-
return getErrorObject('InvalidOptions', 'Invalid localeRange', 1);
|
|
23
|
-
}
|
|
24
|
-
|
|
25
18
|
const tags = [];
|
|
26
19
|
let tagFound = false;
|
|
27
20
|
|
|
@@ -32,8 +25,7 @@ exports.validate = function (xmlData, options) {
|
|
|
32
25
|
// check for byte order mark (BOM)
|
|
33
26
|
xmlData = xmlData.substr(1);
|
|
34
27
|
}
|
|
35
|
-
|
|
36
|
-
const regxTagName = new RegExp(`^([${options.localeRange}_])[${options.localeRange}0-9\\.\\-_:]*$`);
|
|
28
|
+
|
|
37
29
|
for (let i = 0; i < xmlData.length; i++) {
|
|
38
30
|
if (xmlData[i] === '<') {
|
|
39
31
|
//starting of tag
|
|
@@ -78,7 +70,7 @@ exports.validate = function (xmlData, options) {
|
|
|
78
70
|
//continue;
|
|
79
71
|
i--;
|
|
80
72
|
}
|
|
81
|
-
if (!validateTagName(tagName, regxTagName)) {
|
|
73
|
+
if (!validateTagName(tagName)) {
|
|
82
74
|
let msg;
|
|
83
75
|
if(tagName.trim().length === 0) {
|
|
84
76
|
msg = "There is an unnecessary space between tag name and backward slash '</ ..'.";
|
|
@@ -98,7 +90,7 @@ exports.validate = function (xmlData, options) {
|
|
|
98
90
|
if (attrStr[attrStr.length - 1] === '/') {
|
|
99
91
|
//self closing tag
|
|
100
92
|
attrStr = attrStr.substring(0, attrStr.length - 1);
|
|
101
|
-
const isValid = validateAttributeString(attrStr, options, regxAttrName);
|
|
93
|
+
const isValid = validateAttributeString(attrStr, options);
|
|
102
94
|
if (isValid === true) {
|
|
103
95
|
tagFound = true;
|
|
104
96
|
//continue; //text may presents after self closing tag
|
|
@@ -126,7 +118,7 @@ exports.validate = function (xmlData, options) {
|
|
|
126
118
|
}
|
|
127
119
|
}
|
|
128
120
|
} else {
|
|
129
|
-
const isValid = validateAttributeString(attrStr, options, regxAttrName);
|
|
121
|
+
const isValid = validateAttributeString(attrStr, options);
|
|
130
122
|
if (isValid !== true) {
|
|
131
123
|
//the result from the nested function returns the position of the error within the attribute
|
|
132
124
|
//in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
|
|
@@ -155,6 +147,11 @@ exports.validate = function (xmlData, options) {
|
|
|
155
147
|
} else {
|
|
156
148
|
break;
|
|
157
149
|
}
|
|
150
|
+
} else if (xmlData[i] === '&') {
|
|
151
|
+
const afterAmp = validateAmpersand(xmlData, i);
|
|
152
|
+
if (afterAmp == -1)
|
|
153
|
+
return getErrorObject('InvalidChar', `char '&' is not expected.`, getLineNumberForPosition(xmlData, i));
|
|
154
|
+
i = afterAmp;
|
|
158
155
|
}
|
|
159
156
|
} //end of reading tag text value
|
|
160
157
|
if (xmlData[i] === '<') {
|
|
@@ -298,7 +295,7 @@ const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s
|
|
|
298
295
|
|
|
299
296
|
//attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
|
|
300
297
|
|
|
301
|
-
function validateAttributeString(attrStr, options, regxAttrName) {
|
|
298
|
+
function validateAttributeString(attrStr, options) {
|
|
302
299
|
//console.log("start:"+attrStr+":end");
|
|
303
300
|
|
|
304
301
|
//if(attrStr.trim().length === 0) return true; //empty string
|
|
@@ -318,7 +315,7 @@ function validateAttributeString(attrStr, options, regxAttrName) {
|
|
|
318
315
|
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}};
|
|
319
316
|
} */
|
|
320
317
|
const attrName = matches[i][2];
|
|
321
|
-
if (!validateAttrName(attrName, regxAttrName)) {
|
|
318
|
+
if (!validateAttrName(attrName)) {
|
|
322
319
|
return getErrorObject('InvalidAttr', `Attribute '${attrName}' is an invalid name.`, getPositionFromMatch(attrStr, matches[i][0]));
|
|
323
320
|
}
|
|
324
321
|
if (!attrNames.hasOwnProperty(attrName)) {
|
|
@@ -332,6 +329,41 @@ function validateAttributeString(attrStr, options, regxAttrName) {
|
|
|
332
329
|
return true;
|
|
333
330
|
}
|
|
334
331
|
|
|
332
|
+
function validateNumberAmpersand(xmlData, i) {
|
|
333
|
+
let re = /\d/;
|
|
334
|
+
if (xmlData[i] === 'x') {
|
|
335
|
+
i++;
|
|
336
|
+
re = /[\da-fA-F]/;
|
|
337
|
+
}
|
|
338
|
+
for (; i < xmlData.length; i++) {
|
|
339
|
+
if (xmlData[i] === ';')
|
|
340
|
+
return i;
|
|
341
|
+
if (!xmlData[i].match(re))
|
|
342
|
+
break;
|
|
343
|
+
}
|
|
344
|
+
return -1;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
function validateAmpersand(xmlData, i) {
|
|
348
|
+
// https://www.w3.org/TR/xml/#dt-charref
|
|
349
|
+
i++;
|
|
350
|
+
if (xmlData[i] === ';')
|
|
351
|
+
return -1;
|
|
352
|
+
if (xmlData[i] === '#') {
|
|
353
|
+
i++;
|
|
354
|
+
return validateNumberAmpersand(xmlData, i);
|
|
355
|
+
}
|
|
356
|
+
let count = 0;
|
|
357
|
+
for (; i < xmlData.length; i++, count++) {
|
|
358
|
+
if (xmlData[i].match(/\w/) && count < 20)
|
|
359
|
+
continue;
|
|
360
|
+
if (xmlData[i] === ';')
|
|
361
|
+
break;
|
|
362
|
+
return -1;
|
|
363
|
+
}
|
|
364
|
+
return i;
|
|
365
|
+
}
|
|
366
|
+
|
|
335
367
|
function getErrorObject(code, message, lineNumber) {
|
|
336
368
|
return {
|
|
337
369
|
err: {
|
|
@@ -342,19 +374,18 @@ function getErrorObject(code, message, lineNumber) {
|
|
|
342
374
|
};
|
|
343
375
|
}
|
|
344
376
|
|
|
345
|
-
function validateAttrName(attrName, regxAttrName) {
|
|
346
|
-
|
|
347
|
-
return util.doesMatch(attrName, regxAttrName);
|
|
377
|
+
function validateAttrName(attrName) {
|
|
378
|
+
return util.isName(attrName);
|
|
348
379
|
}
|
|
349
380
|
|
|
350
381
|
//const startsWithXML = new RegExp("^[Xx][Mm][Ll]");
|
|
351
382
|
// startsWith = /^([a-zA-Z]|_)[\w.\-_:]*/;
|
|
352
383
|
|
|
353
|
-
function validateTagName(tagname, regxTagName) {
|
|
384
|
+
function validateTagName(tagname) {
|
|
354
385
|
/*if(util.doesMatch(tagname,startsWithXML)) return false;
|
|
355
386
|
else*/
|
|
356
387
|
//return !tagname.toLowerCase().startsWith("xml") || !util.doesNotMatch(tagname, regxTagName);
|
|
357
|
-
return !util.doesNotMatch(tagname, regxTagName);
|
|
388
|
+
return util.isName(tagname);
|
|
358
389
|
}
|
|
359
390
|
|
|
360
391
|
//this function returns the line number for the character at the given index
|
package/src/xmlstr2xmlnode.js
CHANGED
|
@@ -4,8 +4,9 @@ const util = require('./util');
|
|
|
4
4
|
const buildOptions = require('./util').buildOptions;
|
|
5
5
|
const xmlNode = require('./xmlNode');
|
|
6
6
|
const TagType = {OPENING: 1, CLOSING: 2, SELF: 3, CDATA: 4};
|
|
7
|
-
|
|
8
|
-
'<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((
|
|
7
|
+
const regx =
|
|
8
|
+
'<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
|
|
9
|
+
.replace(/NAME/g, util.nameRegexp);
|
|
9
10
|
|
|
10
11
|
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
|
|
11
12
|
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
|
|
@@ -32,7 +33,6 @@ const defaultOptions = {
|
|
|
32
33
|
trimValues: true, //Trim string values of tag and attributes
|
|
33
34
|
cdataTagName: false,
|
|
34
35
|
cdataPositionChar: '\\c',
|
|
35
|
-
localeRange: '',
|
|
36
36
|
tagValueProcessor: function(a, tagName) {
|
|
37
37
|
return a;
|
|
38
38
|
},
|
|
@@ -58,7 +58,6 @@ const props = [
|
|
|
58
58
|
'trimValues',
|
|
59
59
|
'cdataTagName',
|
|
60
60
|
'cdataPositionChar',
|
|
61
|
-
'localeRange',
|
|
62
61
|
'tagValueProcessor',
|
|
63
62
|
'attrValueProcessor',
|
|
64
63
|
'parseTrueNumberOnly',
|
|
@@ -74,7 +73,6 @@ const getTraversalObj = function(xmlData, options) {
|
|
|
74
73
|
const xmlObj = new xmlNode('!xml');
|
|
75
74
|
let currentNode = xmlObj;
|
|
76
75
|
|
|
77
|
-
regx = regx.replace(/\[\\w/g, '[' + options.localeRange + '\\w');
|
|
78
76
|
const tagsRegx = new RegExp(regx, 'g');
|
|
79
77
|
let tag = tagsRegx.exec(xmlData);
|
|
80
78
|
let nextTag = tagsRegx.exec(xmlData);
|
|
@@ -83,7 +81,7 @@ const getTraversalObj = function(xmlData, options) {
|
|
|
83
81
|
|
|
84
82
|
if (tagType === TagType.CLOSING) {
|
|
85
83
|
//add parsed data to parent node
|
|
86
|
-
if (currentNode.parent && tag[
|
|
84
|
+
if (currentNode.parent && tag[12]) {
|
|
87
85
|
currentNode.parent.val = util.getValue(currentNode.parent.val) + '' + processTagValue(tag, options, currentNode.parent.tagname);
|
|
88
86
|
}
|
|
89
87
|
if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
|
|
@@ -101,14 +99,14 @@ const getTraversalObj = function(xmlData, options) {
|
|
|
101
99
|
//for backtracking
|
|
102
100
|
currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
|
|
103
101
|
//add rest value to parent node
|
|
104
|
-
if (tag[
|
|
102
|
+
if (tag[12]) {
|
|
105
103
|
currentNode.val += processTagValue(tag, options);
|
|
106
104
|
}
|
|
107
105
|
} else {
|
|
108
106
|
currentNode.val = (currentNode.val || '') + (tag[3] || '') + processTagValue(tag, options);
|
|
109
107
|
}
|
|
110
108
|
} else if (tagType === TagType.SELF) {
|
|
111
|
-
if (currentNode && tag[
|
|
109
|
+
if (currentNode && tag[12]) {
|
|
112
110
|
currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tag, options);
|
|
113
111
|
}
|
|
114
112
|
|
|
@@ -142,7 +140,7 @@ const getTraversalObj = function(xmlData, options) {
|
|
|
142
140
|
|
|
143
141
|
function processTagValue(parsedTags, options, parentTagName) {
|
|
144
142
|
const tagName = parsedTags[7] || parentTagName;
|
|
145
|
-
let val = parsedTags[
|
|
143
|
+
let val = parsedTags[12];
|
|
146
144
|
if (val) {
|
|
147
145
|
if (options.trimValues) {
|
|
148
146
|
val = val.trim();
|