fast-xml-parser 4.0.0-beta.2 → 4.0.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +12 -1
- package/package.json +5 -5
- package/src/fxp.d.ts +12 -1
- package/src/xmlbuilder/json2xml.js +37 -4
- package/src/xmlbuilder/orderedJs2Xml.js +43 -7
- package/src/xmlparser/DocTypeReader.js +92 -0
- package/src/xmlparser/OptionsBuilder.js +4 -0
- package/src/xmlparser/OrderedObjParser.js +295 -165
- package/src/xmlparser/XMLParser.js +22 -2
- package/src/xmlparser/node2json.js +3 -3
- package/src/xmlparser/xmlNode.js +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
Note: If you find missing information about a particular minor version, that version must have been released without any functional change in this library.
|
|
2
2
|
|
|
3
|
+
** 4.0.0-beta.6 / 2021-12-05**
|
|
4
|
+
* Support PI Tags processing
|
|
5
|
+
* Support `suppressBooleanAttributes` by XML Builder for attributes with value `true`.
|
|
6
|
+
|
|
7
|
+
** 4.0.0-beta.5 / 2021-12-04**
|
|
8
|
+
* fix: when a tag with name "attributes"
|
|
9
|
+
|
|
10
|
+
** 4.0.0-beta.4 / 2021-12-02**
|
|
11
|
+
* Support HTML document parsing
|
|
12
|
+
* skip stop nodes parsing when building the XML from JS object
|
|
13
|
+
* Support external entities without DOCTYPE
|
|
14
|
+
* update dev dependency: strnum v1.0.5 to fix long number issue
|
|
15
|
+
|
|
16
|
+
** 4.0.0-beta.3 / 2021-11-30**
|
|
17
|
+
* support global stopNodes expression like "*.stop"
|
|
18
|
+
* support self-closing and paired unpaired tags
|
|
19
|
+
* fix: CDATA should not be parsed.
|
|
20
|
+
* Fix typings for XMLBuilder (#396)(By [Anders Emil Salvesen](https://github.com/andersem))
|
|
21
|
+
* supports XML entities, HTML entities, DOCTYPE entities
|
|
22
|
+
|
|
3
23
|
**⚠️ 4.0.0-beta.2 / 2021-11-19**
|
|
4
24
|
* rename `attrMap` to `attributes` in parser output when `preserveOrder:true`
|
|
5
25
|
* supports unpairedTags
|
package/README.md
CHANGED
|
@@ -29,6 +29,7 @@ Check [ThankYouBackers](https://github.com/NaturalIntelligence/ThankYouBackers)
|
|
|
29
29
|
<a href="http://nasa.github.io/" title="NASA" > <img src="https://avatars0.githubusercontent.com/u/848102" width="60px" ></a>
|
|
30
30
|
<a href="https://github.com/prettier" title="Prettier" > <img src="https://avatars0.githubusercontent.com/u/25822731" width="60px" ></a>
|
|
31
31
|
<a href="http://brain.js.org/" title="brain.js" > <img src="https://avatars2.githubusercontent.com/u/23732838" width="60px" ></a>
|
|
32
|
+
<a href="https://github.com/aws" title="AWS SDK" > <img src="https://avatars.githubusercontent.com/u/2232217" width="60px" ></a>
|
|
32
33
|
<a href="#" title="NHS Connect" > <img src="https://avatars3.githubusercontent.com/u/20316669" width="60px" ></a>
|
|
33
34
|
<a href="http://www.fda.gov/" title="Food and Drug Administration " > <img src="https://avatars2.githubusercontent.com/u/6471964" width="60px" ></a>
|
|
34
35
|
<a href="http://www.magento.com/" title="Magento" > <img src="https://avatars2.githubusercontent.com/u/168457" width="60px" ></a>
|
|
@@ -48,6 +49,14 @@ Check the list of all known users [here](./USERs.md);
|
|
|
48
49
|
* Faster than any pure JS implementation.
|
|
49
50
|
* It can handle big files (tested up to 100mb).
|
|
50
51
|
* Controlled parsing using various options
|
|
52
|
+
* XML Entities, HTML entities, and DOCTYPE entities are supported.
|
|
53
|
+
* unpaired tags (Eg `<br>` in HTML), stop nodes (Eg `<script>` in HTML) are supported.
|
|
54
|
+
* You can restore almost the same XML from JSON
|
|
55
|
+
* Supports comments
|
|
56
|
+
* It can preserve Order of tags in JS object
|
|
57
|
+
* You can control if a single tag should be parsed into array.
|
|
58
|
+
* Supports parsing of PI (Processing Instruction) tags with XML declaration tags
|
|
59
|
+
* And many more other features.
|
|
51
60
|
|
|
52
61
|
## How to use
|
|
53
62
|
|
|
@@ -98,7 +107,9 @@ In a HTML page
|
|
|
98
107
|
2. [XML Parser](./docs/v4/2.XMLparseOptions.md)
|
|
99
108
|
3. [XML Builder](./docs/v4/3.XMLBuilder.md)
|
|
100
109
|
4. [XML Validator](./docs/v4/4.XMLValidator.md)
|
|
101
|
-
|
|
110
|
+
5. [Entities](./docs/5.Entities.md)
|
|
111
|
+
6. [HTML Document Parsing](./docs/6.HTMLParsing.md)
|
|
112
|
+
7. [PI Tag processing](./docs/7.PITags.md)
|
|
102
113
|
## Performance
|
|
103
114
|
|
|
104
115
|
### XML Parser
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "fast-xml-parser",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.6",
|
|
4
4
|
"description": "Validate XML, Parse XML, Build XML without C/C++ based libraries",
|
|
5
5
|
"main": "./src/fxp.js",
|
|
6
6
|
"scripts": {
|
|
@@ -47,14 +47,14 @@
|
|
|
47
47
|
"@babel/preset-env": "^7.13.10",
|
|
48
48
|
"@babel/register": "^7.13.8",
|
|
49
49
|
"babel-loader": "^8.2.2",
|
|
50
|
-
"eslint": "^
|
|
50
|
+
"eslint": "^8.3.0",
|
|
51
51
|
"he": "^1.2.0",
|
|
52
52
|
"jasmine": "^3.6.4",
|
|
53
53
|
"nyc": "^15.1.0",
|
|
54
54
|
"prettier": "^1.19.1",
|
|
55
55
|
"publish-please": "^5.5.2",
|
|
56
|
-
"webpack": "^
|
|
57
|
-
"webpack-cli": "^
|
|
56
|
+
"webpack": "^5.64.4",
|
|
57
|
+
"webpack-cli": "^4.9.1"
|
|
58
58
|
},
|
|
59
59
|
"typings": "src/fxp.d.ts",
|
|
60
60
|
"funding": {
|
|
@@ -62,6 +62,6 @@
|
|
|
62
62
|
"url": "https://paypal.me/naturalintelligence"
|
|
63
63
|
},
|
|
64
64
|
"dependencies": {
|
|
65
|
-
"strnum": "^1.0.
|
|
65
|
+
"strnum": "^1.0.5"
|
|
66
66
|
}
|
|
67
67
|
}
|
package/src/fxp.d.ts
CHANGED
|
@@ -18,6 +18,8 @@ type X2jOptions = {
|
|
|
18
18
|
unpairedTags: string[];
|
|
19
19
|
alwaysCreateTextNode: boolean;
|
|
20
20
|
isArray: (tagName: string, jPath: string, isLeafNode: boolean, isAttribute: boolean) => boolean;
|
|
21
|
+
processEntities: boolean;
|
|
22
|
+
htmlEntities: boolean;
|
|
21
23
|
};
|
|
22
24
|
type strnumOptions = {
|
|
23
25
|
hex: boolean;
|
|
@@ -42,10 +44,13 @@ type XmlBuilderOptions = {
|
|
|
42
44
|
indentBy: string;
|
|
43
45
|
arrayNodeName: string;
|
|
44
46
|
suppressEmptyNode: boolean;
|
|
47
|
+
suppressBooleanAttributes: boolean;
|
|
45
48
|
preserveOrder: boolean;
|
|
46
49
|
unpairedTags: string[];
|
|
50
|
+
stopNodes: string[];
|
|
47
51
|
tagValueProcessor: (name: string, value: string) => string;
|
|
48
52
|
attributeValueProcessor: (name: string, value: string) => string;
|
|
53
|
+
processEntities: boolean;
|
|
49
54
|
};
|
|
50
55
|
type XmlBuilderOptionsOptional = Partial<XmlBuilderOptions>;
|
|
51
56
|
|
|
@@ -63,6 +68,12 @@ type ValidationError = {
|
|
|
63
68
|
export class XMLParser {
|
|
64
69
|
constructor(options?: X2jOptionsOptional);
|
|
65
70
|
parse(xmlData: string | Buffer ,validationOptions?: validationOptionsOptional | boolean): any;
|
|
71
|
+
/**
|
|
72
|
+
* Add Entity which is not by default supported by this library
|
|
73
|
+
* @param entityIndentifier {string} Eg: 'ent' for &ent;
|
|
74
|
+
* @param entityValue {string} Eg: '\r'
|
|
75
|
+
*/
|
|
76
|
+
addEntity(entityIndentifier: string, entityValue: string): void;
|
|
66
77
|
}
|
|
67
78
|
|
|
68
79
|
export class XMLValidator{
|
|
@@ -70,5 +81,5 @@ export class XMLValidator{
|
|
|
70
81
|
}
|
|
71
82
|
export class XMLBuilder {
|
|
72
83
|
constructor(options: XmlBuilderOptionsOptional);
|
|
73
|
-
|
|
84
|
+
build(jObj: any): any;
|
|
74
85
|
}
|
|
@@ -12,6 +12,7 @@ const defaultOptions = {
|
|
|
12
12
|
format: false,
|
|
13
13
|
indentBy: ' ',
|
|
14
14
|
suppressEmptyNode: false,
|
|
15
|
+
suppressBooleanAttributes: true,
|
|
15
16
|
tagValueProcessor: function(key, a) {
|
|
16
17
|
return a;
|
|
17
18
|
},
|
|
@@ -21,6 +22,14 @@ const defaultOptions = {
|
|
|
21
22
|
preserveOrder: false,
|
|
22
23
|
commentPropName: false,
|
|
23
24
|
unpairedTags: [],
|
|
25
|
+
entities: {
|
|
26
|
+
">" : { regex: new RegExp(">", "g"), val: ">" },
|
|
27
|
+
"<" : { regex: new RegExp("<", "g"), val: "<" },
|
|
28
|
+
"sQuot" : { regex: new RegExp("\'", "g"), val: "'" },
|
|
29
|
+
"dQuot" : { regex: new RegExp("\"", "g"), val: """ }
|
|
30
|
+
},
|
|
31
|
+
processEntities: true,
|
|
32
|
+
stopNodes: []
|
|
24
33
|
};
|
|
25
34
|
|
|
26
35
|
const props = [
|
|
@@ -32,12 +41,16 @@ const props = [
|
|
|
32
41
|
'format',
|
|
33
42
|
'indentBy',
|
|
34
43
|
'suppressEmptyNode',
|
|
44
|
+
'suppressBooleanAttributes',
|
|
35
45
|
'tagValueProcessor',
|
|
36
46
|
'attributeValueProcessor',
|
|
37
47
|
'arrayNodeName', //when array as root
|
|
38
48
|
'preserveOrder',
|
|
39
49
|
"commentPropName",
|
|
40
50
|
"unpairedTags",
|
|
51
|
+
"entities",
|
|
52
|
+
"processEntities",
|
|
53
|
+
"stopNodes",
|
|
41
54
|
// 'rootNodeName', //when jsObject have multiple properties on root level
|
|
42
55
|
];
|
|
43
56
|
|
|
@@ -76,6 +89,8 @@ function Builder(options) {
|
|
|
76
89
|
|
|
77
90
|
this.buildTextValNode = buildTextValNode;
|
|
78
91
|
this.buildObjectNode = buildObjectNode;
|
|
92
|
+
|
|
93
|
+
this.replaceEntitiesValue = replaceEntitiesValue;
|
|
79
94
|
}
|
|
80
95
|
|
|
81
96
|
Builder.prototype.build = function(jObj) {
|
|
@@ -105,11 +120,14 @@ Builder.prototype.j2x = function(jObj, level) {
|
|
|
105
120
|
//premitive type
|
|
106
121
|
const attr = this.isAttribute(key);
|
|
107
122
|
if (attr) {
|
|
108
|
-
|
|
123
|
+
let val = this.options.attributeValueProcessor(attr, '' + jObj[key]);
|
|
124
|
+
val = this.replaceEntitiesValue(val);
|
|
125
|
+
attrStr += ' ' + attr + '="' + val + '"';
|
|
109
126
|
}else {
|
|
110
127
|
//tag value
|
|
111
128
|
if (key === this.options.textNodeName) {
|
|
112
|
-
|
|
129
|
+
let newval = this.options.tagValueProcessor(key, '' + jObj[key]);
|
|
130
|
+
val += this.replaceEntitiesValue(newval);
|
|
113
131
|
} else {
|
|
114
132
|
val += this.buildTextNode(jObj[key], key, '', level);
|
|
115
133
|
}
|
|
@@ -135,7 +153,9 @@ Builder.prototype.j2x = function(jObj, level) {
|
|
|
135
153
|
const Ks = Object.keys(jObj[key]);
|
|
136
154
|
const L = Ks.length;
|
|
137
155
|
for (let j = 0; j < L; j++) {
|
|
138
|
-
|
|
156
|
+
let val = this.options.attributeValueProcessor(Ks[j], '' + jObj[key][Ks[j]]);
|
|
157
|
+
val = this.replaceEntitiesValue(val);
|
|
158
|
+
attrStr += ' ' + Ks[j] + '="' + val + '"';
|
|
139
159
|
}
|
|
140
160
|
} else {
|
|
141
161
|
val += this.processTextOrObjNode(jObj[key], key, level)
|
|
@@ -196,19 +216,32 @@ function buildEmptyObjNode(val, key, attrStr, level) {
|
|
|
196
216
|
}
|
|
197
217
|
|
|
198
218
|
function buildTextValNode(val, key, attrStr, level) {
|
|
219
|
+
let textValue = this.options.tagValueProcessor(key, val);
|
|
220
|
+
textValue = this.replaceEntitiesValue(textValue);
|
|
221
|
+
|
|
199
222
|
return (
|
|
200
223
|
this.indentate(level) +
|
|
201
224
|
'<' +
|
|
202
225
|
key +
|
|
203
226
|
attrStr +
|
|
204
227
|
'>' +
|
|
205
|
-
|
|
228
|
+
textValue +
|
|
206
229
|
'</' +
|
|
207
230
|
key +
|
|
208
231
|
this.tagEndChar
|
|
209
232
|
);
|
|
210
233
|
}
|
|
211
234
|
|
|
235
|
+
/**
 * Substitute every configured entity pattern found in a text value.
 *
 * Must be invoked with a builder as `this`: it reads
 * `this.options.processEntities` and `this.options.entities`, a map whose
 * values are `{ regex, val }` pairs.
 *
 * @param {*} textValue value about to be written into the XML output
 * @returns {*} the substituted string, or `textValue` untouched when it is not
 *   a non-empty string or when entity processing is switched off
 */
function replaceEntitiesValue(textValue){
  // Only strings can be searched and replaced. A bare `length > 0` check also
  // lets arrays through, which would then fail on the missing `.replace`.
  if(typeof textValue === 'string' && textValue.length > 0 && this.options.processEntities){
    for (const entityName in this.options.entities) {
      const entity = this.options.entities[entityName];
      textValue = textValue.replace(entity.regex, entity.val);
    }
  }
  return textValue;
}
|
|
244
|
+
|
|
212
245
|
function buildEmptyTextNode(val, key, attrStr, level) {
|
|
213
246
|
if( val === '' && this.options.unpairedTags.indexOf(key) !== -1){
|
|
214
247
|
return this.indentate(level) + '<' + key + attrStr + this.tagEndChar;
|
|
@@ -7,10 +7,10 @@ const {EOL} = require('os');
|
|
|
7
7
|
* @returns
|
|
8
8
|
*/
|
|
9
9
|
function toXml(jArray, options){
|
|
10
|
-
return arrToStr( jArray, options, 0);
|
|
10
|
+
return arrToStr( jArray, options, "", 0);
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
function arrToStr(arr, options, level){
|
|
13
|
+
function arrToStr(arr, options, jPath, level){
|
|
14
14
|
let xmlStr = "";
|
|
15
15
|
|
|
16
16
|
let indentation = "";
|
|
@@ -21,9 +21,17 @@ function arrToStr(arr, options, level){
|
|
|
21
21
|
for (let i = 0; i < arr.length; i++) {
|
|
22
22
|
const tagObj = arr[i];
|
|
23
23
|
const tagName = propName(tagObj);
|
|
24
|
+
let newJPath = "";
|
|
25
|
+
if(jPath.length === 0) newJPath = tagName
|
|
26
|
+
else newJPath = `${jPath}.${tagName}`;
|
|
24
27
|
|
|
25
28
|
if(tagName === options.textNodeName){
|
|
26
|
-
|
|
29
|
+
let tagText = tagObj[tagName];
|
|
30
|
+
if(!isStopNode(newJPath, options)){
|
|
31
|
+
tagText = options.tagValueProcessor( tagName, tagText);
|
|
32
|
+
tagText = replaceEntitiesValue(tagText, options);
|
|
33
|
+
}
|
|
34
|
+
xmlStr += indentation + tagText;
|
|
27
35
|
continue;
|
|
28
36
|
}else if( tagName === options.cdataPropName){
|
|
29
37
|
xmlStr += indentation + `<![CDATA[${tagObj[tagName][0][options.textNodeName]}]]>`;
|
|
@@ -31,10 +39,14 @@ function arrToStr(arr, options, level){
|
|
|
31
39
|
}else if( tagName === options.commentPropName){
|
|
32
40
|
xmlStr += indentation + `<!--${tagObj[tagName][0][options.textNodeName]}-->`;
|
|
33
41
|
continue;
|
|
42
|
+
}else if( tagName[0] === "?"){
|
|
43
|
+
const attStr = attr_to_str(tagObj[":@"], options);
|
|
44
|
+
xmlStr += indentation + `<${tagName} ${tagObj[tagName][0][options.textNodeName]} ${attStr}?>`;
|
|
45
|
+
continue;
|
|
34
46
|
}
|
|
35
|
-
const attStr = attr_to_str(tagObj
|
|
47
|
+
const attStr = attr_to_str(tagObj[":@"], options);
|
|
36
48
|
let tagStart = indentation + `<${tagName}${attStr}`;
|
|
37
|
-
let tagValue = arrToStr(tagObj[tagName], options, level + 1);
|
|
49
|
+
let tagValue = arrToStr(tagObj[tagName], options, newJPath, level + 1);
|
|
38
50
|
if( (!tagValue || tagValue.length === 0) && options.suppressEmptyNode){
|
|
39
51
|
if(options.unpairedTags.indexOf(tagName) !== -1){
|
|
40
52
|
xmlStr += tagStart + ">";
|
|
@@ -54,7 +66,7 @@ function propName(obj){
|
|
|
54
66
|
const keys = Object.keys(obj);
|
|
55
67
|
for (let i = 0; i < keys.length; i++) {
|
|
56
68
|
const key = keys[i];
|
|
57
|
-
if(key !== "
|
|
69
|
+
if(key !== ":@") return key;
|
|
58
70
|
}
|
|
59
71
|
}
|
|
60
72
|
|
|
@@ -62,10 +74,34 @@ function attr_to_str(attrMap, options){
|
|
|
62
74
|
let attrStr = "";
|
|
63
75
|
if(attrMap && !options.ignoreAttributes){
|
|
64
76
|
for( attr in attrMap){
|
|
65
|
-
|
|
77
|
+
let attrVal = options.attributeValueProcessor(attr, attrMap[attr]);
|
|
78
|
+
attrVal = replaceEntitiesValue(attrVal, options);
|
|
79
|
+
if(attrVal === true && options.suppressBooleanAttributes){
|
|
80
|
+
attrStr+= ` ${attr.substr(options.attributeNamePrefix.length)}`;
|
|
81
|
+
}else{
|
|
82
|
+
attrStr+= ` ${attr.substr(options.attributeNamePrefix.length)}="${attrVal}"`;
|
|
83
|
+
}
|
|
66
84
|
}
|
|
67
85
|
}
|
|
68
86
|
return attrStr;
|
|
69
87
|
}
|
|
70
88
|
|
|
89
|
+
/**
 * Tell whether a text node belongs to a tag that is configured as a stop node.
 *
 * @param {string} jPath dot-separated path of the text node; its last segment
 *   is expected to be `options.textNodeName`
 * @param {object} options builder options; `textNodeName` and `stopNodes` are read
 * @returns {boolean} true when the owning tag's full path, or the wildcard form
 *   `*.<tagName>`, is listed in `options.stopNodes`
 */
function isStopNode(jPath, options){
  // Strip the trailing ".<textNodeName>" segment to get the owning tag's path.
  // `substring` clamps a negative end to 0, so a bare text node yields "".
  const ownerPath = jPath.substring(0, jPath.length - options.textNodeName.length - 1);
  const tagName = ownerPath.substring(ownerPath.lastIndexOf(".") + 1);
  const wildcard = "*." + tagName;

  // Walk the array's elements only; `for…in` would also visit enumerable
  // properties inherited from Array.prototype.
  const stopNodes = Array.isArray(options.stopNodes) ? options.stopNodes : [];
  return stopNodes.some(stopNode => stopNode === ownerPath || stopNode === wildcard);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Substitute every configured entity pattern found in a text or attribute value.
 *
 * @param {*} textValue value about to be written into the XML output
 * @param {object} options builder options; `processEntities` and `entities`
 *   (a map of `{ regex, val }` pairs) are read
 * @returns {*} the substituted string, or `textValue` untouched when it is not
 *   a non-empty string or when entity processing is switched off
 */
function replaceEntitiesValue(textValue, options){
  // Only strings can be searched and replaced. A bare `length > 0` check also
  // lets arrays through, which would then fail on the missing `.replace`.
  if(typeof textValue === 'string' && textValue.length > 0 && options.processEntities){
    for (const entityName in options.entities) {
      const entity = options.entities[entityName];
      textValue = textValue.replace(entity.regex, entity.val);
    }
  }
  return textValue;
}
|
|
71
107
|
module.exports = toXml;
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
 * Read a `<!DOCTYPE ...>` declaration and collect the internal entities it declares.
 *
 * Inside the `[ ... ]` body only `<!ENTITY`, `<!ELEMENT` (skipped) and comments
 * are accepted; any other `<` raises an error.
 *
 * @param {string} xmlData complete XML text
 * @param {number} i index of the `<` that opens `<!DOCTYPE`
 * @returns {{entities: Object, i: number}} `entities` maps each entity name to
 *   `{ regx, val }` (a global RegExp matching `&name;` plus its replacement
 *   text); `i` is the index of the `>` that closes the DOCTYPE
 * @throws {Error} when the tag is not a DOCTYPE, holds an unsupported
 *   declaration, has an unterminated comment, or is never closed
 */
function readDocType(xmlData, i){

    const entities = {};
    // `i` sits on '<', so "<!D" occupies i..i+2 and "OCTYPE" must follow.
    if( !xmlData.startsWith("OCTYPE", i + 3) ){
        throw new Error(`Invalid Tag instead of DOCTYPE`);
    }

    i = i+9;
    let angleBracketsCount = 1;
    let hasBody = false, entity = false;
    let exp = "";
    for(;i<xmlData.length;i++){
        const ch = xmlData[i];
        if (ch === '<') {
            if( xmlData.startsWith("!--", i + 1) ){
                // Skip the whole comment in one step so that '<' and '>' inside
                // it are not mistaken for markup. Searching from i+2 keeps the
                // degenerate "<!-->" form accepted.
                const commentEnd = xmlData.indexOf("-->", i + 2);
                if(commentEnd === -1){
                    throw new Error(`Invalid XML comment in DOCTYPE`);
                }
                i = commentEnd + 2; // land on the comment's '>'
                continue;
            }
            if( hasBody && xmlData.startsWith("!ENTITY", i + 1) ){
                i += 7;
                entity = true;
            }else if( hasBody && xmlData.startsWith("!ELEMENT", i + 1) ){
                //Not supported
                i += 8;
            }else{
                throw new Error("Invalid DOCTYPE");
            }
            angleBracketsCount++;
            exp = "";
        } else if (ch === '>') {
            if(entity){
                parseEntityExp(exp, entities);
                entity = false;
            }
            angleBracketsCount--;
            if (angleBracketsCount === 0) {
                break;
            }
        }else if( ch === '[' && !entity ){
            hasBody = true;
        }else{
            // Includes '[' inside an entity declaration, where it is part of the value.
            exp += ch;
        }
    }
    if(angleBracketsCount !== 0){
        throw new Error(`Unclosed DOCTYPE`);
    }
    return {entities, i};
}

// Matches the text after "<!ENTITY": whitespace, a name made of letters,
// digits or '_', whitespace, then a quoted value that contains no '&'.
const entityRegex = RegExp("^\\s+([a-zA-Z0-9_]+)\\s+(['\"])([^&]+)\\2");

/**
 * Parse the text following `<!ENTITY` and, when it is a simple
 * `name "value"` declaration, record it in `entities`.
 *
 * @param {string} exp declaration text between `<!ENTITY` and the closing `>`
 * @param {Object} entities map that receives `{ regx, val }` under the entity name
 */
function parseEntityExp(exp, entities){
    const match = entityRegex.exec(exp);
    if(match){
        entities[ match[1] ] = {
            regx : RegExp( `&${match[1]};`,"g"),
            val: match[3]
        };
    }
}
|
|
92
|
+
module.exports = readDocType;
|
|
@@ -27,6 +27,8 @@ const defaultOptions = {
|
|
|
27
27
|
isArray: () => false,
|
|
28
28
|
commentPropName: false,
|
|
29
29
|
unpairedTags: [],
|
|
30
|
+
processEntities: true,
|
|
31
|
+
htmlEntities: false,
|
|
30
32
|
};
|
|
31
33
|
|
|
32
34
|
const props = [
|
|
@@ -49,6 +51,8 @@ const props = [
|
|
|
49
51
|
'isArray',
|
|
50
52
|
'commentPropName',
|
|
51
53
|
'unpairedTags',
|
|
54
|
+
'processEntities',
|
|
55
|
+
'htmlEntities'
|
|
52
56
|
];
|
|
53
57
|
|
|
54
58
|
const util = require('../util');
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
|
+
///@ts-check
|
|
2
3
|
|
|
3
4
|
const util = require('../util');
|
|
4
5
|
const xmlNode = require('./xmlNode');
|
|
6
|
+
const readDocType = require("./DocTypeReader");
|
|
5
7
|
const toNumber = require("strnum");
|
|
6
8
|
|
|
7
9
|
const regx =
|
|
@@ -11,43 +13,86 @@ const regx =
|
|
|
11
13
|
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
|
|
12
14
|
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
16
|
+
/**
 * Holds the options, entity tables and working state for one parser instance
 * and exposes the module's parsing functions as instance members.
 */
class OrderedObjParser{
  /**
   * @param {object} options parser options, stored as-is on `this.options`
   */
  constructor(options){
    // Predefined XML entities, each matched by name, decimal and hex reference.
    const xmlEntities = {
      "amp" : { regex: /&(amp|#38|#x26);/g, val : "&"},
      "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
      "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
      "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
      "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
    };

    // Extra named entities; the five XML ones are deliberately not repeated here.
    const htmlEntityTable = {
      "space": { regex: /&(nbsp|#160);/g, val: " " },
      "cent" : { regex: /&(cent|#162);/g, val: "¢" },
      "pound" : { regex: /&(pound|#163);/g, val: "£" },
      "yen" : { regex: /&(yen|#165);/g, val: "¥" },
      "euro" : { regex: /&(euro|#8364);/g, val: "€" },
      "copyright" : { regex: /&(copy|#169);/g, val: "©" },
      "reg" : { regex: /&(reg|#174);/g, val: "®" },
      "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
    };

    Object.assign(this, {
      // configuration and per-instance state
      options: options,
      currentNode: null,
      tagsNodeStack: [],
      docTypeEntities: {},
      lastEntities: xmlEntities,
      htmlEntities: htmlEntityTable,
      // module-level functions attached so they run with this instance as `this`
      addExternalEntities: addExternalEntities,
      parseXml: parseXml,
      parseTextData: parseTextData,
      resolveNameSpace: resolveNameSpace,
      buildAttributesMap: buildAttributesMap,
      isItStopNode: isItStopNode,
      replaceEntitiesValue: replaceEntitiesValue,
      readStopNodeData: readStopNodeData,
    });
  }

}
|
|
21
55
|
|
|
56
|
+
/**
 * Register additional named entities on this parser instance.
 *
 * Must be invoked with the parser as `this`; each entry is stored in
 * `this.lastEntities` as `{ regex, val }`, replacing any entry of the same name.
 *
 * @param {Object} externalEntities map of entity name (without `&` and `;`)
 *   to its replacement value
 */
function addExternalEntities(externalEntities){
  for (const ent of Object.keys(externalEntities)) {
    // Escape regex metacharacters so that a name such as "a.b" matches only the
    // literal reference "&a.b;" and a name such as "a(" cannot break the pattern.
    const literalName = ent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    this.lastEntities[ent] = {
       regex: new RegExp("&" + literalName + ";", "g"),
       val : externalEntities[ent]
    };
  }
}
|
|
22
66
|
|
|
23
67
|
/**
|
|
24
68
|
* @param {string} val
|
|
25
|
-
* @param {object} options
|
|
26
69
|
* @param {string} tagName
|
|
27
70
|
* @param {string} jPath
|
|
28
71
|
* @param {boolean} dontTrim
|
|
29
72
|
* @param {boolean} hasAttributes
|
|
30
73
|
* @param {boolean} isLeafNode
|
|
31
74
|
*/
|
|
32
|
-
function
|
|
75
|
+
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode) {
|
|
33
76
|
if (val !== undefined) {
|
|
34
|
-
if (options.trimValues && !dontTrim) {
|
|
77
|
+
if (this.options.trimValues && !dontTrim) {
|
|
35
78
|
val = val.trim();
|
|
36
79
|
}
|
|
37
80
|
if(val.length > 0){
|
|
38
|
-
|
|
81
|
+
val = this.replaceEntitiesValue(val);
|
|
82
|
+
|
|
83
|
+
const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
|
|
39
84
|
if(newval === null || newval === undefined){
|
|
40
85
|
//don't parse
|
|
41
86
|
return val;
|
|
42
87
|
}else if(typeof newval !== typeof val || newval !== val){
|
|
43
88
|
//overwrite
|
|
44
89
|
return newval;
|
|
45
|
-
}else if(options.trimValues){
|
|
46
|
-
return
|
|
90
|
+
}else if(this.options.trimValues){
|
|
91
|
+
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
47
92
|
}else{
|
|
48
93
|
const trimmedVal = val.trim();
|
|
49
94
|
if(trimmedVal === val){
|
|
50
|
-
return
|
|
95
|
+
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
51
96
|
}else{
|
|
52
97
|
return val;
|
|
53
98
|
}
|
|
@@ -56,8 +101,8 @@ function parseValue(val, options, tagName, jPath, dontTrim, hasAttributes, isLea
|
|
|
56
101
|
}
|
|
57
102
|
}
|
|
58
103
|
|
|
59
|
-
function resolveNameSpace(tagname
|
|
60
|
-
if (options.removeNSPrefix) {
|
|
104
|
+
function resolveNameSpace(tagname) {
|
|
105
|
+
if (this.options.removeNSPrefix) {
|
|
61
106
|
const tags = tagname.split(':');
|
|
62
107
|
const prefix = tagname.charAt(0) === '/' ? '/' : '';
|
|
63
108
|
if (tags[0] === 'xmlns') {
|
|
@@ -70,28 +115,12 @@ function resolveNameSpace(tagname, options) {
|
|
|
70
115
|
return tagname;
|
|
71
116
|
}
|
|
72
117
|
|
|
73
|
-
function _parseValue(val, shouldParse, options) {
|
|
74
|
-
if (shouldParse && typeof val === 'string') {
|
|
75
|
-
//console.log(options)
|
|
76
|
-
const newval = val.trim();
|
|
77
|
-
if(newval === 'true' ) return true;
|
|
78
|
-
else if(newval === 'false' ) return false;
|
|
79
|
-
else return toNumber(val, options);
|
|
80
|
-
} else {
|
|
81
|
-
if (util.isExist(val)) {
|
|
82
|
-
return val;
|
|
83
|
-
} else {
|
|
84
|
-
return '';
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
118
|
//TODO: change regex to capture NS
|
|
90
119
|
//const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
|
|
91
120
|
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
|
|
92
121
|
|
|
93
|
-
function buildAttributesMap(attrStr, jPath
|
|
94
|
-
if (!options.ignoreAttributes && typeof attrStr === 'string') {
|
|
122
|
+
function buildAttributesMap(attrStr, jPath) {
|
|
123
|
+
if (!this.options.ignoreAttributes && typeof attrStr === 'string') {
|
|
95
124
|
// attrStr = attrStr.replace(/\r?\n/g, ' ');
|
|
96
125
|
//attrStr = attrStr || attrStr.trim();
|
|
97
126
|
|
|
@@ -99,16 +128,16 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
99
128
|
const len = matches.length; //don't make it inline
|
|
100
129
|
const attrs = {};
|
|
101
130
|
for (let i = 0; i < len; i++) {
|
|
102
|
-
const attrName = resolveNameSpace(matches[i][1]
|
|
131
|
+
const attrName = this.resolveNameSpace(matches[i][1]);
|
|
103
132
|
let oldVal = matches[i][4];
|
|
104
|
-
const aName = options.attributeNamePrefix + attrName;
|
|
133
|
+
const aName = this.options.attributeNamePrefix + attrName;
|
|
105
134
|
if (attrName.length) {
|
|
106
135
|
if (oldVal !== undefined) {
|
|
107
|
-
if (options.trimValues) {
|
|
136
|
+
if (this.options.trimValues) {
|
|
108
137
|
oldVal = oldVal.trim();
|
|
109
138
|
}
|
|
110
|
-
|
|
111
|
-
const newVal = options.attributeValueProcessor(attrName, oldVal, jPath);
|
|
139
|
+
oldVal = this.replaceEntitiesValue(oldVal);
|
|
140
|
+
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath);
|
|
112
141
|
if(newVal === null || newVal === undefined){
|
|
113
142
|
//don't parse
|
|
114
143
|
attrs[aName] = oldVal;
|
|
@@ -117,13 +146,13 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
117
146
|
attrs[aName] = newVal;
|
|
118
147
|
}else{
|
|
119
148
|
//parse
|
|
120
|
-
attrs[aName] =
|
|
149
|
+
attrs[aName] = parseValue(
|
|
121
150
|
oldVal,
|
|
122
|
-
options.parseAttributeValue,
|
|
123
|
-
options.numberParseOptions
|
|
151
|
+
this.options.parseAttributeValue,
|
|
152
|
+
this.options.numberParseOptions
|
|
124
153
|
);
|
|
125
154
|
}
|
|
126
|
-
} else if (options.allowBooleanAttributes) {
|
|
155
|
+
} else if (this.options.allowBooleanAttributes) {
|
|
127
156
|
attrs[aName] = true;
|
|
128
157
|
}
|
|
129
158
|
}
|
|
@@ -131,23 +160,21 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
131
160
|
if (!Object.keys(attrs).length) {
|
|
132
161
|
return;
|
|
133
162
|
}
|
|
134
|
-
if (options.attributesGroupName) {
|
|
163
|
+
if (this.options.attributesGroupName) {
|
|
135
164
|
const attrCollection = {};
|
|
136
|
-
attrCollection[options.attributesGroupName] = attrs;
|
|
165
|
+
attrCollection[this.options.attributesGroupName] = attrs;
|
|
137
166
|
return attrCollection;
|
|
138
167
|
}
|
|
139
168
|
return attrs;
|
|
140
169
|
}
|
|
141
170
|
}
|
|
142
171
|
|
|
143
|
-
const
|
|
172
|
+
const parseXml = function(xmlData) {
|
|
144
173
|
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
|
|
145
174
|
const xmlObj = new xmlNode('!xml');
|
|
146
175
|
let currentNode = xmlObj;
|
|
147
176
|
let textData = "";
|
|
148
|
-
const tagsNodeStack = [];
|
|
149
177
|
let jPath = "";
|
|
150
|
-
|
|
151
178
|
for(let i=0; i< xmlData.length; i++){//for each char in XML data
|
|
152
179
|
const ch = xmlData[i];
|
|
153
180
|
if(ch === '<'){
|
|
@@ -157,128 +184,131 @@ const parseToOrderedJsObj = function(xmlData, options) {
|
|
|
157
184
|
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
|
|
158
185
|
let tagName = xmlData.substring(i+2,closeIndex).trim();
|
|
159
186
|
|
|
160
|
-
if(options.removeNSPrefix){
|
|
187
|
+
if(this.options.removeNSPrefix){
|
|
161
188
|
const colonIndex = tagName.indexOf(":");
|
|
162
189
|
if(colonIndex !== -1){
|
|
163
190
|
tagName = tagName.substr(colonIndex+1);
|
|
164
191
|
}
|
|
165
192
|
}
|
|
166
|
-
|
|
193
|
+
|
|
167
194
|
if(currentNode){
|
|
168
|
-
textData =
|
|
169
|
-
, options
|
|
195
|
+
textData = this.parseTextData(textData
|
|
170
196
|
, currentNode.tagname
|
|
171
197
|
, jPath
|
|
172
198
|
,false
|
|
173
|
-
, currentNode
|
|
199
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
174
200
|
, Object.keys(currentNode.child).length === 0);
|
|
175
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
201
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
176
202
|
textData = "";
|
|
177
203
|
}
|
|
178
204
|
|
|
179
|
-
if (isItStopNode(options.stopNodes, tagsNodeStack, currentNode.tagname)) { //TODO: namespace
|
|
180
|
-
const top = tagsNodeStack[tagsNodeStack.length - 1];
|
|
181
|
-
const stopNode = top.child[ top.child.length -1 ];
|
|
182
|
-
stopNode[currentNode.tagname] = [ { [options.textNodeName] :xmlData.substr(currentNode.startIndex + 1, i - currentNode.startIndex - 1) }];
|
|
183
|
-
}
|
|
184
|
-
|
|
185
205
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
186
206
|
|
|
187
|
-
currentNode = tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
|
|
207
|
+
currentNode = this.tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
|
|
188
208
|
textData = "";
|
|
189
209
|
i = closeIndex;
|
|
190
210
|
} else if( xmlData[i+1] === '?') {
|
|
191
|
-
|
|
211
|
+
let result = readTagExp(xmlData,i, false, "?>");
|
|
212
|
+
if(!result) throw new Error("Pi Tag is not closed.");
|
|
213
|
+
|
|
214
|
+
let tagName= result.tagName;
|
|
215
|
+
let tagExp = result.tagExp;
|
|
216
|
+
let attrExpPresent = result.attrExpPresent;
|
|
217
|
+
let closeIndex = result.closeIndex;
|
|
218
|
+
|
|
219
|
+
//TODO: remove repeated code
|
|
220
|
+
if(textData){ //store previously collected data as textNode
|
|
221
|
+
textData = this.parseTextData(textData
|
|
222
|
+
, currentNode.tagname
|
|
223
|
+
, jPath
|
|
224
|
+
,false
|
|
225
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
226
|
+
, Object.keys(currentNode.child).length === 0);
|
|
227
|
+
|
|
228
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
229
|
+
textData = "";
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
const childNode = new xmlNode(tagName);
|
|
233
|
+
childNode.add(this.options.textNodeName, "");
|
|
234
|
+
|
|
235
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
236
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
237
|
+
}
|
|
238
|
+
currentNode.addChild(childNode);
|
|
239
|
+
|
|
240
|
+
i = closeIndex + 1;
|
|
192
241
|
} else if(xmlData.substr(i + 1, 3) === '!--') {
|
|
193
242
|
const endIndex = findClosingIndex(xmlData, "-->", i, "Comment is not closed.")
|
|
194
|
-
if(options.commentPropName){
|
|
243
|
+
if(this.options.commentPropName){
|
|
195
244
|
const comment = xmlData.substring(i + 4, endIndex - 2);
|
|
196
245
|
|
|
197
246
|
//TODO: remove repeated code
|
|
198
247
|
if(textData){ //store previously collected data as textNode
|
|
199
|
-
textData =
|
|
200
|
-
, options
|
|
248
|
+
textData = this.parseTextData(textData
|
|
201
249
|
, currentNode.tagname
|
|
202
250
|
, jPath
|
|
203
251
|
,false
|
|
204
|
-
, currentNode
|
|
252
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
205
253
|
, Object.keys(currentNode.child).length === 0);
|
|
206
254
|
|
|
207
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
255
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
208
256
|
textData = "";
|
|
209
257
|
}
|
|
210
|
-
currentNode.add(options.commentPropName, [ { [options.textNodeName] : comment } ]);
|
|
258
|
+
currentNode.add(this.options.commentPropName, [ { [this.options.textNodeName] : comment } ]);
|
|
211
259
|
}
|
|
212
260
|
i = endIndex;
|
|
213
261
|
} else if( xmlData.substr(i + 1, 2) === '!D') {
|
|
214
|
-
const
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
i = xmlData.indexOf("]>", i) + 1;
|
|
218
|
-
}else{
|
|
219
|
-
i = closeIndex;
|
|
220
|
-
}
|
|
262
|
+
const result = readDocType(xmlData, i);
|
|
263
|
+
this.docTypeEntities = result.entities;
|
|
264
|
+
i = result.i;
|
|
221
265
|
}else if(xmlData.substr(i + 1, 2) === '![') {
|
|
222
266
|
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
|
|
223
267
|
const tagExp = xmlData.substring(i + 9,closeIndex);
|
|
224
268
|
|
|
225
269
|
if(textData){ //store previously collected data as textNode
|
|
226
|
-
textData =
|
|
227
|
-
, options
|
|
270
|
+
textData = this.parseTextData(textData
|
|
228
271
|
, currentNode.tagname
|
|
229
272
|
, jPath
|
|
230
273
|
,false
|
|
231
|
-
, currentNode
|
|
274
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
232
275
|
, Object.keys(currentNode.child).length === 0);
|
|
233
276
|
|
|
234
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
277
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
235
278
|
textData = "";
|
|
236
279
|
}
|
|
237
280
|
|
|
238
281
|
//cdata should be set even if it is 0 length string
|
|
239
|
-
if(options.cdataPropName){
|
|
240
|
-
let val =
|
|
241
|
-
if(!val) val = "";
|
|
242
|
-
currentNode.add(options.cdataPropName, [ { [options.textNodeName] :
|
|
282
|
+
if(this.options.cdataPropName){
|
|
283
|
+
// let val = this.parseTextData(tagExp, this.options.cdataPropName, jPath + "." + this.options.cdataPropName, true, false, true);
|
|
284
|
+
// if(!val) val = "";
|
|
285
|
+
currentNode.add(this.options.cdataPropName, [ { [this.options.textNodeName] : tagExp } ]);
|
|
243
286
|
}else{
|
|
244
|
-
let val =
|
|
287
|
+
let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true);
|
|
245
288
|
if(!val) val = "";
|
|
246
|
-
currentNode.add(options.textNodeName, val);
|
|
289
|
+
currentNode.add(this.options.textNodeName, val);
|
|
247
290
|
}
|
|
248
291
|
|
|
249
292
|
i = closeIndex + 2;
|
|
250
293
|
}else {//Opening tag
|
|
251
|
-
|
|
252
|
-
let
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
let
|
|
256
|
-
let
|
|
257
|
-
if(separatorIndex !== -1){//separate tag name and attributes expression
|
|
258
|
-
tagName = tagExp.substr(0, separatorIndex).replace(/\s\s*$/, '');
|
|
259
|
-
tagExp = tagExp.substr(separatorIndex + 1);
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
if(options.removeNSPrefix){
|
|
263
|
-
const colonIndex = tagName.indexOf(":");
|
|
264
|
-
if(colonIndex !== -1){
|
|
265
|
-
tagName = tagName.substr(colonIndex+1);
|
|
266
|
-
shouldBuildAttributesMap = tagName !== result.data.substr(colonIndex + 1);
|
|
267
|
-
}
|
|
268
|
-
}
|
|
294
|
+
|
|
295
|
+
let result = readTagExp(xmlData,i, this. options.removeNSPrefix);
|
|
296
|
+
let tagName= result.tagName;
|
|
297
|
+
let tagExp = result.tagExp;
|
|
298
|
+
let attrExpPresent = result.attrExpPresent;
|
|
299
|
+
let closeIndex = result.closeIndex;
|
|
269
300
|
|
|
270
301
|
//save text as child node
|
|
271
302
|
if (currentNode && textData) {
|
|
272
303
|
if(currentNode.tagname !== '!xml'){
|
|
273
304
|
//when nested tag is found
|
|
274
|
-
textData =
|
|
275
|
-
, options
|
|
305
|
+
textData = this.parseTextData(textData
|
|
276
306
|
, currentNode.tagname
|
|
277
307
|
, jPath
|
|
278
308
|
, false
|
|
279
|
-
, currentNode
|
|
309
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
280
310
|
, false);
|
|
281
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
311
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
282
312
|
textData = "";
|
|
283
313
|
}
|
|
284
314
|
}
|
|
@@ -287,52 +317,67 @@ const parseToOrderedJsObj = function(xmlData, options) {
|
|
|
287
317
|
jPath += jPath ? "." + tagName : tagName;
|
|
288
318
|
}
|
|
289
319
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
320
|
+
//check if last tag was unpaired tag
|
|
321
|
+
const lastTag = currentNode;
|
|
322
|
+
if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1 ){
|
|
323
|
+
currentNode = this.tagsNodeStack.pop();
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
if (this.isItStopNode(this.options.stopNodes, jPath, tagName)) { //TODO: namespace
|
|
327
|
+
let tagContent = "";
|
|
328
|
+
//self-closing tag
|
|
329
|
+
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){}
|
|
330
|
+
//boolean tag
|
|
331
|
+
else if(this.options.unpairedTags.indexOf(tagName) !== -1){}
|
|
332
|
+
//normal tag
|
|
333
|
+
else{
|
|
334
|
+
//read until closing tag is found
|
|
335
|
+
const result = this.readStopNodeData(xmlData, tagName, closeIndex + 1);
|
|
336
|
+
if(!result) throw new Error(`Unexpected end of ${tagName}`);
|
|
337
|
+
i = result.i;
|
|
338
|
+
tagContent = result.tagContent;
|
|
298
339
|
}
|
|
299
340
|
|
|
300
341
|
const childNode = new xmlNode(tagName);
|
|
301
|
-
if(tagName !== tagExp &&
|
|
302
|
-
childNode
|
|
342
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
343
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
303
344
|
}
|
|
304
345
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
305
|
-
|
|
346
|
+
childNode.add(this.options.textNodeName, tagContent);
|
|
347
|
+
|
|
306
348
|
currentNode.addChild(childNode);
|
|
307
|
-
}
|
|
308
|
-
//
|
|
309
|
-
|
|
310
|
-
|
|
349
|
+
}else{
|
|
350
|
+
//selfClosing tag
|
|
351
|
+
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
|
|
352
|
+
|
|
353
|
+
if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
|
|
354
|
+
tagName = tagName.substr(0, tagName.length - 1);
|
|
355
|
+
tagExp = tagName;
|
|
356
|
+
}else{
|
|
357
|
+
tagExp = tagExp.substr(0, tagExp.length - 1);
|
|
358
|
+
}
|
|
311
359
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
360
|
+
const childNode = new xmlNode(tagName);
|
|
361
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
362
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
363
|
+
}
|
|
364
|
+
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
365
|
+
currentNode.addChild(childNode);
|
|
315
366
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
childNode.startIndex=closeIndex; //for further processing
|
|
327
|
-
|
|
328
|
-
if(tagName !== tagExp && shouldBuildAttributesMap){
|
|
329
|
-
childNode.attributes = buildAttributesMap(tagExp, jPath, options);
|
|
367
|
+
//opening tag
|
|
368
|
+
else{
|
|
369
|
+
const childNode = new xmlNode( tagName);
|
|
370
|
+
this.tagsNodeStack.push(currentNode);
|
|
371
|
+
|
|
372
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
373
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
374
|
+
}
|
|
375
|
+
currentNode.addChild(childNode);
|
|
376
|
+
currentNode = childNode;
|
|
330
377
|
}
|
|
331
|
-
|
|
332
|
-
|
|
378
|
+
textData = "";
|
|
379
|
+
i = closeIndex;
|
|
333
380
|
}
|
|
334
|
-
textData = "";
|
|
335
|
-
i = closeIndex;
|
|
336
381
|
}
|
|
337
382
|
}else{
|
|
338
383
|
textData += xmlData[i];
|
|
@@ -341,31 +386,39 @@ const parseToOrderedJsObj = function(xmlData, options) {
|
|
|
341
386
|
return xmlObj.child;
|
|
342
387
|
}
|
|
343
388
|
|
|
389
|
+
const replaceEntitiesValue = function(val){
|
|
390
|
+
if(this.options.processEntities){
|
|
391
|
+
for(let entityName in this.docTypeEntities){
|
|
392
|
+
const entity = this.docTypeEntities[entityName];
|
|
393
|
+
val = val.replace( entity.regx, entity.val);
|
|
394
|
+
}
|
|
395
|
+
for(let entityName in this.lastEntities){
|
|
396
|
+
const entity = this.lastEntities[entityName];
|
|
397
|
+
val = val.replace( entity.regex, entity.val);
|
|
398
|
+
}
|
|
399
|
+
if(this.options.htmlEntities){
|
|
400
|
+
for(let entityName in this.htmlEntities){
|
|
401
|
+
const entity = this.htmlEntities[entityName];
|
|
402
|
+
val = val.replace( entity.regex, entity.val);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
return val;
|
|
407
|
+
}
|
|
344
408
|
//TODO: use jPath to simplify the logic
|
|
345
409
|
/**
|
|
346
410
|
*
|
|
347
411
|
* @param {string[]} stopNodes
|
|
348
|
-
* @param {
|
|
412
|
+
* @param {string} jPath
|
|
413
|
+
* @param {string} currentTagName
|
|
349
414
|
*/
|
|
350
|
-
function isItStopNode(stopNodes,
|
|
351
|
-
const
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
if(
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
if(matchingStopNodes.length > 0){
|
|
358
|
-
let jPath = "";
|
|
359
|
-
for (let i = 1; i < tagsNodeStack.length; i++) {
|
|
360
|
-
const node = tagsNodeStack[i];
|
|
361
|
-
jPath += "." + node.tagname;
|
|
362
|
-
}
|
|
363
|
-
jPath += "." + currentTagName;
|
|
364
|
-
jPath = jPath.substr(1);
|
|
365
|
-
for (let i = 0; i < matchingStopNodes.length; i++) {
|
|
366
|
-
if(matchingStopNodes[i] === jPath) return true;
|
|
367
|
-
}
|
|
368
|
-
}else return false;
|
|
415
|
+
function isItStopNode(stopNodes, jPath, currentTagName){
|
|
416
|
+
const allNodesExp = "*." + currentTagName;
|
|
417
|
+
for (const stopNodePath in stopNodes) {
|
|
418
|
+
const stopNodeExp = stopNodes[stopNodePath];
|
|
419
|
+
if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true;
|
|
420
|
+
}
|
|
421
|
+
return false;
|
|
369
422
|
}
|
|
370
423
|
|
|
371
424
|
/**
|
|
@@ -374,7 +427,7 @@ function isItStopNode(stopNodes, tagsNodeStack, currentTagName){
|
|
|
374
427
|
* @param {number} i starting index
|
|
375
428
|
* @returns
|
|
376
429
|
*/
|
|
377
|
-
function tagExpWithClosingIndex(xmlData, i){
|
|
430
|
+
function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){
|
|
378
431
|
let attrBoundary;
|
|
379
432
|
let tagExp = "";
|
|
380
433
|
for (let index = i; index < xmlData.length; index++) {
|
|
@@ -383,11 +436,20 @@ function tagExpWithClosingIndex(xmlData, i){
|
|
|
383
436
|
if (ch === attrBoundary) attrBoundary = "";//reset
|
|
384
437
|
} else if (ch === '"' || ch === "'") {
|
|
385
438
|
attrBoundary = ch;
|
|
386
|
-
} else if (ch ===
|
|
439
|
+
} else if (ch === closingChar[0]) {
|
|
440
|
+
if(closingChar[1]){
|
|
441
|
+
if(xmlData[index + 1] === closingChar[1]){
|
|
442
|
+
return {
|
|
443
|
+
data: tagExp,
|
|
444
|
+
index: index
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
}else{
|
|
387
448
|
return {
|
|
388
449
|
data: tagExp,
|
|
389
450
|
index: index
|
|
390
451
|
}
|
|
452
|
+
}
|
|
391
453
|
} else if (ch === '\t') {
|
|
392
454
|
ch = " "
|
|
393
455
|
}
|
|
@@ -404,4 +466,72 @@ function findClosingIndex(xmlData, str, i, errMsg){
|
|
|
404
466
|
}
|
|
405
467
|
}
|
|
406
468
|
|
|
407
|
-
|
|
469
|
+
function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){
|
|
470
|
+
const result = tagExpWithClosingIndex(xmlData, i+1, closingChar);
|
|
471
|
+
if(!result) return;
|
|
472
|
+
let tagExp = result.data;
|
|
473
|
+
const closeIndex = result.index;
|
|
474
|
+
const separatorIndex = tagExp.search(/\s/);
|
|
475
|
+
let tagName = tagExp;
|
|
476
|
+
let attrExpPresent = true;
|
|
477
|
+
if(separatorIndex !== -1){//separate tag name and attributes expression
|
|
478
|
+
tagName = tagExp.substr(0, separatorIndex).replace(/\s\s*$/, '');
|
|
479
|
+
tagExp = tagExp.substr(separatorIndex + 1);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
if(removeNSPrefix){
|
|
483
|
+
const colonIndex = tagName.indexOf(":");
|
|
484
|
+
if(colonIndex !== -1){
|
|
485
|
+
tagName = tagName.substr(colonIndex+1);
|
|
486
|
+
attrExpPresent = tagName !== result.data.substr(colonIndex + 1);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
return {
|
|
491
|
+
tagName: tagName,
|
|
492
|
+
tagExp: tagExp,
|
|
493
|
+
closeIndex: closeIndex,
|
|
494
|
+
attrExpPresent: attrExpPresent,
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
/**
|
|
498
|
+
* find paired tag for a stop node
|
|
499
|
+
* @param {string} xmlData
|
|
500
|
+
* @param {string} tagName
|
|
501
|
+
* @param {number} i
|
|
502
|
+
*/
|
|
503
|
+
function readStopNodeData(xmlData, tagName, i){
|
|
504
|
+
const startIndex = i;
|
|
505
|
+
for (; i < xmlData.length; i++) {
|
|
506
|
+
if( xmlData[i] === "<" && xmlData[i+1] === "/"){
|
|
507
|
+
const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
|
|
508
|
+
let closeTagName = xmlData.substring(i+2,closeIndex).trim();
|
|
509
|
+
if(closeTagName === tagName){
|
|
510
|
+
return {
|
|
511
|
+
tagContent: xmlData.substring(startIndex, i),
|
|
512
|
+
i : closeIndex
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
i=closeIndex;
|
|
516
|
+
}
|
|
517
|
+
}//end for loop
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
function parseValue(val, shouldParse, options) {
|
|
521
|
+
if (shouldParse && typeof val === 'string') {
|
|
522
|
+
//console.log(options)
|
|
523
|
+
const newval = val.trim();
|
|
524
|
+
if(newval === 'true' ) return true;
|
|
525
|
+
else if(newval === 'false' ) return false;
|
|
526
|
+
else return toNumber(val, options);
|
|
527
|
+
} else {
|
|
528
|
+
if (util.isExist(val)) {
|
|
529
|
+
return val;
|
|
530
|
+
} else {
|
|
531
|
+
return '';
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
module.exports = OrderedObjParser;
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
const { buildOptions} = require("./OptionsBuilder");
|
|
2
|
-
const
|
|
2
|
+
const OrderedObjParser = require("./OrderedObjParser");
|
|
3
3
|
const { prettify} = require("./node2json");
|
|
4
4
|
const validator = require('../validator');
|
|
5
5
|
|
|
6
6
|
class XMLParser{
|
|
7
|
+
|
|
7
8
|
constructor(options){
|
|
9
|
+
this.externalEntities = {};
|
|
8
10
|
this.options = buildOptions(options);
|
|
11
|
+
|
|
9
12
|
}
|
|
10
13
|
/**
|
|
11
14
|
* Parse XML data to JS object
|
|
@@ -27,10 +30,27 @@ class XMLParser{
|
|
|
27
30
|
throw Error( `${result.err.msg}:${result.err.line}:${result.err.col}` )
|
|
28
31
|
}
|
|
29
32
|
}
|
|
30
|
-
const
|
|
33
|
+
const orderedObjParser = new OrderedObjParser(this.options);
|
|
34
|
+
orderedObjParser.addExternalEntities(this.externalEntities);
|
|
35
|
+
const orderedResult = orderedObjParser.parseXml(xmlData);
|
|
31
36
|
if(this.options.preserveOrder || orderedResult === undefined) return orderedResult;
|
|
32
37
|
else return prettify(orderedResult, this.options);
|
|
33
38
|
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Add Entity which is not by default supported by this library
|
|
42
|
+
* @param {string} key
|
|
43
|
+
* @param {string} value
|
|
44
|
+
*/
|
|
45
|
+
addEntity(key, value){
|
|
46
|
+
if(value.indexOf("&") !== -1){
|
|
47
|
+
throw new Error("Entity value can't have '&'")
|
|
48
|
+
}else if(key.indexOf("&") !== -1 || key.indexOf(";") !== -1){
|
|
49
|
+
throw new Error("An entity must be set without '&' and ';'. Eg. use '#xD' for '&#xD;'")
|
|
50
|
+
}else{
|
|
51
|
+
this.externalEntities[key] = value;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
34
54
|
}
|
|
35
55
|
|
|
36
56
|
module.exports = XMLParser;
|
|
@@ -37,8 +37,8 @@ function compress(arr, options, jPath){
|
|
|
37
37
|
let val = compress(tagObj[property], options, newJpath);
|
|
38
38
|
const isLeaf = isLeafTag(val, options);
|
|
39
39
|
|
|
40
|
-
if(tagObj
|
|
41
|
-
assignAttributes( val, tagObj
|
|
40
|
+
if(tagObj[":@"]){
|
|
41
|
+
assignAttributes( val, tagObj[":@"], newJpath, options);
|
|
42
42
|
}else if(Object.keys(val).length === 1 && val[options.textNodeName] !== undefined && !options.alwaysCreateTextNode){
|
|
43
43
|
val = val[options.textNodeName];
|
|
44
44
|
}else if(Object.keys(val).length === 0){
|
|
@@ -74,7 +74,7 @@ function propName(obj){
|
|
|
74
74
|
const keys = Object.keys(obj);
|
|
75
75
|
for (let i = 0; i < keys.length; i++) {
|
|
76
76
|
const key = keys[i];
|
|
77
|
-
if(key !== "
|
|
77
|
+
if(key !== ":@") return key;
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
|
package/src/xmlparser/xmlNode.js
CHANGED
|
@@ -4,15 +4,15 @@ class XmlNode{
|
|
|
4
4
|
constructor(tagname) {
|
|
5
5
|
this.tagname = tagname;
|
|
6
6
|
this.child = []; //nested tags, text, cdata, comments in order
|
|
7
|
-
this
|
|
7
|
+
this[":@"] = {}; //attributes map
|
|
8
8
|
}
|
|
9
9
|
add(key,val){
|
|
10
10
|
// this.child.push( {name : key, val: val, isCdata: isCdata });
|
|
11
11
|
this.child.push( {[key]: val });
|
|
12
12
|
}
|
|
13
13
|
addChild(node) {
|
|
14
|
-
if(node
|
|
15
|
-
this.child.push( { [node.tagname]: node.child,
|
|
14
|
+
if(node[":@"] && Object.keys(node[":@"]).length > 0){
|
|
15
|
+
this.child.push( { [node.tagname]: node.child, [":@"]: node[":@"] });
|
|
16
16
|
}else{
|
|
17
17
|
this.child.push( { [node.tagname]: node.child });
|
|
18
18
|
}
|