fast-xml-parser 4.0.0-beta.0 → 4.0.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +10 -1
- package/package.json +5 -5
- package/src/fxp.d.ts +22 -9
- package/src/validator.js +7 -1
- package/src/xmlbuilder/json2xml.js +45 -12
- package/src/xmlbuilder/orderedJs2Xml.js +51 -10
- package/src/xmlparser/DocTypeReader.js +92 -0
- package/src/xmlparser/OptionsBuilder.js +11 -3
- package/src/xmlparser/OrderedObjParser.js +271 -146
- package/src/xmlparser/XMLParser.js +22 -2
- package/src/xmlparser/node2json.js +10 -4
- package/src/xmlparser/xmlNode.js +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
Note: If you find missing information about particular minor version, that version must have been changed without any functional change in this library.
|
|
2
2
|
|
|
3
|
+
** 4.0.0-beta.5 / 2021-12-04**
|
|
4
|
+
* fix: handle a tag named "attributes"
|
|
5
|
+
|
|
6
|
+
** 4.0.0-beta.4 / 2021-12-02**
|
|
7
|
+
* Support HTML document parsing
|
|
8
|
+
* skip stop nodes parsing when building the XML from JS object
|
|
9
|
+
* Support external entities without DOCTYPE
|
|
10
|
+
* update dev dependency: strnum v1.0.5 to fix long number issue
|
|
11
|
+
|
|
12
|
+
** 4.0.0-beta.3 / 2021-11-30**
|
|
13
|
+
* support global stopNodes expression like "*.stop"
|
|
14
|
+
* support self-closing and paired unpaired tags
|
|
15
|
+
* fix: CDATA should not be parsed.
|
|
16
|
+
* Fix typings for XMLBuilder (#396)(By [Anders Emil Salvesen](https://github.com/andersem))
|
|
17
|
+
* supports XML entities, HTML entities, DOCTYPE entities
|
|
18
|
+
|
|
19
|
+
**⚠️ 4.0.0-beta.2 / 2021-11-19**
|
|
20
|
+
* rename `attrMap` to `attributes` in parser output when `preserveOrder:true`
|
|
21
|
+
* supports unpairedTags
|
|
22
|
+
|
|
23
|
+
**⚠️ 4.0.0-beta.1 / 2021-11-18**
|
|
24
|
+
* Parser returns an array now
|
|
25
|
+
* to make the structure common
|
|
26
|
+
* and to return root level detail
|
|
27
|
+
* renamed `cdataTagName` to `cdataPropName`
|
|
28
|
+
* Added `commentPropName`
|
|
29
|
+
* fix typings
|
|
30
|
+
|
|
3
31
|
**⚠️ 4.0.0-beta.0 / 2021-11-16**
|
|
4
32
|
* Name change of many configuration properties.
|
|
5
33
|
* `attrNodeName` to `attributesGroupName`
|
package/README.md
CHANGED
|
@@ -29,6 +29,7 @@ Check [ThankYouBackers](https://github.com/NaturalIntelligence/ThankYouBackers)
|
|
|
29
29
|
<a href="http://nasa.github.io/" title="NASA" > <img src="https://avatars0.githubusercontent.com/u/848102" width="60px" ></a>
|
|
30
30
|
<a href="https://github.com/prettier" title="Prettier" > <img src="https://avatars0.githubusercontent.com/u/25822731" width="60px" ></a>
|
|
31
31
|
<a href="http://brain.js.org/" title="brain.js" > <img src="https://avatars2.githubusercontent.com/u/23732838" width="60px" ></a>
|
|
32
|
+
<a href="https://github.com/aws" title="AWS SDK" > <img src="https://avatars.githubusercontent.com/u/2232217" width="60px" ></a>
|
|
32
33
|
<a href="#" title="NHS Connect" > <img src="https://avatars3.githubusercontent.com/u/20316669" width="60px" ></a>
|
|
33
34
|
<a href="http://www.fda.gov/" title="Food and Drug Administration " > <img src="https://avatars2.githubusercontent.com/u/6471964" width="60px" ></a>
|
|
34
35
|
<a href="http://www.magento.com/" title="Magento" > <img src="https://avatars2.githubusercontent.com/u/168457" width="60px" ></a>
|
|
@@ -48,6 +49,13 @@ Check the list of all known users [here](./USERs.md);
|
|
|
48
49
|
* Faster than any pure JS implementation.
|
|
49
50
|
* It can handle big files (tested up to 100mb).
|
|
50
51
|
* Controlled parsing using various options
|
|
52
|
+
* XML Entities, HTML entities, and DOCTYPE entities are supported.
|
|
53
|
+
* unpaired tags (Eg `<br>` in HTML), stop nodes (Eg `<script>` in HTML) are supported.
|
|
54
|
+
* You can restore almost same XML from JSON
|
|
55
|
+
* Supports comments
|
|
56
|
+
* It can preserve Order of tags in JS object
|
|
57
|
+
* You can control if a single tag should be parsed into array.
|
|
58
|
+
* And many more other features.
|
|
51
59
|
|
|
52
60
|
## How to use
|
|
53
61
|
|
|
@@ -98,7 +106,8 @@ In a HTML page
|
|
|
98
106
|
2. [XML Parser](./docs/v4/2.XMLparseOptions.md)
|
|
99
107
|
3. [XML Builder](./docs/v4/3.XMLBuilder.md)
|
|
100
108
|
4. [XML Validator](./docs/v4/4.XMLValidator.md)
|
|
101
|
-
|
|
109
|
+
5. [Entities](./docs/5.Entities.md)
|
|
110
|
+
6. [HTML Document Parsing](./docs/6.HTMLParsing.md)
|
|
102
111
|
## Performance
|
|
103
112
|
|
|
104
113
|
### XML Parser
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "fast-xml-parser",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.5",
|
|
4
4
|
"description": "Validate XML, Parse XML, Build XML without C/C++ based libraries",
|
|
5
5
|
"main": "./src/fxp.js",
|
|
6
6
|
"scripts": {
|
|
@@ -47,14 +47,14 @@
|
|
|
47
47
|
"@babel/preset-env": "^7.13.10",
|
|
48
48
|
"@babel/register": "^7.13.8",
|
|
49
49
|
"babel-loader": "^8.2.2",
|
|
50
|
-
"eslint": "^
|
|
50
|
+
"eslint": "^8.3.0",
|
|
51
51
|
"he": "^1.2.0",
|
|
52
52
|
"jasmine": "^3.6.4",
|
|
53
53
|
"nyc": "^15.1.0",
|
|
54
54
|
"prettier": "^1.19.1",
|
|
55
55
|
"publish-please": "^5.5.2",
|
|
56
|
-
"webpack": "^
|
|
57
|
-
"webpack-cli": "^
|
|
56
|
+
"webpack": "^5.64.4",
|
|
57
|
+
"webpack-cli": "^4.9.1"
|
|
58
58
|
},
|
|
59
59
|
"typings": "src/fxp.d.ts",
|
|
60
60
|
"funding": {
|
|
@@ -62,6 +62,6 @@
|
|
|
62
62
|
"url": "https://paypal.me/naturalintelligence"
|
|
63
63
|
},
|
|
64
64
|
"dependencies": {
|
|
65
|
-
"strnum": "^1.0.
|
|
65
|
+
"strnum": "^1.0.5"
|
|
66
66
|
}
|
|
67
67
|
}
|
package/src/fxp.d.ts
CHANGED
|
@@ -9,13 +9,17 @@ type X2jOptions = {
|
|
|
9
9
|
parseTagValue: boolean;
|
|
10
10
|
parseAttributeValue: boolean;
|
|
11
11
|
trimValues: boolean;
|
|
12
|
-
|
|
12
|
+
cdataPropName: false | string;
|
|
13
|
+
commentPropName: false | string;
|
|
13
14
|
tagValueProcessor: (tagName: string, tagValue: string, jPath: string, hasAttributes: boolean, isLeafNode: boolean) => string;
|
|
14
15
|
attributeValueProcessor: (attrName: string, attrValue: string, jPath: string) => string;
|
|
15
16
|
numberParseOptions: strnumOptions;
|
|
16
17
|
stopNodes: string[];
|
|
18
|
+
unpairedTags: string[];
|
|
17
19
|
alwaysCreateTextNode: boolean;
|
|
18
20
|
isArray: (tagName: string, jPath: string, isLeafNode: boolean, isAttribute: boolean) => boolean;
|
|
21
|
+
processEntities: boolean;
|
|
22
|
+
htmlEntities: boolean;
|
|
19
23
|
};
|
|
20
24
|
type strnumOptions = {
|
|
21
25
|
hex: boolean;
|
|
@@ -25,6 +29,7 @@ type strnumOptions = {
|
|
|
25
29
|
type X2jOptionsOptional = Partial<X2jOptions>;
|
|
26
30
|
type validationOptions = {
|
|
27
31
|
allowBooleanAttributes: boolean;
|
|
32
|
+
unpairedTags: string[];
|
|
28
33
|
};
|
|
29
34
|
type validationOptionsOptional = Partial<validationOptions>;
|
|
30
35
|
|
|
@@ -33,14 +38,18 @@ type XmlBuilderOptions = {
|
|
|
33
38
|
attributesGroupName: false | string;
|
|
34
39
|
textNodeName: string;
|
|
35
40
|
ignoreAttributes: boolean;
|
|
36
|
-
|
|
41
|
+
cdataPropName: false | string;
|
|
42
|
+
commentPropName: false | string;
|
|
37
43
|
format: boolean;
|
|
38
44
|
indentBy: string;
|
|
39
45
|
arrayNodeName: string;
|
|
40
46
|
suppressEmptyNode: boolean;
|
|
41
47
|
preserveOrder: boolean;
|
|
48
|
+
unpairedTags: string[];
|
|
49
|
+
stopNodes: string[];
|
|
42
50
|
tagValueProcessor: (name: string, value: string) => string;
|
|
43
51
|
attributeValueProcessor: (name: string, value: string) => string;
|
|
52
|
+
processEntities: boolean;
|
|
44
53
|
};
|
|
45
54
|
type XmlBuilderOptionsOptional = Partial<XmlBuilderOptions>;
|
|
46
55
|
|
|
@@ -57,15 +66,19 @@ type ValidationError = {
|
|
|
57
66
|
|
|
58
67
|
export class XMLParser {
|
|
59
68
|
constructor(options?: X2jOptionsOptional);
|
|
60
|
-
parse(xmlData: string | Buffer ,validationOptions?: validationOptionsOptional | boolean);
|
|
69
|
+
parse(xmlData: string | Buffer ,validationOptions?: validationOptionsOptional | boolean): any;
|
|
70
|
+
/**
|
|
71
|
+
* Add Entity which is not by default supported by this library
|
|
72
|
+
* @param entityIndentifier {string} Eg: 'ent' for &ent;
|
|
73
|
+
* @param entityValue {string} Eg: '\r'
|
|
74
|
+
*/
|
|
75
|
+
addEntity(entityIndentifier: string, entityValue: string): void;
|
|
61
76
|
}
|
|
62
77
|
|
|
63
|
-
export
|
|
64
|
-
xmlData: string,
|
|
65
|
-
|
|
66
|
-
): true | ValidationError;
|
|
67
|
-
|
|
78
|
+
export class XMLValidator{
|
|
79
|
+
static validate( xmlData: string, options?: validationOptionsOptional): true | ValidationError;
|
|
80
|
+
}
|
|
68
81
|
export class XMLBuilder {
|
|
69
82
|
constructor(options: XmlBuilderOptionsOptional);
|
|
70
|
-
|
|
83
|
+
build(jObj: any): any;
|
|
71
84
|
}
|
package/src/validator.js
CHANGED
|
@@ -4,9 +4,13 @@ const util = require('./util');
|
|
|
4
4
|
|
|
5
5
|
const defaultOptions = {
|
|
6
6
|
allowBooleanAttributes: false, //A tag can have attributes without any value
|
|
7
|
+
unpairedTags: []
|
|
7
8
|
};
|
|
8
9
|
|
|
9
|
-
const props = [
|
|
10
|
+
const props = [
|
|
11
|
+
'allowBooleanAttributes',
|
|
12
|
+
'unpairedTags'
|
|
13
|
+
];
|
|
10
14
|
|
|
11
15
|
//const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
|
|
12
16
|
exports.validate = function (xmlData, options) {
|
|
@@ -130,6 +134,8 @@ exports.validate = function (xmlData, options) {
|
|
|
130
134
|
//if the root level has been reached before ...
|
|
131
135
|
if (reachedRoot === true) {
|
|
132
136
|
return getErrorObject('InvalidXml', 'Multiple possible root nodes found.', getLineNumberForPosition(xmlData, i));
|
|
137
|
+
} else if(options.unpairedTags.indexOf(tagName) !== -1){
|
|
138
|
+
//don't push into stack
|
|
133
139
|
} else {
|
|
134
140
|
tags.push({tagName, tagStartPos});
|
|
135
141
|
}
|
|
@@ -8,7 +8,7 @@ const defaultOptions = {
|
|
|
8
8
|
attributesGroupName: false,
|
|
9
9
|
textNodeName: '#text',
|
|
10
10
|
ignoreAttributes: true,
|
|
11
|
-
|
|
11
|
+
cdataPropName: false,
|
|
12
12
|
format: false,
|
|
13
13
|
indentBy: ' ',
|
|
14
14
|
suppressEmptyNode: false,
|
|
@@ -18,7 +18,17 @@ const defaultOptions = {
|
|
|
18
18
|
attributeValueProcessor: function(attrName, a) {
|
|
19
19
|
return a;
|
|
20
20
|
},
|
|
21
|
-
preserveOrder: false
|
|
21
|
+
preserveOrder: false,
|
|
22
|
+
commentPropName: false,
|
|
23
|
+
unpairedTags: [],
|
|
24
|
+
entities: {
|
|
25
|
+
">" : { regex: new RegExp(">", "g"), val: "&gt;" },
|
|
26
|
+
"<" : { regex: new RegExp("<", "g"), val: "&lt;" },
|
|
27
|
+
"sQuot" : { regex: new RegExp("\'", "g"), val: "&apos;" },
|
|
28
|
+
"dQuot" : { regex: new RegExp("\"", "g"), val: "&quot;" }
|
|
29
|
+
},
|
|
30
|
+
processEntities: true,
|
|
31
|
+
stopNodes: []
|
|
22
32
|
};
|
|
23
33
|
|
|
24
34
|
const props = [
|
|
@@ -26,7 +36,7 @@ const props = [
|
|
|
26
36
|
'attributesGroupName',
|
|
27
37
|
'textNodeName',
|
|
28
38
|
'ignoreAttributes',
|
|
29
|
-
'
|
|
39
|
+
'cdataPropName',
|
|
30
40
|
'format',
|
|
31
41
|
'indentBy',
|
|
32
42
|
'suppressEmptyNode',
|
|
@@ -34,6 +44,11 @@ const props = [
|
|
|
34
44
|
'attributeValueProcessor',
|
|
35
45
|
'arrayNodeName', //when array as root
|
|
36
46
|
'preserveOrder',
|
|
47
|
+
"commentPropName",
|
|
48
|
+
"unpairedTags",
|
|
49
|
+
"entities",
|
|
50
|
+
"processEntities",
|
|
51
|
+
"stopNodes",
|
|
37
52
|
// 'rootNodeName', //when jsObject have multiple properties on root level
|
|
38
53
|
];
|
|
39
54
|
|
|
@@ -72,6 +87,8 @@ function Builder(options) {
|
|
|
72
87
|
|
|
73
88
|
this.buildTextValNode = buildTextValNode;
|
|
74
89
|
this.buildObjectNode = buildObjectNode;
|
|
90
|
+
|
|
91
|
+
this.replaceEntitiesValue = replaceEntitiesValue;
|
|
75
92
|
}
|
|
76
93
|
|
|
77
94
|
Builder.prototype.build = function(jObj) {
|
|
@@ -101,11 +118,14 @@ Builder.prototype.j2x = function(jObj, level) {
|
|
|
101
118
|
//primitive type
|
|
102
119
|
const attr = this.isAttribute(key);
|
|
103
120
|
if (attr) {
|
|
104
|
-
|
|
121
|
+
let val = this.options.attributeValueProcessor(attr, '' + jObj[key]);
|
|
122
|
+
val = this.replaceEntitiesValue(val);
|
|
123
|
+
attrStr += ' ' + attr + '="' + val + '"';
|
|
105
124
|
}else {
|
|
106
125
|
//tag value
|
|
107
126
|
if (key === this.options.textNodeName) {
|
|
108
|
-
|
|
127
|
+
let newval = this.options.tagValueProcessor(key, '' + jObj[key]);
|
|
128
|
+
val += this.replaceEntitiesValue(newval);
|
|
109
129
|
} else {
|
|
110
130
|
val += this.buildTextNode(jObj[key], key, '', level);
|
|
111
131
|
}
|
|
@@ -131,7 +151,9 @@ Builder.prototype.j2x = function(jObj, level) {
|
|
|
131
151
|
const Ks = Object.keys(jObj[key]);
|
|
132
152
|
const L = Ks.length;
|
|
133
153
|
for (let j = 0; j < L; j++) {
|
|
134
|
-
|
|
154
|
+
let val = this.options.attributeValueProcessor(Ks[j], '' + jObj[key][Ks[j]]);
|
|
155
|
+
val = this.replaceEntitiesValue(val);
|
|
156
|
+
attrStr += ' ' + Ks[j] + '="' + val + '"';
|
|
135
157
|
}
|
|
136
158
|
} else {
|
|
137
159
|
val += this.processTextOrObjNode(jObj[key], key, level)
|
|
@@ -192,21 +214,36 @@ function buildEmptyObjNode(val, key, attrStr, level) {
|
|
|
192
214
|
}
|
|
193
215
|
|
|
194
216
|
function buildTextValNode(val, key, attrStr, level) {
|
|
217
|
+
let textValue = this.options.tagValueProcessor(key, val);
|
|
218
|
+
textValue = this.replaceEntitiesValue(textValue);
|
|
219
|
+
|
|
195
220
|
return (
|
|
196
221
|
this.indentate(level) +
|
|
197
222
|
'<' +
|
|
198
223
|
key +
|
|
199
224
|
attrStr +
|
|
200
225
|
'>' +
|
|
201
|
-
|
|
226
|
+
textValue +
|
|
202
227
|
'</' +
|
|
203
228
|
key +
|
|
204
229
|
this.tagEndChar
|
|
205
230
|
);
|
|
206
231
|
}
|
|
207
232
|
|
|
233
|
+
function replaceEntitiesValue(textValue){
|
|
234
|
+
if(textValue && textValue.length > 0 && this.options.processEntities){
|
|
235
|
+
for (const entityName in this.options.entities) {
|
|
236
|
+
const entity = this.options.entities[entityName];
|
|
237
|
+
textValue = textValue.replace(entity.regex, entity.val);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return textValue;
|
|
241
|
+
}
|
|
242
|
+
|
|
208
243
|
function buildEmptyTextNode(val, key, attrStr, level) {
|
|
209
|
-
if
|
|
244
|
+
if( val === '' && this.options.unpairedTags.indexOf(key) !== -1){
|
|
245
|
+
return this.indentate(level) + '<' + key + attrStr + this.tagEndChar;
|
|
246
|
+
}else if (val !== '') {
|
|
210
247
|
return this.buildTextValNode(val, key, attrStr, level);
|
|
211
248
|
} else {
|
|
212
249
|
return this.indentate(level) + '<' + key + attrStr + '/' + this.tagEndChar;
|
|
@@ -225,8 +262,4 @@ function isAttribute(name /*, options*/) {
|
|
|
225
262
|
}
|
|
226
263
|
}
|
|
227
264
|
|
|
228
|
-
//formatting
|
|
229
|
-
//indentation
|
|
230
|
-
//\n after each closing or self closing tag
|
|
231
|
-
|
|
232
265
|
module.exports = Builder;
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
const {EOL} = require('os');
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
/**
|
|
4
|
+
*
|
|
5
|
+
* @param {array} jArray
|
|
6
|
+
* @param {any} options
|
|
7
|
+
* @returns
|
|
8
|
+
*/
|
|
9
|
+
function toXml(jArray, options){
|
|
10
|
+
return arrToStr( jArray, options, "", 0);
|
|
5
11
|
}
|
|
6
12
|
|
|
7
|
-
function arrToStr(arr, options, level){
|
|
13
|
+
function arrToStr(arr, options, jPath, level){
|
|
8
14
|
let xmlStr = "";
|
|
9
15
|
|
|
10
16
|
let indentation = "";
|
|
@@ -15,19 +21,34 @@ function arrToStr(arr, options, level){
|
|
|
15
21
|
for (let i = 0; i < arr.length; i++) {
|
|
16
22
|
const tagObj = arr[i];
|
|
17
23
|
const tagName = propName(tagObj);
|
|
24
|
+
let newJPath = "";
|
|
25
|
+
if(jPath.length === 0) newJPath = tagName
|
|
26
|
+
else newJPath = `${jPath}.${tagName}`;
|
|
18
27
|
|
|
19
28
|
if(tagName === options.textNodeName){
|
|
20
|
-
|
|
29
|
+
let tagText = tagObj[tagName];
|
|
30
|
+
if(!isStopNode(newJPath, options)){
|
|
31
|
+
tagText = options.tagValueProcessor( tagName, tagText);
|
|
32
|
+
tagText = replaceEntitiesValue(tagText, options);
|
|
33
|
+
}
|
|
34
|
+
xmlStr += indentation + tagText;
|
|
21
35
|
continue;
|
|
22
|
-
}else if( tagName === options.
|
|
36
|
+
}else if( tagName === options.cdataPropName){
|
|
23
37
|
xmlStr += indentation + `<![CDATA[${tagObj[tagName][0][options.textNodeName]}]]>`;
|
|
24
38
|
continue;
|
|
39
|
+
}else if( tagName === options.commentPropName){
|
|
40
|
+
xmlStr += indentation + `<!--${tagObj[tagName][0][options.textNodeName]}-->`;
|
|
41
|
+
continue;
|
|
25
42
|
}
|
|
26
|
-
const attStr = attr_to_str(tagObj
|
|
43
|
+
const attStr = attr_to_str(tagObj[":@"], options);
|
|
27
44
|
let tagStart = indentation + `<${tagName}${attStr}`;
|
|
28
|
-
let tagValue = arrToStr(tagObj[tagName], options, level + 1);
|
|
45
|
+
let tagValue = arrToStr(tagObj[tagName], options, newJPath, level + 1);
|
|
29
46
|
if( (!tagValue || tagValue.length === 0) && options.suppressEmptyNode){
|
|
30
|
-
|
|
47
|
+
if(options.unpairedTags.indexOf(tagName) !== -1){
|
|
48
|
+
xmlStr += tagStart + ">";
|
|
49
|
+
}else{
|
|
50
|
+
xmlStr += tagStart + "/>";
|
|
51
|
+
}
|
|
31
52
|
}else{
|
|
32
53
|
//TODO: node with only text value should not parse the text value in next line
|
|
33
54
|
xmlStr += tagStart + `>${tagValue}${indentation}</${tagName}>` ;
|
|
@@ -41,7 +62,7 @@ function propName(obj){
|
|
|
41
62
|
const keys = Object.keys(obj);
|
|
42
63
|
for (let i = 0; i < keys.length; i++) {
|
|
43
64
|
const key = keys[i];
|
|
44
|
-
if(key !== "
|
|
65
|
+
if(key !== ":@") return key;
|
|
45
66
|
}
|
|
46
67
|
}
|
|
47
68
|
|
|
@@ -49,10 +70,30 @@ function attr_to_str(attrMap, options){
|
|
|
49
70
|
let attrStr = "";
|
|
50
71
|
if(attrMap && !options.ignoreAttributes){
|
|
51
72
|
for( attr in attrMap){
|
|
52
|
-
|
|
73
|
+
let attrVal = options.attributeValueProcessor(attr, attrMap[attr]);
|
|
74
|
+
attrVal = replaceEntitiesValue(attrVal, options);
|
|
75
|
+
attrStr+= ` ${attr.substr(options.attributeNamePrefix.length)}="${attrVal}"`;
|
|
53
76
|
}
|
|
54
77
|
}
|
|
55
78
|
return attrStr;
|
|
56
79
|
}
|
|
57
80
|
|
|
81
|
+
/**
 * Tells whether a text node lives directly inside a configured stop node,
 * in which case its text must be emitted verbatim by the builder.
 *
 * @param {string} jPath dot-separated path of the text node; its last segment
 *                       is the text-node property name (e.g. "root.script.#text")
 * @param {object} options builder options; `textNodeName` and `stopNodes` are read
 * @returns {boolean} true when the parent path is listed in `stopNodes`, either
 *                    literally or through a global "*.tagName" expression
 */
function isStopNode(jPath, options){
    // Drop the trailing ".<textNodeName>" segment to get the parent tag path.
    // The length is clamped at 0 so a root-level text node yields "" (as `substr` did).
    const parentLength = Math.max(0, jPath.length - options.textNodeName.length - 1);
    const parentPath = jPath.slice(0, parentLength);
    const tagName = parentPath.slice(parentPath.lastIndexOf(".") + 1);

    // Look the values up directly instead of `for...in` over the array, which
    // would also visit inherited enumerable properties.
    const stopNodes = options.stopNodes || [];
    return stopNodes.indexOf(parentPath) !== -1 || stopNodes.indexOf("*." + tagName) !== -1;
}
|
|
89
|
+
|
|
90
|
+
function replaceEntitiesValue(textValue, options){
|
|
91
|
+
if(textValue && textValue.length > 0 && options.processEntities){
|
|
92
|
+
for (const entityName in options.entities) {
|
|
93
|
+
const entity = options.entities[entityName];
|
|
94
|
+
textValue = textValue.replace(entity.regex, entity.val);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return textValue;
|
|
98
|
+
}
|
|
58
99
|
module.exports = toXml;
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
 * Reads a `<!DOCTYPE ...>` declaration and collects the internal entities
 * declared in its body (`[...]`).
 *
 * `<!ELEMENT ...>` declarations are skipped. Comments are skipped as opaque
 * text, so `<`, `>` and `[` inside a comment do not affect parsing.
 *
 * @param {string} xmlData complete XML text
 * @param {number} i index of the `<` that opens `<!DOCTYPE`
 * @returns {{entities: Object, i: number}} the declared entities keyed by name
 *          (each `{regx, val}`) and the index of the `>` closing the DOCTYPE
 * @throws {Error} when the tag is not a DOCTYPE, when it contains an
 *         unsupported declaration, or when it is never closed
 */
function readDocType(xmlData, i){

    const entities = {};
    // The caller points at "<!D..."; only the remaining "OCTYPE" is verified here.
    if( !xmlData.startsWith("OCTYPE", i + 3) ){
        throw new Error(`Invalid Tag instead of DOCTYPE`);
    }

    i = i+9;
    let angleBracketsCount = 1; // the DOCTYPE tag itself is already open
    let hasBody = false, entity = false, comment = false;
    let exp = "";
    for(;i<xmlData.length;i++){
        const ch = xmlData[i];
        if(comment){
            // Inside a comment everything is plain text until the closing "-->".
            if( ch === '>' && xmlData[i - 1] === "-" && xmlData[i - 2] === "-"){
                comment = false;
                angleBracketsCount--;
            }
            continue;
        }
        if (ch === '<') {
            if( hasBody && xmlData.startsWith("!ENTITY", i + 1) ){
                i += 7;
                entity = true;
            }else if( hasBody && xmlData.startsWith("!ELEMENT", i + 1) ){
                //Not supported
                i += 8;
            }else if( xmlData.startsWith("!--", i + 1) ){
                comment = true;
            }else{
                throw new Error("Invalid DOCTYPE");
            }
            angleBracketsCount++;
            exp = "";
        } else if (ch === '>') {
            if(entity){
                parseEntityExp(exp, entities);
                entity = false;
            }
            angleBracketsCount--;
            if (angleBracketsCount === 0) {
                break;
            }
        }else if( ch === '['){
            hasBody = true;
        }else{
            exp += ch;
        }
    }
    if(angleBracketsCount !== 0){
        throw new Error(`Unclosed DOCTYPE`);
    }
    return {entities, i};
}

// Matches the text that follows "<!ENTITY": one whitespace, the entity name,
// one space/tab, then a quoted value that must not contain '&'.
// The name is limited to letters, digits and '_' because it is interpolated
// into a RegExp below.
const entityRegex = RegExp("^\\s([a-zA-Z0-9_]+)[ \t](['\"])([^&]+)\\2");

/**
 * Parses one entity declaration and registers it in `entities`.
 * Declarations that do not match `entityRegex` are ignored.
 *
 * @param {string} exp text between `<!ENTITY` and the closing `>`
 * @param {Object} entities map to extend; receives `{regx, val}` under the entity name
 */
function parseEntityExp(exp, entities){
    const match = entityRegex.exec(exp);
    if(match){
        entities[ match[1] ] = {
            regx : RegExp( `&${match[1]};`,"g"),
            val: match[3]
        };
    }
}
|
|
92
|
+
module.exports = readDocType;
|
|
@@ -11,7 +11,7 @@ const defaultOptions = {
|
|
|
11
11
|
parseTagValue: true,
|
|
12
12
|
parseAttributeValue: false,
|
|
13
13
|
trimValues: true, //Trim string values of tag and attributes
|
|
14
|
-
|
|
14
|
+
cdataPropName: false,
|
|
15
15
|
numberParseOptions: {
|
|
16
16
|
hex: true,
|
|
17
17
|
leadingZeros: true
|
|
@@ -24,7 +24,11 @@ const defaultOptions = {
|
|
|
24
24
|
},
|
|
25
25
|
stopNodes: [], //nested tags will not be parsed even for errors
|
|
26
26
|
alwaysCreateTextNode: false,
|
|
27
|
-
isArray: () => false
|
|
27
|
+
isArray: () => false,
|
|
28
|
+
commentPropName: false,
|
|
29
|
+
unpairedTags: [],
|
|
30
|
+
processEntities: true,
|
|
31
|
+
htmlEntities: false,
|
|
28
32
|
};
|
|
29
33
|
|
|
30
34
|
const props = [
|
|
@@ -38,13 +42,17 @@ const props = [
|
|
|
38
42
|
'parseTagValue',
|
|
39
43
|
'parseAttributeValue',
|
|
40
44
|
'trimValues',
|
|
41
|
-
'
|
|
45
|
+
'cdataPropName',
|
|
42
46
|
'tagValueProcessor',
|
|
43
47
|
'attributeValueProcessor',
|
|
44
48
|
'numberParseOptions',
|
|
45
49
|
'stopNodes',
|
|
46
50
|
'alwaysCreateTextNode',
|
|
47
51
|
'isArray',
|
|
52
|
+
'commentPropName',
|
|
53
|
+
'unpairedTags',
|
|
54
|
+
'processEntities',
|
|
55
|
+
'htmlEntities'
|
|
48
56
|
];
|
|
49
57
|
|
|
50
58
|
const util = require('../util');
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const util = require('../util');
|
|
4
4
|
const xmlNode = require('./xmlNode');
|
|
5
|
+
const readDocType = require("./DocTypeReader");
|
|
5
6
|
const toNumber = require("strnum");
|
|
6
7
|
|
|
7
8
|
const regx =
|
|
@@ -11,43 +12,87 @@ const regx =
|
|
|
11
12
|
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
|
|
12
13
|
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
|
|
13
14
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
class OrderedObjParser{
|
|
16
|
+
constructor(options){
|
|
17
|
+
this.options = options;
|
|
18
|
+
this.currentNode = null;
|
|
19
|
+
this.tagsNodeStack = [];
|
|
20
|
+
this.docTypeEntities = {};
|
|
21
|
+
this.lastEntities = {
|
|
22
|
+
"amp" : { regex: /&(amp|#38|#x26);/g, val : "&"},
|
|
23
|
+
"apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
|
|
24
|
+
"gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
|
|
25
|
+
"lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
|
|
26
|
+
"quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
|
|
27
|
+
};
|
|
28
|
+
this.htmlEntities = {
|
|
29
|
+
"space": { regex: /&(nbsp|#160);/g, val: " " },
|
|
30
|
+
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
|
|
31
|
+
// "gt" : { regex: /&(gt|#62);/g, val: ">" },
|
|
32
|
+
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
|
|
33
|
+
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
|
|
34
|
+
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
|
|
35
|
+
"cent" : { regex: /&(cent|#162);/g, val: "¢" },
|
|
36
|
+
"pound" : { regex: /&(pound|#163);/g, val: "£" },
|
|
37
|
+
"yen" : { regex: /&(yen|#165);/g, val: "¥" },
|
|
38
|
+
"euro" : { regex: /&(euro|#8364);/g, val: "€" },
|
|
39
|
+
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
|
|
40
|
+
"reg" : { regex: /&(reg|#174);/g, val: "®" },
|
|
41
|
+
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
|
|
42
|
+
};
|
|
43
|
+
this.addExternalEntities = addExternalEntities;
|
|
44
|
+
this.parseXml = parseXml;
|
|
45
|
+
this.parseTextData = parseTextData;
|
|
46
|
+
this.resolveNameSpace = resolveNameSpace;
|
|
47
|
+
this.buildAttributesMap = buildAttributesMap;
|
|
48
|
+
this.isItStopNode = isItStopNode;
|
|
49
|
+
this.replaceEntitiesValue = replaceEntitiesValue;
|
|
50
|
+
this.readTagExp = readTagExp;
|
|
51
|
+
this.readStopNodeData = readStopNodeData;
|
|
52
|
+
}
|
|
53
|
+
|
|
20
54
|
}
|
|
21
55
|
|
|
56
|
+
/**
 * Registers caller-supplied entities so that `&name;` is replaced by the
 * given value. Entries are stored in `this.lastEntities`, so this function
 * must be invoked with `this` bound to an object exposing that map.
 *
 * @param {Object<string, string>} externalEntities map of entity name
 *        (without '&' and ';') to its replacement value
 */
function addExternalEntities(externalEntities){
    const entKeys = Object.keys(externalEntities);
    for (let i = 0; i < entKeys.length; i++) {
        const ent = entKeys[i];
        // Escape regex metacharacters so the name is matched literally:
        // otherwise "a.b" would also match "&acb;" and "x(" would throw.
        const escapedName = ent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        this.lastEntities[ent] = {
            regex: new RegExp("&"+escapedName+";","g"),
            val : externalEntities[ent]
        }
    }
}
|
|
22
66
|
|
|
23
67
|
/**
|
|
24
68
|
* @param {string} val
|
|
25
|
-
* @param {object} options
|
|
26
69
|
* @param {string} tagName
|
|
27
70
|
* @param {string} jPath
|
|
28
71
|
* @param {boolean} dontTrim
|
|
29
72
|
* @param {boolean} hasAttributes
|
|
30
73
|
* @param {boolean} isLeafNode
|
|
31
74
|
*/
|
|
32
|
-
function
|
|
75
|
+
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode) {
|
|
33
76
|
if (val !== undefined) {
|
|
34
|
-
if (options.trimValues && !dontTrim) {
|
|
77
|
+
if (this.options.trimValues && !dontTrim) {
|
|
35
78
|
val = val.trim();
|
|
36
79
|
}
|
|
37
80
|
if(val.length > 0){
|
|
38
|
-
|
|
81
|
+
val = this.replaceEntitiesValue(val);
|
|
82
|
+
|
|
83
|
+
const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
|
|
39
84
|
if(newval === null || newval === undefined){
|
|
40
85
|
//don't parse
|
|
41
86
|
return val;
|
|
42
87
|
}else if(typeof newval !== typeof val || newval !== val){
|
|
43
88
|
//overwrite
|
|
44
89
|
return newval;
|
|
45
|
-
}else if(options.trimValues){
|
|
46
|
-
return
|
|
90
|
+
}else if(this.options.trimValues){
|
|
91
|
+
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
47
92
|
}else{
|
|
48
93
|
const trimmedVal = val.trim();
|
|
49
94
|
if(trimmedVal === val){
|
|
50
|
-
return
|
|
95
|
+
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
51
96
|
}else{
|
|
52
97
|
return val;
|
|
53
98
|
}
|
|
@@ -56,8 +101,8 @@ function parseValue(val, options, tagName, jPath, dontTrim, hasAttributes, isLea
|
|
|
56
101
|
}
|
|
57
102
|
}
|
|
58
103
|
|
|
59
|
-
function resolveNameSpace(tagname
|
|
60
|
-
if (options.removeNSPrefix) {
|
|
104
|
+
function resolveNameSpace(tagname) {
|
|
105
|
+
if (this.options.removeNSPrefix) {
|
|
61
106
|
const tags = tagname.split(':');
|
|
62
107
|
const prefix = tagname.charAt(0) === '/' ? '/' : '';
|
|
63
108
|
if (tags[0] === 'xmlns') {
|
|
@@ -70,28 +115,12 @@ function resolveNameSpace(tagname, options) {
|
|
|
70
115
|
return tagname;
|
|
71
116
|
}
|
|
72
117
|
|
|
73
|
-
function _parseValue(val, shouldParse, options) {
|
|
74
|
-
if (shouldParse && typeof val === 'string') {
|
|
75
|
-
//console.log(options)
|
|
76
|
-
const newval = val.trim();
|
|
77
|
-
if(newval === 'true' ) return true;
|
|
78
|
-
else if(newval === 'false' ) return false;
|
|
79
|
-
else return toNumber(val, options);
|
|
80
|
-
} else {
|
|
81
|
-
if (util.isExist(val)) {
|
|
82
|
-
return val;
|
|
83
|
-
} else {
|
|
84
|
-
return '';
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
118
|
//TODO: change regex to capture NS
|
|
90
119
|
//const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
|
|
91
120
|
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
|
|
92
121
|
|
|
93
|
-
function buildAttributesMap(attrStr, jPath
|
|
94
|
-
if (!options.ignoreAttributes && typeof attrStr === 'string') {
|
|
122
|
+
function buildAttributesMap(attrStr, jPath) {
|
|
123
|
+
if (!this.options.ignoreAttributes && typeof attrStr === 'string') {
|
|
95
124
|
// attrStr = attrStr.replace(/\r?\n/g, ' ');
|
|
96
125
|
//attrStr = attrStr || attrStr.trim();
|
|
97
126
|
|
|
@@ -99,16 +128,16 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
99
128
|
const len = matches.length; //don't make it inline
|
|
100
129
|
const attrs = {};
|
|
101
130
|
for (let i = 0; i < len; i++) {
|
|
102
|
-
const attrName = resolveNameSpace(matches[i][1]
|
|
131
|
+
const attrName = this.resolveNameSpace(matches[i][1]);
|
|
103
132
|
let oldVal = matches[i][4];
|
|
104
|
-
const aName = options.attributeNamePrefix + attrName;
|
|
133
|
+
const aName = this.options.attributeNamePrefix + attrName;
|
|
105
134
|
if (attrName.length) {
|
|
106
135
|
if (oldVal !== undefined) {
|
|
107
|
-
if (options.trimValues) {
|
|
136
|
+
if (this.options.trimValues) {
|
|
108
137
|
oldVal = oldVal.trim();
|
|
109
138
|
}
|
|
110
|
-
|
|
111
|
-
const newVal = options.attributeValueProcessor(attrName, oldVal, jPath);
|
|
139
|
+
oldVal = this.replaceEntitiesValue(oldVal);
|
|
140
|
+
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath);
|
|
112
141
|
if(newVal === null || newVal === undefined){
|
|
113
142
|
//don't parse
|
|
114
143
|
attrs[aName] = oldVal;
|
|
@@ -117,13 +146,13 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
117
146
|
attrs[aName] = newVal;
|
|
118
147
|
}else{
|
|
119
148
|
//parse
|
|
120
|
-
attrs[aName] =
|
|
149
|
+
attrs[aName] = parseValue(
|
|
121
150
|
oldVal,
|
|
122
|
-
options.parseAttributeValue,
|
|
123
|
-
options.numberParseOptions
|
|
151
|
+
this.options.parseAttributeValue,
|
|
152
|
+
this.options.numberParseOptions
|
|
124
153
|
);
|
|
125
154
|
}
|
|
126
|
-
} else if (options.allowBooleanAttributes) {
|
|
155
|
+
} else if (this.options.allowBooleanAttributes) {
|
|
127
156
|
attrs[aName] = true;
|
|
128
157
|
}
|
|
129
158
|
}
|
|
@@ -131,23 +160,21 @@ function buildAttributesMap(attrStr, jPath, options) {
|
|
|
131
160
|
if (!Object.keys(attrs).length) {
|
|
132
161
|
return;
|
|
133
162
|
}
|
|
134
|
-
if (options.attributesGroupName) {
|
|
163
|
+
if (this.options.attributesGroupName) {
|
|
135
164
|
const attrCollection = {};
|
|
136
|
-
attrCollection[options.attributesGroupName] = attrs;
|
|
165
|
+
attrCollection[this.options.attributesGroupName] = attrs;
|
|
137
166
|
return attrCollection;
|
|
138
167
|
}
|
|
139
168
|
return attrs;
|
|
140
169
|
}
|
|
141
170
|
}
|
|
142
171
|
|
|
143
|
-
const
|
|
144
|
-
xmlData = xmlData.replace(/\r\n?/g, "\n");
|
|
172
|
+
const parseXml = function(xmlData) {
|
|
173
|
+
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
|
|
145
174
|
const xmlObj = new xmlNode('!xml');
|
|
146
175
|
let currentNode = xmlObj;
|
|
147
176
|
let textData = "";
|
|
148
|
-
const tagsNodeStack = [];
|
|
149
177
|
let jPath = "";
|
|
150
|
-
|
|
151
178
|
for(let i=0; i< xmlData.length; i++){//for each char in XML data
|
|
152
179
|
const ch = xmlData[i];
|
|
153
180
|
if(ch === '<'){
|
|
@@ -157,109 +184,102 @@ const parseToOrderedJsObj = function(xmlData, options) {
|
|
|
157
184
|
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
|
|
158
185
|
let tagName = xmlData.substring(i+2,closeIndex).trim();
|
|
159
186
|
|
|
160
|
-
if(options.removeNSPrefix){
|
|
187
|
+
if(this.options.removeNSPrefix){
|
|
161
188
|
const colonIndex = tagName.indexOf(":");
|
|
162
189
|
if(colonIndex !== -1){
|
|
163
190
|
tagName = tagName.substr(colonIndex+1);
|
|
164
191
|
}
|
|
165
192
|
}
|
|
166
|
-
|
|
193
|
+
|
|
167
194
|
if(currentNode){
|
|
168
|
-
textData =
|
|
169
|
-
, options
|
|
195
|
+
textData = this.parseTextData(textData
|
|
170
196
|
, currentNode.tagname
|
|
171
197
|
, jPath
|
|
172
198
|
,false
|
|
173
|
-
, currentNode
|
|
199
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
174
200
|
, Object.keys(currentNode.child).length === 0);
|
|
175
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
201
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
176
202
|
textData = "";
|
|
177
203
|
}
|
|
178
204
|
|
|
179
|
-
if (isItStopNode(options.stopNodes, tagsNodeStack, currentNode.tagname)) { //TODO: namespace
|
|
180
|
-
const top = tagsNodeStack[tagsNodeStack.length - 1];
|
|
181
|
-
const stopNode = top.child[ top.child.length -1 ];
|
|
182
|
-
stopNode[currentNode.tagname] = [ { [options.textNodeName] :xmlData.substr(currentNode.startIndex + 1, i - currentNode.startIndex - 1) }];
|
|
183
|
-
}
|
|
184
|
-
|
|
185
205
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
186
206
|
|
|
187
|
-
currentNode = tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
|
|
207
|
+
currentNode = this.tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
|
|
188
208
|
textData = "";
|
|
189
209
|
i = closeIndex;
|
|
190
210
|
} else if( xmlData[i+1] === '?') {
|
|
191
211
|
i = findClosingIndex(xmlData, "?>", i, "Pi Tag is not closed.")
|
|
192
212
|
} else if(xmlData.substr(i + 1, 3) === '!--') {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
213
|
+
const endIndex = findClosingIndex(xmlData, "-->", i, "Comment is not closed.")
|
|
214
|
+
if(this.options.commentPropName){
|
|
215
|
+
const comment = xmlData.substring(i + 4, endIndex - 2);
|
|
216
|
+
|
|
217
|
+
//TODO: remove repeated code
|
|
218
|
+
if(textData){ //store previously collected data as textNode
|
|
219
|
+
textData = this.parseTextData(textData
|
|
220
|
+
, currentNode.tagname
|
|
221
|
+
, jPath
|
|
222
|
+
,false
|
|
223
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
224
|
+
, Object.keys(currentNode.child).length === 0);
|
|
225
|
+
|
|
226
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
227
|
+
textData = "";
|
|
228
|
+
}
|
|
229
|
+
currentNode.add(this.options.commentPropName, [ { [this.options.textNodeName] : comment } ]);
|
|
201
230
|
}
|
|
231
|
+
i = endIndex;
|
|
232
|
+
} else if( xmlData.substr(i + 1, 2) === '!D') {
|
|
233
|
+
const result = readDocType(xmlData, i);
|
|
234
|
+
this.docTypeEntities = result.entities;
|
|
235
|
+
i = result.i;
|
|
202
236
|
}else if(xmlData.substr(i + 1, 2) === '![') {
|
|
203
237
|
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
|
|
204
238
|
const tagExp = xmlData.substring(i + 9,closeIndex);
|
|
205
239
|
|
|
206
240
|
if(textData){ //store previously collected data as textNode
|
|
207
|
-
textData =
|
|
208
|
-
, options
|
|
241
|
+
textData = this.parseTextData(textData
|
|
209
242
|
, currentNode.tagname
|
|
210
243
|
, jPath
|
|
211
244
|
,false
|
|
212
|
-
, currentNode
|
|
245
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
213
246
|
, Object.keys(currentNode.child).length === 0);
|
|
214
247
|
|
|
215
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
248
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
216
249
|
textData = "";
|
|
217
250
|
}
|
|
218
251
|
|
|
219
252
|
//cdata should be set even if it is 0 length string
|
|
220
|
-
if(options.
|
|
221
|
-
let val =
|
|
222
|
-
if(!val) val = "";
|
|
223
|
-
currentNode.add(options.
|
|
253
|
+
if(this.options.cdataPropName){
|
|
254
|
+
// let val = this.parseTextData(tagExp, this.options.cdataPropName, jPath + "." + this.options.cdataPropName, true, false, true);
|
|
255
|
+
// if(!val) val = "";
|
|
256
|
+
currentNode.add(this.options.cdataPropName, [ { [this.options.textNodeName] : tagExp } ]);
|
|
224
257
|
}else{
|
|
225
|
-
let val =
|
|
258
|
+
let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true);
|
|
226
259
|
if(!val) val = "";
|
|
227
|
-
currentNode.add(options.textNodeName, val);
|
|
260
|
+
currentNode.add(this.options.textNodeName, val);
|
|
228
261
|
}
|
|
229
262
|
|
|
230
263
|
i = closeIndex + 2;
|
|
231
264
|
}else {//Opening tag
|
|
232
|
-
|
|
233
|
-
let
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
let
|
|
237
|
-
let
|
|
238
|
-
if(separatorIndex !== -1){//separate tag name and attributes expression
|
|
239
|
-
tagName = tagExp.substr(0, separatorIndex).replace(/\s\s*$/, '');
|
|
240
|
-
tagExp = tagExp.substr(separatorIndex + 1);
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
if(options.removeNSPrefix){
|
|
244
|
-
const colonIndex = tagName.indexOf(":");
|
|
245
|
-
if(colonIndex !== -1){
|
|
246
|
-
tagName = tagName.substr(colonIndex+1);
|
|
247
|
-
shouldBuildAttributesMap = tagName !== result.data.substr(colonIndex + 1);
|
|
248
|
-
}
|
|
249
|
-
}
|
|
265
|
+
|
|
266
|
+
let result = this.readTagExp(xmlData,i);
|
|
267
|
+
let tagName= result.tagName;
|
|
268
|
+
let tagExp = result.tagExp;
|
|
269
|
+
let attrExpPresent = result.attrExpPresent;
|
|
270
|
+
let closeIndex = result.closeIndex;
|
|
250
271
|
|
|
251
272
|
//save text as child node
|
|
252
273
|
if (currentNode && textData) {
|
|
253
274
|
if(currentNode.tagname !== '!xml'){
|
|
254
275
|
//when nested tag is found
|
|
255
|
-
textData =
|
|
256
|
-
, options
|
|
276
|
+
textData = this.parseTextData(textData
|
|
257
277
|
, currentNode.tagname
|
|
258
278
|
, jPath
|
|
259
279
|
, false
|
|
260
|
-
, currentNode
|
|
280
|
+
, currentNode[":@"] ? Object.keys(currentNode[":@"]).length !== 0 : false
|
|
261
281
|
, false);
|
|
262
|
-
if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
|
|
282
|
+
if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
|
|
263
283
|
textData = "";
|
|
264
284
|
}
|
|
265
285
|
}
|
|
@@ -268,70 +288,108 @@ const parseToOrderedJsObj = function(xmlData, options) {
|
|
|
268
288
|
jPath += jPath ? "." + tagName : tagName;
|
|
269
289
|
}
|
|
270
290
|
|
|
271
|
-
if
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
291
|
+
//check if last tag was unpaired tag
|
|
292
|
+
const lastTag = currentNode;
|
|
293
|
+
if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1 ){
|
|
294
|
+
currentNode = this.tagsNodeStack.pop();
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
if (this.isItStopNode(this.options.stopNodes, jPath, tagName)) { //TODO: namespace
|
|
298
|
+
let tagContent = "";
|
|
299
|
+
//self-closing tag
|
|
300
|
+
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){}
|
|
301
|
+
//boolean tag
|
|
302
|
+
else if(this.options.unpairedTags.indexOf(tagName) !== -1){}
|
|
303
|
+
//normal tag
|
|
304
|
+
else{
|
|
305
|
+
//read until closing tag is found
|
|
306
|
+
const result = this.readStopNodeData(xmlData, tagName, closeIndex + 1);
|
|
307
|
+
if(!result) throw new Error(`Unexpected end of ${tagName}`);
|
|
308
|
+
i = result.i;
|
|
309
|
+
tagContent = result.tagContent;
|
|
278
310
|
}
|
|
279
311
|
|
|
280
312
|
const childNode = new xmlNode(tagName);
|
|
281
|
-
if(tagName !== tagExp &&
|
|
282
|
-
childNode
|
|
313
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
314
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
283
315
|
}
|
|
284
316
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
285
|
-
|
|
286
|
-
currentNode.addChild(childNode);
|
|
287
|
-
}else{//opening tag
|
|
288
|
-
|
|
289
|
-
const childNode = new xmlNode( tagName);
|
|
290
|
-
tagsNodeStack.push(currentNode);
|
|
291
|
-
|
|
292
|
-
childNode.startIndex=closeIndex; //for further processing
|
|
317
|
+
childNode.add(this.options.textNodeName, tagContent);
|
|
293
318
|
|
|
294
|
-
if(tagName !== tagExp && shouldBuildAttributesMap){
|
|
295
|
-
childNode.attrsMap = buildAttributesMap(tagExp, jPath, options);
|
|
296
|
-
}
|
|
297
319
|
currentNode.addChild(childNode);
|
|
298
|
-
|
|
320
|
+
}else{
|
|
321
|
+
//selfClosing tag
|
|
322
|
+
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
|
|
323
|
+
|
|
324
|
+
if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
|
|
325
|
+
tagName = tagName.substr(0, tagName.length - 1);
|
|
326
|
+
tagExp = tagName;
|
|
327
|
+
}else{
|
|
328
|
+
tagExp = tagExp.substr(0, tagExp.length - 1);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
const childNode = new xmlNode(tagName);
|
|
332
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
333
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
334
|
+
}
|
|
335
|
+
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
336
|
+
currentNode.addChild(childNode);
|
|
337
|
+
}
|
|
338
|
+
//opening tag
|
|
339
|
+
else{
|
|
340
|
+
const childNode = new xmlNode( tagName);
|
|
341
|
+
this.tagsNodeStack.push(currentNode);
|
|
342
|
+
|
|
343
|
+
if(tagName !== tagExp && attrExpPresent){
|
|
344
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
345
|
+
}
|
|
346
|
+
currentNode.addChild(childNode);
|
|
347
|
+
currentNode = childNode;
|
|
348
|
+
}
|
|
349
|
+
textData = "";
|
|
350
|
+
i = closeIndex;
|
|
299
351
|
}
|
|
300
|
-
textData = "";
|
|
301
|
-
i = closeIndex;
|
|
302
352
|
}
|
|
303
353
|
}else{
|
|
304
354
|
textData += xmlData[i];
|
|
305
355
|
}
|
|
306
356
|
}
|
|
307
|
-
return xmlObj.child
|
|
357
|
+
return xmlObj.child;
|
|
308
358
|
}
|
|
309
359
|
|
|
360
|
+
const replaceEntitiesValue = function(val){
|
|
361
|
+
if(this.options.processEntities){
|
|
362
|
+
for(let entityName in this.docTypeEntities){
|
|
363
|
+
const entity = this.docTypeEntities[entityName];
|
|
364
|
+
val = val.replace( entity.regx, entity.val);
|
|
365
|
+
}
|
|
366
|
+
for(let entityName in this.lastEntities){
|
|
367
|
+
const entity = this.lastEntities[entityName];
|
|
368
|
+
val = val.replace( entity.regex, entity.val);
|
|
369
|
+
}
|
|
370
|
+
if(this.options.htmlEntities){
|
|
371
|
+
for(let entityName in this.htmlEntities){
|
|
372
|
+
const entity = this.htmlEntities[entityName];
|
|
373
|
+
val = val.replace( entity.regex, entity.val);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
return val;
|
|
378
|
+
}
|
|
310
379
|
//TODO: use jPath to simplify the logic
|
|
311
380
|
/**
|
|
312
381
|
*
|
|
313
382
|
* @param {string[]} stopNodes
|
|
314
|
-
* @param {
|
|
383
|
+
* @param {string} jPath
|
|
384
|
+
* @param {string} currentTagName
|
|
315
385
|
*/
|
|
316
|
-
function isItStopNode(stopNodes,
|
|
317
|
-
const
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
if(
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
if(matchingStopNodes.length > 0){
|
|
324
|
-
let jPath = "";
|
|
325
|
-
for (let i = 1; i < tagsNodeStack.length; i++) {
|
|
326
|
-
const node = tagsNodeStack[i];
|
|
327
|
-
jPath += "." + node.tagname;
|
|
328
|
-
}
|
|
329
|
-
jPath += "." + currentTagName;
|
|
330
|
-
jPath = jPath.substr(1);
|
|
331
|
-
for (let i = 0; i < matchingStopNodes.length; i++) {
|
|
332
|
-
if(matchingStopNodes[i] === jPath) return true;
|
|
333
|
-
}
|
|
334
|
-
}else return false;
|
|
386
|
+
function isItStopNode(stopNodes, jPath, currentTagName){
|
|
387
|
+
const allNodesExp = "*." + currentTagName;
|
|
388
|
+
for (const stopNodePath in stopNodes) {
|
|
389
|
+
const stopNodeExp = stopNodes[stopNodePath];
|
|
390
|
+
if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true;
|
|
391
|
+
}
|
|
392
|
+
return false;
|
|
335
393
|
}
|
|
336
394
|
|
|
337
395
|
/**
|
|
@@ -370,4 +428,71 @@ function findClosingIndex(xmlData, str, i, errMsg){
|
|
|
370
428
|
}
|
|
371
429
|
}
|
|
372
430
|
|
|
373
|
-
|
|
431
|
+
function readTagExp(xmlData,i){
|
|
432
|
+
const result = tagExpWithClosingIndex(xmlData, i+1);
|
|
433
|
+
let tagExp = result.data;
|
|
434
|
+
const closeIndex = result.index;
|
|
435
|
+
const separatorIndex = tagExp.search(/\s/);
|
|
436
|
+
let tagName = tagExp;
|
|
437
|
+
let attrExpPresent = true;
|
|
438
|
+
if(separatorIndex !== -1){//separate tag name and attributes expression
|
|
439
|
+
tagName = tagExp.substr(0, separatorIndex).replace(/\s\s*$/, '');
|
|
440
|
+
tagExp = tagExp.substr(separatorIndex + 1);
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
if(this. options.removeNSPrefix){
|
|
444
|
+
const colonIndex = tagName.indexOf(":");
|
|
445
|
+
if(colonIndex !== -1){
|
|
446
|
+
tagName = tagName.substr(colonIndex+1);
|
|
447
|
+
attrExpPresent = tagName !== result.data.substr(colonIndex + 1);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
return {
|
|
452
|
+
tagName: tagName,
|
|
453
|
+
tagExp: tagExp,
|
|
454
|
+
closeIndex: closeIndex,
|
|
455
|
+
attrExpPresent: attrExpPresent,
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
/**
|
|
459
|
+
* find paired tag for a stop node
|
|
460
|
+
* @param {string} xmlData
|
|
461
|
+
* @param {string} tagName
|
|
462
|
+
* @param {number} i
|
|
463
|
+
*/
|
|
464
|
+
function readStopNodeData(xmlData, tagName, i){
|
|
465
|
+
const startIndex = i;
|
|
466
|
+
for (; i < xmlData.length; i++) {
|
|
467
|
+
if( xmlData[i] === "<" && xmlData[i+1] === "/"){
|
|
468
|
+
const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
|
|
469
|
+
let closeTagName = xmlData.substring(i+2,closeIndex).trim();
|
|
470
|
+
if(closeTagName === tagName){
|
|
471
|
+
return {
|
|
472
|
+
tagContent: xmlData.substring(startIndex, i),
|
|
473
|
+
i : closeIndex
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
i=closeIndex;
|
|
477
|
+
}
|
|
478
|
+
}//end for loop
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
function parseValue(val, shouldParse, options) {
|
|
482
|
+
if (shouldParse && typeof val === 'string') {
|
|
483
|
+
//console.log(options)
|
|
484
|
+
const newval = val.trim();
|
|
485
|
+
if(newval === 'true' ) return true;
|
|
486
|
+
else if(newval === 'false' ) return false;
|
|
487
|
+
else return toNumber(val, options);
|
|
488
|
+
} else {
|
|
489
|
+
if (util.isExist(val)) {
|
|
490
|
+
return val;
|
|
491
|
+
} else {
|
|
492
|
+
return '';
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
module.exports = OrderedObjParser;
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
const { buildOptions} = require("./OptionsBuilder");
|
|
2
|
-
const
|
|
2
|
+
const OrderedObjParser = require("./OrderedObjParser");
|
|
3
3
|
const { prettify} = require("./node2json");
|
|
4
4
|
const validator = require('../validator');
|
|
5
5
|
|
|
6
6
|
class XMLParser{
|
|
7
|
+
|
|
7
8
|
constructor(options){
|
|
9
|
+
this.externalEntities = {};
|
|
8
10
|
this.options = buildOptions(options);
|
|
11
|
+
|
|
9
12
|
}
|
|
10
13
|
/**
|
|
11
14
|
* Parse XML dats to JS object
|
|
@@ -27,10 +30,27 @@ class XMLParser{
|
|
|
27
30
|
throw Error( `${result.err.msg}:${result.err.line}:${result.err.col}` )
|
|
28
31
|
}
|
|
29
32
|
}
|
|
30
|
-
const
|
|
33
|
+
const orderedObjParser = new OrderedObjParser(this.options);
|
|
34
|
+
orderedObjParser.addExternalEntities(this.externalEntities);
|
|
35
|
+
const orderedResult = orderedObjParser.parseXml(xmlData);
|
|
31
36
|
if(this.options.preserveOrder || orderedResult === undefined) return orderedResult;
|
|
32
37
|
else return prettify(orderedResult, this.options);
|
|
33
38
|
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Add Entity which is not by default supported by this library
|
|
42
|
+
* @param {string} key
|
|
43
|
+
* @param {string} value
|
|
44
|
+
*/
|
|
45
|
+
addEntity(key, value){
|
|
46
|
+
if(value.indexOf("&") !== -1){
|
|
47
|
+
throw new Error("Entity value can't have '&'")
|
|
48
|
+
}else if(key.indexOf("&") !== -1 || key.indexOf(";") !== -1){
|
|
49
|
+
throw new Error("An entity must be set without '&' and ';'. Eg. use '#xD' for '
'")
|
|
50
|
+
}else{
|
|
51
|
+
this.externalEntities[key] = value;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
34
54
|
}
|
|
35
55
|
|
|
36
56
|
module.exports = XMLParser;
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
*
|
|
5
|
+
* @param {array} node
|
|
6
|
+
* @param {any} options
|
|
7
|
+
* @returns
|
|
8
|
+
*/
|
|
3
9
|
function prettify(node, options){
|
|
4
|
-
return compress(
|
|
10
|
+
return compress( node, options);
|
|
5
11
|
}
|
|
6
12
|
|
|
7
13
|
/**
|
|
@@ -31,8 +37,8 @@ function compress(arr, options, jPath){
|
|
|
31
37
|
let val = compress(tagObj[property], options, newJpath);
|
|
32
38
|
const isLeaf = isLeafTag(val, options);
|
|
33
39
|
|
|
34
|
-
if(tagObj
|
|
35
|
-
assignAttributes( val, tagObj
|
|
40
|
+
if(tagObj[":@"]){
|
|
41
|
+
assignAttributes( val, tagObj[":@"], newJpath, options);
|
|
36
42
|
}else if(Object.keys(val).length === 1 && val[options.textNodeName] !== undefined && !options.alwaysCreateTextNode){
|
|
37
43
|
val = val[options.textNodeName];
|
|
38
44
|
}else if(Object.keys(val).length === 0){
|
|
@@ -68,7 +74,7 @@ function propName(obj){
|
|
|
68
74
|
const keys = Object.keys(obj);
|
|
69
75
|
for (let i = 0; i < keys.length; i++) {
|
|
70
76
|
const key = keys[i];
|
|
71
|
-
if(key !== "
|
|
77
|
+
if(key !== ":@") return key;
|
|
72
78
|
}
|
|
73
79
|
}
|
|
74
80
|
|
package/src/xmlparser/xmlNode.js
CHANGED
|
@@ -4,15 +4,15 @@ class XmlNode{
|
|
|
4
4
|
constructor(tagname) {
|
|
5
5
|
this.tagname = tagname;
|
|
6
6
|
this.child = []; //nested tags, text, cdata, comments in order
|
|
7
|
-
this
|
|
7
|
+
this[":@"] = {}; //attributes map
|
|
8
8
|
}
|
|
9
9
|
add(key,val){
|
|
10
10
|
// this.child.push( {name : key, val: val, isCdata: isCdata });
|
|
11
11
|
this.child.push( {[key]: val });
|
|
12
12
|
}
|
|
13
13
|
addChild(node) {
|
|
14
|
-
if(node
|
|
15
|
-
this.child.push( { [node.tagname]: node.child,
|
|
14
|
+
if(node[":@"] && Object.keys(node[":@"]).length > 0){
|
|
15
|
+
this.child.push( { [node.tagname]: node.child, [":@"]: node[":@"] });
|
|
16
16
|
}else{
|
|
17
17
|
this.child.push( { [node.tagname]: node.child });
|
|
18
18
|
}
|