fast-xml-parser 4.0.0-beta.2 → 4.0.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  Note: If you find missing information about particular minor version, that version must have been changed without any functional change in this library.
2
2
 
3
+ **4.0.0-beta.3 / 2021-11-30**
4
+ * support global stopNodes expression like "*.stop"
5
+ * support self-closing and paired unpaired tags
6
+ * fix: CDATA should not be parsed.
7
+ * Fix typings for XMLBuilder (#396)(By [Anders Emil Salvesen](https://github.com/andersem))
8
+ * supports XML entities, HTML entities, DOCTYPE entities
9
+
3
10
  **⚠️ 4.0.0-beta.2 / 2021-11-19**
4
11
  * rename `attrMap` to `attibutes` in parser output when `preserveOrder:true`
5
12
  * supports unpairedTags
package/README.md CHANGED
@@ -48,6 +48,13 @@ Check the list of all known users [here](./USERs.md);
48
48
  * Faster than any pure JS implementation.
49
49
  * It can handle big files (tested up to 100mb).
50
50
  * Controlled parsing using various options
51
+ * XML Entities, HTML entities, and DOCTYPE entities are supported.
52
+ * unpaired tags (Eg `<br>` in HTML), stop nodes (Eg `<script>` in HTML) are supported.
53
+ * You can restore almost same XML from JSON
54
+ * Supports comments
55
+ * It can preserve Order of tags in JS object
56
+ * You can control if a single tag should be parsed into array.
57
+ * And many more other features.
51
58
 
52
59
  ## How to use
53
60
 
@@ -98,6 +105,7 @@ In a HTML page
98
105
  2. [XML Parser](./docs/v4/2.XMLparseOptions.md)
99
106
  3. [XML Builder](./docs/v4/3.XMLBuilder.md)
100
107
  4. [XML Validator](./docs/v4/4.XMLValidator.md)
108
+ 5. [Entities](./docs/5.Entities.md)
101
109
 
102
110
  ## Performance
103
111
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fast-xml-parser",
3
- "version": "4.0.0-beta.2",
3
+ "version": "4.0.0-beta.3",
4
4
  "description": "Validate XML, Parse XML, Build XML without C/C++ based libraries",
5
5
  "main": "./src/fxp.js",
6
6
  "scripts": {
package/src/fxp.d.ts CHANGED
@@ -18,6 +18,8 @@ type X2jOptions = {
18
18
  unpairedTags: string[];
19
19
  alwaysCreateTextNode: boolean;
20
20
  isArray: (tagName: string, jPath: string, isLeafNode: boolean, isAttribute: boolean) => boolean;
21
+ processEntities: boolean;
22
+ htmlEntities: boolean;
21
23
  };
22
24
  type strnumOptions = {
23
25
  hex: boolean;
@@ -46,6 +48,7 @@ type XmlBuilderOptions = {
46
48
  unpairedTags: string[];
47
49
  tagValueProcessor: (name: string, value: string) => string;
48
50
  attributeValueProcessor: (name: string, value: string) => string;
51
+ processEntities: boolean;
49
52
  };
50
53
  type XmlBuilderOptionsOptional = Partial<XmlBuilderOptions>;
51
54
 
@@ -70,5 +73,5 @@ export class XMLValidator{
70
73
  }
71
74
  export class XMLBuilder {
72
75
  constructor(options: XmlBuilderOptionsOptional);
73
- parse(options: any): any;
76
+ build(jObj: any): any;
74
77
  }
@@ -21,6 +21,13 @@ const defaultOptions = {
21
21
  preserveOrder: false,
22
22
  commentPropName: false,
23
23
  unpairedTags: [],
24
+ entities: {
25
+ ">" : { regex: new RegExp(">", "g"), val: "&gt;" },
26
+ "<" : { regex: new RegExp("<", "g"), val: "&lt;" },
27
+ "sQuot" : { regex: new RegExp("\'", "g"), val: "&apos;" },
28
+ "dQuot" : { regex: new RegExp("\"", "g"), val: "&quot;" }
29
+ },
30
+ processEntities: true
24
31
  };
25
32
 
26
33
  const props = [
@@ -38,6 +45,8 @@ const props = [
38
45
  'preserveOrder',
39
46
  "commentPropName",
40
47
  "unpairedTags",
48
+ "entities",
49
+ "processEntities",
41
50
  // 'rootNodeName', //when jsObject have multiple properties on root level
42
51
  ];
43
52
 
@@ -76,6 +85,8 @@ function Builder(options) {
76
85
 
77
86
  this.buildTextValNode = buildTextValNode;
78
87
  this.buildObjectNode = buildObjectNode;
88
+
89
+ this.replaceEntitiesValue = replaceEntitiesValue;
79
90
  }
80
91
 
81
92
  Builder.prototype.build = function(jObj) {
@@ -105,11 +116,14 @@ Builder.prototype.j2x = function(jObj, level) {
105
116
  //premitive type
106
117
  const attr = this.isAttribute(key);
107
118
  if (attr) {
108
- attrStr += ' ' + attr + '="' + this.options.attributeValueProcessor(attr, '' + jObj[key]) + '"';
119
+ let val = this.options.attributeValueProcessor(attr, '' + jObj[key]);
120
+ val = this.replaceEntitiesValue(val);
121
+ attrStr += ' ' + attr + '="' + val + '"';
109
122
  }else {
110
123
  //tag value
111
124
  if (key === this.options.textNodeName) {
112
- val += this.options.tagValueProcessor(key, '' + jObj[key]);
125
+ let newval = this.options.tagValueProcessor(key, '' + jObj[key]);
126
+ val += this.replaceEntitiesValue(newval);
113
127
  } else {
114
128
  val += this.buildTextNode(jObj[key], key, '', level);
115
129
  }
@@ -135,7 +149,9 @@ Builder.prototype.j2x = function(jObj, level) {
135
149
  const Ks = Object.keys(jObj[key]);
136
150
  const L = Ks.length;
137
151
  for (let j = 0; j < L; j++) {
138
- attrStr += ' ' + Ks[j] + '="' + this.options.attributeValueProcessor(Ks[j], '' + jObj[key][Ks[j]]) + '"';
152
+ let val = this.options.attributeValueProcessor(Ks[j], '' + jObj[key][Ks[j]]);
153
+ val = this.replaceEntitiesValue(val);
154
+ attrStr += ' ' + Ks[j] + '="' + val + '"';
139
155
  }
140
156
  } else {
141
157
  val += this.processTextOrObjNode(jObj[key], key, level)
@@ -196,19 +212,32 @@ function buildEmptyObjNode(val, key, attrStr, level) {
196
212
  }
197
213
 
198
214
  function buildTextValNode(val, key, attrStr, level) {
215
+ let textValue = this.options.tagValueProcessor(key, val);
216
+ textValue = this.replaceEntitiesValue(textValue);
217
+
199
218
  return (
200
219
  this.indentate(level) +
201
220
  '<' +
202
221
  key +
203
222
  attrStr +
204
223
  '>' +
205
- this.options.tagValueProcessor(key, val) +
224
+ textValue +
206
225
  '</' +
207
226
  key +
208
227
  this.tagEndChar
209
228
  );
210
229
  }
211
230
 
231
+ function replaceEntitiesValue(textValue){
232
+ if(textValue && textValue.length > 0 && this.options.processEntities){
233
+ for (const entityName in this.options.entities) {
234
+ const entity = this.options.entities[entityName];
235
+ textValue = textValue.replace(entity.regex, entity.val);
236
+ }
237
+ }
238
+ return textValue;
239
+ }
240
+
212
241
  function buildEmptyTextNode(val, key, attrStr, level) {
213
242
  if( val === '' && this.options.unpairedTags.indexOf(key) !== -1){
214
243
  return this.indentate(level) + '<' + key + attrStr + this.tagEndChar;
@@ -23,7 +23,9 @@ function arrToStr(arr, options, level){
23
23
  const tagName = propName(tagObj);
24
24
 
25
25
  if(tagName === options.textNodeName){
26
- xmlStr += indentation + options.tagValueProcessor( tagName, tagObj[tagName]);
26
+ let tagText = options.tagValueProcessor( tagName, tagObj[tagName]);
27
+ tagText = replaceEntitiesValue(tagText, options);
28
+ xmlStr += indentation + tagText;
27
29
  continue;
28
30
  }else if( tagName === options.cdataPropName){
29
31
  xmlStr += indentation + `<![CDATA[${tagObj[tagName][0][options.textNodeName]}]]>`;
@@ -62,10 +64,21 @@ function attr_to_str(attrMap, options){
62
64
  let attrStr = "";
63
65
  if(attrMap && !options.ignoreAttributes){
64
66
  for( attr in attrMap){
65
- attrStr+= ` ${attr.substr(options.attributeNamePrefix.length)}="${options.attributeValueProcessor(attr, attrMap[attr])}"`;
67
+ let attrVal = options.attributeValueProcessor(attr, attrMap[attr]);
68
+ attrVal = replaceEntitiesValue(attrVal, options);
69
+ attrStr+= ` ${attr.substr(options.attributeNamePrefix.length)}="${attrVal}"`;
66
70
  }
67
71
  }
68
72
  return attrStr;
69
73
  }
70
74
 
75
+ function replaceEntitiesValue(textValue, options){
76
+ if(textValue && textValue.length > 0 && options.processEntities){
77
+ for (const entityName in options.entities) {
78
+ const entity = options.entities[entityName];
79
+ textValue = textValue.replace(entity.regex, entity.val);
80
+ }
81
+ }
82
+ return textValue;
83
+ }
71
84
  module.exports = toXml;
@@ -0,0 +1,92 @@
1
+ //TODO: handle comments
2
+ function readDocType(xmlData, i){
3
+
4
+ const entities = {};
5
+ if( xmlData[i + 3] === 'O' &&
6
+ xmlData[i + 4] === 'C' &&
7
+ xmlData[i + 5] === 'T' &&
8
+ xmlData[i + 6] === 'Y' &&
9
+ xmlData[i + 7] === 'P' &&
10
+ xmlData[i + 8] === 'E')
11
+ {
12
+ i = i+9;
13
+ let angleBracketsCount = 1;
14
+ let hasBody = false, entity = false, comment = false;
15
+ let exp = "";
16
+ for(;i<xmlData.length;i++){
17
+ if (xmlData[i] === '<') {
18
+ if( hasBody &&
19
+ xmlData[i+1] === '!' &&
20
+ xmlData[i+2] === 'E' &&
21
+ xmlData[i+3] === 'N' &&
22
+ xmlData[i+4] === 'T' &&
23
+ xmlData[i+5] === 'I' &&
24
+ xmlData[i+6] === 'T' &&
25
+ xmlData[i+7] === 'Y'
26
+ ){
27
+ i += 7;
28
+ entity = true;
29
+ }else if( hasBody &&
30
+ xmlData[i+1] === '!' &&
31
+ xmlData[i+2] === 'E' &&
32
+ xmlData[i+3] === 'L' &&
33
+ xmlData[i+4] === 'E' &&
34
+ xmlData[i+5] === 'M' &&
35
+ xmlData[i+6] === 'E' &&
36
+ xmlData[i+7] === 'N' &&
37
+ xmlData[i+8] === 'T'
38
+ ){
39
+ //Not supported
40
+ i += 8;
41
+ }else if( //comment
42
+ xmlData[i+1] === '!' &&
43
+ xmlData[i+2] === '-' &&
44
+ xmlData[i+3] === '-'
45
+ ){
46
+ comment = true;
47
+ }else{
48
+ throw new Error("Invalid DOCTYPE");
49
+ }
50
+ angleBracketsCount++;
51
+ exp = "";
52
+ } else if (xmlData[i] === '>') {
53
+ if(comment){
54
+ if( xmlData[i - 1] === "-" && xmlData[i - 2] === "-"){
55
+ comment = false;
56
+ }else{
57
+ throw new Error(`Invalid XML comment in DOCTYPE`);
58
+ }
59
+ }else if(entity){
60
+ parseEntityExp(exp, entities);
61
+ entity = false;
62
+ }
63
+ angleBracketsCount--;
64
+ if (angleBracketsCount === 0) {
65
+ break;
66
+ }
67
+ }else if( xmlData[i] === '['){
68
+ hasBody = true;
69
+ }else{
70
+ exp += xmlData[i];
71
+ }
72
+ }
73
+ if(angleBracketsCount !== 0){
74
+ throw new Error(`Unclosed DOCTYPE`);
75
+ }
76
+ }else{
77
+ throw new Error(`Invalid Tag instead of DOCTYPE`);
78
+ }
79
+ return {entities, i};
80
+ }
81
+
82
+ const entityRegex = RegExp("^\\s([a-zA-z0-0]+)[ \t](['\"])([^&]+)\\2");
83
+ function parseEntityExp(exp, entities){
84
+ const match = entityRegex.exec(exp);
85
+ if(match){
86
+ entities[ match[1] ] = {
87
+ regx : RegExp( `&${match[1]};`,"g"),
88
+ val: match[3]
89
+ };
90
+ }
91
+ }
92
+ module.exports = readDocType;
@@ -27,6 +27,8 @@ const defaultOptions = {
27
27
  isArray: () => false,
28
28
  commentPropName: false,
29
29
  unpairedTags: [],
30
+ processEntities: true,
31
+ htmlEntities: false,
30
32
  };
31
33
 
32
34
  const props = [
@@ -49,6 +51,8 @@ const props = [
49
51
  'isArray',
50
52
  'commentPropName',
51
53
  'unpairedTags',
54
+ 'processEntities',
55
+ 'htmlEntities'
52
56
  ];
53
57
 
54
58
  const util = require('../util');
@@ -2,6 +2,7 @@
2
2
 
3
3
  const util = require('../util');
4
4
  const xmlNode = require('./xmlNode');
5
+ const readDocType = require("./DocTypeReader");
5
6
  const toNumber = require("strnum");
6
7
 
7
8
  const regx =
@@ -11,43 +12,75 @@ const regx =
11
12
  //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
12
13
  //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
13
14
 
14
- //polyfill
15
- if (!Number.parseInt && window.parseInt) {
16
- Number.parseInt = window.parseInt;
17
- }
18
- if (!Number.parseFloat && window.parseFloat) {
19
- Number.parseFloat = window.parseFloat;
20
- }
15
/**
 * Parser state holder: keeps the options, the entity tables used while
 * decoding values, and references to the parsing helpers so they can be
 * called as methods (with `this` bound to the instance).
 */
class OrderedObjParser {
  /**
   * @param {object} options parser options, stored as `this.options`
   */
  constructor(options) {
    this.options = options;
    this.currentNode = null;
    this.tagsNodeStack = [];

    // Entities declared in the document's DOCTYPE; empty until one is read.
    this.docTypeEntities = {};

    // The five predefined XML entities, each in named, decimal and hex form.
    this.lastEntities = {
      "amp": { regex: /&(amp|#38|#x26);/g, val: "&" },
      "apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
      "gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
      "lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
      "quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
    };

    // Additional HTML entities (named and decimal forms). lt/gt/amp/quot/apos
    // are not repeated here because `lastEntities` already covers them.
    this.htmlEntities = {
      "space": { regex: /&(nbsp|#160);/g, val: " " },
      "cent": { regex: /&(cent|#162);/g, val: "¢" },
      "pound": { regex: /&(pound|#163);/g, val: "£" },
      "yen": { regex: /&(yen|#165);/g, val: "¥" },
      "euro": { regex: /&(euro|#8364);/g, val: "€" },
      "copyright": { regex: /&(copy|#169);/g, val: "©" },
      "reg": { regex: /&(reg|#174);/g, val: "®" },
      "inr": { regex: /&(inr|#8377);/g, val: "₹" },
    };

    // Attach the module-level helpers as instance members.
    Object.assign(this, {
      parseXml,
      parseTextData,
      resolveNameSpace,
      buildAttributesMap,
      isItStopNode,
      replaceEntitiesValue,
      readTagExp,
      readStopNodeData,
    });
  }
}
22
54
 
23
55
  /**
24
56
  * @param {string} val
25
- * @param {object} options
26
57
  * @param {string} tagName
27
58
  * @param {string} jPath
28
59
  * @param {boolean} dontTrim
29
60
  * @param {boolean} hasAttributes
30
61
  * @param {boolean} isLeafNode
31
62
  */
32
- function parseValue(val, options, tagName, jPath, dontTrim, hasAttributes, isLeafNode) {
63
+ function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode) {
33
64
  if (val !== undefined) {
34
- if (options.trimValues && !dontTrim) {
65
+ if (this.options.trimValues && !dontTrim) {
35
66
  val = val.trim();
36
67
  }
37
68
  if(val.length > 0){
38
- const newval = options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
69
+ val = this.replaceEntitiesValue(val);
70
+
71
+ const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
39
72
  if(newval === null || newval === undefined){
40
73
  //don't parse
41
74
  return val;
42
75
  }else if(typeof newval !== typeof val || newval !== val){
43
76
  //overwrite
44
77
  return newval;
45
- }else if(options.trimValues){
46
- return _parseValue(val, options.parseTagValue, options.numberParseOptions);
78
+ }else if(this.options.trimValues){
79
+ return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
47
80
  }else{
48
81
  const trimmedVal = val.trim();
49
82
  if(trimmedVal === val){
50
- return _parseValue(val, options.parseTagValue, options.numberParseOptions);
83
+ return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
51
84
  }else{
52
85
  return val;
53
86
  }
@@ -56,8 +89,8 @@ function parseValue(val, options, tagName, jPath, dontTrim, hasAttributes, isLea
56
89
  }
57
90
  }
58
91
 
59
- function resolveNameSpace(tagname, options) {
60
- if (options.removeNSPrefix) {
92
+ function resolveNameSpace(tagname) {
93
+ if (this.options.removeNSPrefix) {
61
94
  const tags = tagname.split(':');
62
95
  const prefix = tagname.charAt(0) === '/' ? '/' : '';
63
96
  if (tags[0] === 'xmlns') {
@@ -70,28 +103,12 @@ function resolveNameSpace(tagname, options) {
70
103
  return tagname;
71
104
  }
72
105
 
73
- function _parseValue(val, shouldParse, options) {
74
- if (shouldParse && typeof val === 'string') {
75
- //console.log(options)
76
- const newval = val.trim();
77
- if(newval === 'true' ) return true;
78
- else if(newval === 'false' ) return false;
79
- else return toNumber(val, options);
80
- } else {
81
- if (util.isExist(val)) {
82
- return val;
83
- } else {
84
- return '';
85
- }
86
- }
87
- }
88
-
89
106
  //TODO: change regex to capture NS
90
107
  //const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
91
108
  const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
92
109
 
93
- function buildAttributesMap(attrStr, jPath, options) {
94
- if (!options.ignoreAttributes && typeof attrStr === 'string') {
110
+ function buildAttributesMap(attrStr, jPath) {
111
+ if (!this.options.ignoreAttributes && typeof attrStr === 'string') {
95
112
  // attrStr = attrStr.replace(/\r?\n/g, ' ');
96
113
  //attrStr = attrStr || attrStr.trim();
97
114
 
@@ -99,16 +116,16 @@ function buildAttributesMap(attrStr, jPath, options) {
99
116
  const len = matches.length; //don't make it inline
100
117
  const attrs = {};
101
118
  for (let i = 0; i < len; i++) {
102
- const attrName = resolveNameSpace(matches[i][1], options);
119
+ const attrName = this.resolveNameSpace(matches[i][1]);
103
120
  let oldVal = matches[i][4];
104
- const aName = options.attributeNamePrefix + attrName;
121
+ const aName = this.options.attributeNamePrefix + attrName;
105
122
  if (attrName.length) {
106
123
  if (oldVal !== undefined) {
107
- if (options.trimValues) {
124
+ if (this.options.trimValues) {
108
125
  oldVal = oldVal.trim();
109
126
  }
110
-
111
- const newVal = options.attributeValueProcessor(attrName, oldVal, jPath);
127
+ oldVal = this.replaceEntitiesValue(oldVal);
128
+ const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath);
112
129
  if(newVal === null || newVal === undefined){
113
130
  //don't parse
114
131
  attrs[aName] = oldVal;
@@ -117,13 +134,13 @@ function buildAttributesMap(attrStr, jPath, options) {
117
134
  attrs[aName] = newVal;
118
135
  }else{
119
136
  //parse
120
- attrs[aName] = _parseValue(
137
+ attrs[aName] = parseValue(
121
138
  oldVal,
122
- options.parseAttributeValue,
123
- options.numberParseOptions
139
+ this.options.parseAttributeValue,
140
+ this.options.numberParseOptions
124
141
  );
125
142
  }
126
- } else if (options.allowBooleanAttributes) {
143
+ } else if (this.options.allowBooleanAttributes) {
127
144
  attrs[aName] = true;
128
145
  }
129
146
  }
@@ -131,23 +148,21 @@ function buildAttributesMap(attrStr, jPath, options) {
131
148
  if (!Object.keys(attrs).length) {
132
149
  return;
133
150
  }
134
- if (options.attributesGroupName) {
151
+ if (this.options.attributesGroupName) {
135
152
  const attrCollection = {};
136
- attrCollection[options.attributesGroupName] = attrs;
153
+ attrCollection[this.options.attributesGroupName] = attrs;
137
154
  return attrCollection;
138
155
  }
139
156
  return attrs;
140
157
  }
141
158
  }
142
159
 
143
- const parseToOrderedJsObj = function(xmlData, options) {
160
+ const parseXml = function(xmlData) {
144
161
  xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
145
162
  const xmlObj = new xmlNode('!xml');
146
163
  let currentNode = xmlObj;
147
164
  let textData = "";
148
- const tagsNodeStack = [];
149
165
  let jPath = "";
150
-
151
166
  for(let i=0; i< xmlData.length; i++){//for each char in XML data
152
167
  const ch = xmlData[i];
153
168
  if(ch === '<'){
@@ -157,128 +172,102 @@ const parseToOrderedJsObj = function(xmlData, options) {
157
172
  const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
158
173
  let tagName = xmlData.substring(i+2,closeIndex).trim();
159
174
 
160
- if(options.removeNSPrefix){
175
+ if(this.options.removeNSPrefix){
161
176
  const colonIndex = tagName.indexOf(":");
162
177
  if(colonIndex !== -1){
163
178
  tagName = tagName.substr(colonIndex+1);
164
179
  }
165
180
  }
166
-
181
+
167
182
  if(currentNode){
168
- textData = parseValue(textData
169
- , options
183
+ textData = this.parseTextData(textData
170
184
  , currentNode.tagname
171
185
  , jPath
172
186
  ,false
173
187
  , currentNode.attributes ? Object.keys(currentNode.attributes).length !== 0 : false
174
188
  , Object.keys(currentNode.child).length === 0);
175
- if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
189
+ if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
176
190
  textData = "";
177
191
  }
178
192
 
179
- if (isItStopNode(options.stopNodes, tagsNodeStack, currentNode.tagname)) { //TODO: namespace
180
- const top = tagsNodeStack[tagsNodeStack.length - 1];
181
- const stopNode = top.child[ top.child.length -1 ];
182
- stopNode[currentNode.tagname] = [ { [options.textNodeName] :xmlData.substr(currentNode.startIndex + 1, i - currentNode.startIndex - 1) }];
183
- }
184
-
185
193
  jPath = jPath.substr(0, jPath.lastIndexOf("."));
186
194
 
187
- currentNode = tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
195
+ currentNode = this.tagsNodeStack.pop();//avoid recurssion, set the parent tag scope
188
196
  textData = "";
189
197
  i = closeIndex;
190
198
  } else if( xmlData[i+1] === '?') {
191
199
  i = findClosingIndex(xmlData, "?>", i, "Pi Tag is not closed.")
192
200
  } else if(xmlData.substr(i + 1, 3) === '!--') {
193
201
  const endIndex = findClosingIndex(xmlData, "-->", i, "Comment is not closed.")
194
- if(options.commentPropName){
202
+ if(this.options.commentPropName){
195
203
  const comment = xmlData.substring(i + 4, endIndex - 2);
196
204
 
197
205
  //TODO: remove repeated code
198
206
  if(textData){ //store previously collected data as textNode
199
- textData = parseValue(textData
200
- , options
207
+ textData = this.parseTextData(textData
201
208
  , currentNode.tagname
202
209
  , jPath
203
210
  ,false
204
211
  , currentNode.attributes ? Object.keys(currentNode.attributes).length !== 0 : false
205
212
  , Object.keys(currentNode.child).length === 0);
206
213
 
207
- if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
214
+ if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
208
215
  textData = "";
209
216
  }
210
- currentNode.add(options.commentPropName, [ { [options.textNodeName] : comment } ]);
217
+ currentNode.add(this.options.commentPropName, [ { [this.options.textNodeName] : comment } ]);
211
218
  }
212
219
  i = endIndex;
213
220
  } else if( xmlData.substr(i + 1, 2) === '!D') {
214
- const closeIndex = findClosingIndex(xmlData, ">", i, "DOCTYPE is not closed.")
215
- const tagExp = xmlData.substring(i, closeIndex);
216
- if(tagExp.indexOf("[") >= 0){
217
- i = xmlData.indexOf("]>", i) + 1;
218
- }else{
219
- i = closeIndex;
220
- }
221
+ const result = readDocType(xmlData, i);
222
+ this.docTypeEntities = result.entities;
223
+ i = result.i;
221
224
  }else if(xmlData.substr(i + 1, 2) === '![') {
222
225
  const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
223
226
  const tagExp = xmlData.substring(i + 9,closeIndex);
224
227
 
225
228
  if(textData){ //store previously collected data as textNode
226
- textData = parseValue(textData
227
- , options
229
+ textData = this.parseTextData(textData
228
230
  , currentNode.tagname
229
231
  , jPath
230
232
  ,false
231
233
  , currentNode.attributes ? Object.keys(currentNode.attributes).length !== 0 : false
232
234
  , Object.keys(currentNode.child).length === 0);
233
235
 
234
- if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
236
+ if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
235
237
  textData = "";
236
238
  }
237
239
 
238
240
  //cdata should be set even if it is 0 length string
239
- if(options.cdataPropName){
240
- let val = parseValue(tagExp, options, options.cdataPropName, jPath + "." + options.cdataPropName, true, false, true);
241
- if(!val) val = "";
242
- currentNode.add(options.cdataPropName, [ { [options.textNodeName] : val } ]);
241
+ if(this.options.cdataPropName){
242
+ // let val = this.parseTextData(tagExp, this.options.cdataPropName, jPath + "." + this.options.cdataPropName, true, false, true);
243
+ // if(!val) val = "";
244
+ currentNode.add(this.options.cdataPropName, [ { [this.options.textNodeName] : tagExp } ]);
243
245
  }else{
244
- let val = parseValue(tagExp, options, currentNode.tagname, jPath, true, false, true);
246
+ let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true);
245
247
  if(!val) val = "";
246
- currentNode.add(options.textNodeName, val);
248
+ currentNode.add(this.options.textNodeName, val);
247
249
  }
248
250
 
249
251
  i = closeIndex + 2;
250
252
  }else {//Opening tag
251
- const result = tagExpWithClosingIndex(xmlData, i+1)
252
- let tagExp = result.data;
253
- const closeIndex = result.index;
254
- const separatorIndex = tagExp.search(/\s/);
255
- let tagName = tagExp;
256
- let shouldBuildAttributesMap = true;
257
- if(separatorIndex !== -1){//separate tag name and attributes expression
258
- tagName = tagExp.substr(0, separatorIndex).replace(/\s\s*$/, '');
259
- tagExp = tagExp.substr(separatorIndex + 1);
260
- }
261
-
262
- if(options.removeNSPrefix){
263
- const colonIndex = tagName.indexOf(":");
264
- if(colonIndex !== -1){
265
- tagName = tagName.substr(colonIndex+1);
266
- shouldBuildAttributesMap = tagName !== result.data.substr(colonIndex + 1);
267
- }
268
- }
253
+
254
+ let result = this.readTagExp(xmlData,i);
255
+ let tagName= result.tagName;
256
+ let tagExp = result.tagExp;
257
+ let attrExpPresent = result.attrExpPresent;
258
+ let closeIndex = result.closeIndex;
269
259
 
270
260
  //save text as child node
271
261
  if (currentNode && textData) {
272
262
  if(currentNode.tagname !== '!xml'){
273
263
  //when nested tag is found
274
- textData = parseValue(textData
275
- , options
264
+ textData = this.parseTextData(textData
276
265
  , currentNode.tagname
277
266
  , jPath
278
267
  , false
279
268
  , currentNode.attributes ? Object.keys(currentNode.attributes).length !== 0 : false
280
269
  , false);
281
- if(textData !== undefined && textData !== "") currentNode.add(options.textNodeName, textData);
270
+ if(textData !== undefined && textData !== "") currentNode.add(this.options.textNodeName, textData);
282
271
  textData = "";
283
272
  }
284
273
  }
@@ -287,52 +276,67 @@ const parseToOrderedJsObj = function(xmlData, options) {
287
276
  jPath += jPath ? "." + tagName : tagName;
288
277
  }
289
278
 
290
- //selfClosing tag
291
- if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
292
-
293
- if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
294
- tagName = tagName.substr(0, tagName.length - 1);
295
- tagExp = tagName;
296
- }else{
297
- tagExp = tagExp.substr(0, tagExp.length - 1);
279
+ //check if last tag was unpaired tag
280
+ const lastTag = currentNode;
281
+ if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1 ){
282
+ currentNode = this.tagsNodeStack.pop();
283
+ }
284
+
285
+ if (this.isItStopNode(this.options.stopNodes, jPath, tagName)) { //TODO: namespace
286
+ let tagContent = "";
287
+ //self-closing tag
288
+ if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){}
289
+ //boolean tag
290
+ else if(this.options.unpairedTags.indexOf(tagName) !== -1){}
291
+ //normal tag
292
+ else{
293
+ //read until closing tag is found
294
+ const result = this.readStopNodeData(xmlData, tagName, closeIndex + 1);
295
+ if(!result) throw new Error(`Unexpected end of ${tagName}`);
296
+ i = result.i;
297
+ tagContent = result.tagContent;
298
298
  }
299
299
 
300
300
  const childNode = new xmlNode(tagName);
301
- if(tagName !== tagExp && shouldBuildAttributesMap){
302
- childNode.attributes = buildAttributesMap(tagExp, jPath , options);
301
+ if(tagName !== tagExp && attrExpPresent){
302
+ childNode.attributes = this.buildAttributesMap(tagExp, jPath);
303
303
  }
304
304
  jPath = jPath.substr(0, jPath.lastIndexOf("."));
305
- // tagsNodeStack.push(currentNode);
305
+ childNode.add(this.options.textNodeName, tagContent);
306
+
306
307
  currentNode.addChild(childNode);
307
- }
308
- //boolean tags
309
- else if(options.unpairedTags.indexOf(tagName) !== -1){
310
- // tagExp = tagExp.substr(0, tagExp.length - 1);
308
+ }else{
309
+ //selfClosing tag
310
+ if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
311
+
312
+ if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
313
+ tagName = tagName.substr(0, tagName.length - 1);
314
+ tagExp = tagName;
315
+ }else{
316
+ tagExp = tagExp.substr(0, tagExp.length - 1);
317
+ }
311
318
 
312
- const childNode = new xmlNode(tagName);
313
- if(tagName !== tagExp && shouldBuildAttributesMap){
314
- childNode.attributes = buildAttributesMap(tagExp, jPath , options);
319
+ const childNode = new xmlNode(tagName);
320
+ if(tagName !== tagExp && attrExpPresent){
321
+ childNode.attributes = this.buildAttributesMap(tagExp, jPath);
322
+ }
323
+ jPath = jPath.substr(0, jPath.lastIndexOf("."));
324
+ currentNode.addChild(childNode);
315
325
  }
316
- jPath = jPath.substr(0, jPath.lastIndexOf("."));
317
- // tagsNodeStack.push(currentNode);
318
- currentNode.addChild(childNode);
319
- }
320
- //opening tag
321
- else{
322
-
323
- const childNode = new xmlNode( tagName);
324
- tagsNodeStack.push(currentNode);
325
-
326
- childNode.startIndex=closeIndex; //for further processing
327
-
328
- if(tagName !== tagExp && shouldBuildAttributesMap){
329
- childNode.attributes = buildAttributesMap(tagExp, jPath, options);
326
+ //opening tag
327
+ else{
328
+ const childNode = new xmlNode( tagName);
329
+ this.tagsNodeStack.push(currentNode);
330
+
331
+ if(tagName !== tagExp && attrExpPresent){
332
+ childNode.attributes = this.buildAttributesMap(tagExp, jPath);
333
+ }
334
+ currentNode.addChild(childNode);
335
+ currentNode = childNode;
330
336
  }
331
- currentNode.addChild(childNode);
332
- currentNode = childNode;
337
+ textData = "";
338
+ i = closeIndex;
333
339
  }
334
- textData = "";
335
- i = closeIndex;
336
340
  }
337
341
  }else{
338
342
  textData += xmlData[i];
@@ -341,31 +345,39 @@ const parseToOrderedJsObj = function(xmlData, options) {
341
345
  return xmlObj.child;
342
346
  }
343
347
 
348
+ const replaceEntitiesValue = function(val){
349
+ if(this.options.processEntities){
350
+ for(let entityName in this.docTypeEntities){
351
+ const entity = this.docTypeEntities[entityName];
352
+ val = val.replace( entity.regx, entity.val);
353
+ }
354
+ for(let entityName in this.lastEntities){
355
+ const entity = this.lastEntities[entityName];
356
+ val = val.replace( entity.regex, entity.val);
357
+ }
358
+ if(this.options.htmlEntities){
359
+ for(let entityName in this.htmlEntities){
360
+ const entity = this.htmlEntities[entityName];
361
+ val = val.replace( entity.regex, entity.val);
362
+ }
363
+ }
364
+ }
365
+ return val;
366
+ }
344
367
  //TODO: use jPath to simplify the logic
345
368
  /**
346
369
  *
347
370
  * @param {string[]} stopNodes
348
- * @param {XmlNode[]} tagsNodeStack
371
+ * @param {string} jPath
372
+ * @param {string} currentTagName
349
373
  */
350
- function isItStopNode(stopNodes, tagsNodeStack, currentTagName){
351
- const matchingStopNodes = [];
352
- //filter the list of stopNodes as per current tag
353
- stopNodes.forEach( jPath => {
354
- if( jPath.substr( jPath.length - currentTagName.length) === currentTagName) matchingStopNodes.push(jPath);
355
- });
356
-
357
- if(matchingStopNodes.length > 0){
358
- let jPath = "";
359
- for (let i = 1; i < tagsNodeStack.length; i++) {
360
- const node = tagsNodeStack[i];
361
- jPath += "." + node.tagname;
362
- }
363
- jPath += "." + currentTagName;
364
- jPath = jPath.substr(1);
365
- for (let i = 0; i < matchingStopNodes.length; i++) {
366
- if(matchingStopNodes[i] === jPath) return true;
367
- }
368
- }else return false;
374
+ function isItStopNode(stopNodes, jPath, currentTagName){
375
+ const allNodesExp = "*." + currentTagName;
376
+ for (const stopNodePath in stopNodes) {
377
+ const stopNodeExp = stopNodes[stopNodePath];
378
+ if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true;
379
+ }
380
+ return false;
369
381
  }
370
382
 
371
383
  /**
@@ -404,4 +416,71 @@ function findClosingIndex(xmlData, str, i, errMsg){
404
416
  }
405
417
  }
406
418
 
407
- exports.parseToOrderedJsObj = parseToOrderedJsObj;
419
function readTagExp(xmlData,i){
  // Reads the opening-tag expression that starts at index i and splits it
  // into the tag name and the remaining (attribute) expression.
  const raw = tagExpWithClosingIndex(xmlData, i+1);
  const closeIndex = raw.index;

  let tagName = raw.data;
  let tagExp = raw.data;
  let attrExpPresent = true;

  // Everything up to the first whitespace is the name, the rest is tagExp.
  const firstSpace = tagExp.search(/\s/);
  if(firstSpace !== -1){
    tagName = tagExp.substr(0, firstSpace).replace(/\s\s*$/, '');
    tagExp = tagExp.substr(firstSpace + 1);
  }

  if(this.options.removeNSPrefix){
    const colonAt = tagName.indexOf(":");
    if(colonAt !== -1){
      // Drop the namespace prefix; attributes are considered present only
      // when the original expression held more than the bare name.
      tagName = tagName.substr(colonAt + 1);
      attrExpPresent = tagName !== raw.data.substr(colonAt + 1);
    }
  }

  return { tagName, tagExp, closeIndex, attrExpPresent };
}
446
+ /**
447
+ * find paired tag for a stop node
448
+ * @param {string} xmlData
449
+ * @param {string} tagName
450
+ * @param {number} i
451
+ */
452
function readStopNodeData(xmlData, tagName, i){
  // Scans forward from i for the first closing tag whose trimmed name equals
  // tagName. Returns the raw text before that closing tag together with the
  // index of its '>'; returns undefined when no such closing tag exists.
  const contentStart = i;
  let cursor = i;
  while (cursor < xmlData.length) {
    const atClosingTag = xmlData[cursor] === "<" && xmlData[cursor + 1] === "/";
    if (atClosingTag) {
      const tagEnd = findClosingIndex(xmlData, ">", cursor, `${tagName} is not closed`);
      const candidate = xmlData.substring(cursor + 2, tagEnd).trim();
      if (candidate === tagName) {
        return {
          tagContent: xmlData.substring(contentStart, cursor),
          i : tagEnd
        };
      }
      // Some other closing tag: jump to its end and keep scanning.
      cursor = tagEnd;
    }
    cursor++;
  }
}
468
+
469
/**
 * Converts a raw string into a boolean or number when parsing is requested.
 *
 * @param {*} val raw value
 * @param {boolean} shouldParse whether string values should be converted
 * @param {object} options passed through to `toNumber`
 * @returns {*} `true`/`false` for the trimmed strings 'true'/'false', the
 *          result of `toNumber` for other strings; when parsing is off or
 *          `val` is not a string, `val` itself if `util.isExist` accepts it,
 *          otherwise ''
 */
function parseValue(val, shouldParse, options) {
  if (!shouldParse || typeof val !== 'string') {
    return util.isExist(val) ? val : '';
  }

  const trimmed = val.trim();
  if (trimmed === 'true') return true;
  if (trimmed === 'false') return false;
  // The untrimmed value is handed to toNumber.
  return toNumber(val, options);
}
484
+
485
+
486
+ module.exports = OrderedObjParser;
@@ -1,5 +1,5 @@
1
1
  const { buildOptions} = require("./OptionsBuilder");
2
- const { parseToOrderedJsObj} = require("./OrderedObjParser");
2
+ const OrderedObjParser = require("./OrderedObjParser");
3
3
  const { prettify} = require("./node2json");
4
4
  const validator = require('../validator');
5
5
 
@@ -27,7 +27,8 @@ class XMLParser{
27
27
  throw Error( `${result.err.msg}:${result.err.line}:${result.err.col}` )
28
28
  }
29
29
  }
30
- const orderedResult = parseToOrderedJsObj(xmlData, this.options);
30
+ const orderedObjParser = new OrderedObjParser(this.options);
31
+ const orderedResult = orderedObjParser.parseXml(xmlData);
31
32
  if(this.options.preserveOrder || orderedResult === undefined) return orderedResult;
32
33
  else return prettify(orderedResult, this.options);
33
34
  }