fast-xml-parser 4.5.2 → 4.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/package.json +10 -9
- package/src/fxp.d.ts +102 -18
- package/src/xmlbuilder/orderedJs2Xml.js +14 -3
- package/src/xmlparser/DocTypeReader.js +377 -131
- package/src/xmlparser/OptionsBuilder.js +85 -41
- package/src/xmlparser/OrderedObjParser.js +313 -191
- package/test_output.txt +21 -0
- package/test_output_2.txt +21 -0
- package/test_output_3.txt +11 -0
- package/test_output_4.txt +26 -0
- package/test_output_5.txt +17 -0
- package/test_output_6.txt +98 -0
- package/test_output_7.txt +48 -0
- package/test_output_8.txt +30 -0
- package/test_output_9.txt +41 -0
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
const util = require('../util');
|
|
5
5
|
const xmlNode = require('./xmlNode');
|
|
6
|
-
const
|
|
6
|
+
const DocTypeReader = require('./DocTypeReader');
|
|
7
7
|
const toNumber = require("strnum");
|
|
8
8
|
const getIgnoreAttributesFn = require('../ignoreAttributes')
|
|
9
9
|
|
|
@@ -14,19 +14,19 @@ const getIgnoreAttributesFn = require('../ignoreAttributes')
|
|
|
14
14
|
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
|
|
15
15
|
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
|
|
16
16
|
|
|
17
|
-
class OrderedObjParser{
|
|
18
|
-
constructor(options){
|
|
17
|
+
class OrderedObjParser {
|
|
18
|
+
constructor(options) {
|
|
19
19
|
this.options = options;
|
|
20
20
|
this.currentNode = null;
|
|
21
21
|
this.tagsNodeStack = [];
|
|
22
22
|
this.docTypeEntities = {};
|
|
23
23
|
this.lastEntities = {
|
|
24
|
-
"apos"
|
|
25
|
-
"gt"
|
|
26
|
-
"lt"
|
|
27
|
-
"quot"
|
|
24
|
+
"apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
|
|
25
|
+
"gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
|
|
26
|
+
"lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
|
|
27
|
+
"quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
|
|
28
28
|
};
|
|
29
|
-
this.ampEntity = { regex: /&(amp|#38|#x26);/g, val
|
|
29
|
+
this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" };
|
|
30
30
|
this.htmlEntities = {
|
|
31
31
|
"space": { regex: /&(nbsp|#160);/g, val: " " },
|
|
32
32
|
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
|
|
@@ -34,15 +34,15 @@ class OrderedObjParser{
|
|
|
34
34
|
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
|
|
35
35
|
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
|
|
36
36
|
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
|
|
37
|
-
"cent"
|
|
38
|
-
"pound"
|
|
39
|
-
"yen"
|
|
40
|
-
"euro"
|
|
41
|
-
"copyright"
|
|
42
|
-
"reg"
|
|
43
|
-
"inr"
|
|
44
|
-
"num_dec": { regex: /&#([0-9]{1,7});/g, val
|
|
45
|
-
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val
|
|
37
|
+
"cent": { regex: /&(cent|#162);/g, val: "¢" },
|
|
38
|
+
"pound": { regex: /&(pound|#163);/g, val: "£" },
|
|
39
|
+
"yen": { regex: /&(yen|#165);/g, val: "¥" },
|
|
40
|
+
"euro": { regex: /&(euro|#8364);/g, val: "€" },
|
|
41
|
+
"copyright": { regex: /&(copy|#169);/g, val: "©" },
|
|
42
|
+
"reg": { regex: /&(reg|#174);/g, val: "®" },
|
|
43
|
+
"inr": { regex: /&(inr|#8377);/g, val: "₹" },
|
|
44
|
+
"num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") },
|
|
45
|
+
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") },
|
|
46
46
|
};
|
|
47
47
|
this.addExternalEntities = addExternalEntities;
|
|
48
48
|
this.parseXml = parseXml;
|
|
@@ -55,17 +55,34 @@ class OrderedObjParser{
|
|
|
55
55
|
this.saveTextToParentTag = saveTextToParentTag;
|
|
56
56
|
this.addChild = addChild;
|
|
57
57
|
this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes)
|
|
58
|
+
this.entityExpansionCount = 0;
|
|
59
|
+
this.currentExpandedLength = 0;
|
|
60
|
+
|
|
61
|
+
if (this.options.stopNodes && this.options.stopNodes.length > 0) {
|
|
62
|
+
this.stopNodesExact = new Set();
|
|
63
|
+
this.stopNodesWildcard = new Set();
|
|
64
|
+
for (let i = 0; i < this.options.stopNodes.length; i++) {
|
|
65
|
+
const stopNodeExp = this.options.stopNodes[i];
|
|
66
|
+
if (typeof stopNodeExp !== 'string') continue;
|
|
67
|
+
if (stopNodeExp.startsWith("*.")) {
|
|
68
|
+
this.stopNodesWildcard.add(stopNodeExp.substring(2));
|
|
69
|
+
} else {
|
|
70
|
+
this.stopNodesExact.add(stopNodeExp);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
58
74
|
}
|
|
59
75
|
|
|
60
76
|
}
|
|
61
77
|
|
|
62
|
-
function addExternalEntities(externalEntities){
|
|
78
|
+
function addExternalEntities(externalEntities) {
|
|
63
79
|
const entKeys = Object.keys(externalEntities);
|
|
64
80
|
for (let i = 0; i < entKeys.length; i++) {
|
|
65
81
|
const ent = entKeys[i];
|
|
82
|
+
const escaped = ent.replace(/[.\-+*:]/g, '\\.');
|
|
66
83
|
this.lastEntities[ent] = {
|
|
67
|
-
|
|
68
|
-
|
|
84
|
+
regex: new RegExp("&" + escaped + ";", "g"),
|
|
85
|
+
val: externalEntities[ent]
|
|
69
86
|
}
|
|
70
87
|
}
|
|
71
88
|
}
|
|
@@ -84,23 +101,23 @@ function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode,
|
|
|
84
101
|
if (this.options.trimValues && !dontTrim) {
|
|
85
102
|
val = val.trim();
|
|
86
103
|
}
|
|
87
|
-
if(val.length > 0){
|
|
88
|
-
if(!escapeEntities) val = this.replaceEntitiesValue(val);
|
|
89
|
-
|
|
104
|
+
if (val.length > 0) {
|
|
105
|
+
if (!escapeEntities) val = this.replaceEntitiesValue(val, tagName, jPath);
|
|
106
|
+
|
|
90
107
|
const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode);
|
|
91
|
-
if(newval === null || newval === undefined){
|
|
108
|
+
if (newval === null || newval === undefined) {
|
|
92
109
|
//don't parse
|
|
93
110
|
return val;
|
|
94
|
-
}else if(typeof newval !== typeof val || newval !== val){
|
|
111
|
+
} else if (typeof newval !== typeof val || newval !== val) {
|
|
95
112
|
//overwrite
|
|
96
113
|
return newval;
|
|
97
|
-
}else if(this.options.trimValues){
|
|
114
|
+
} else if (this.options.trimValues) {
|
|
98
115
|
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
99
|
-
}else{
|
|
116
|
+
} else {
|
|
100
117
|
const trimmedVal = val.trim();
|
|
101
|
-
if(trimmedVal === val){
|
|
118
|
+
if (trimmedVal === val) {
|
|
102
119
|
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
|
|
103
|
-
}else{
|
|
120
|
+
} else {
|
|
104
121
|
return val;
|
|
105
122
|
}
|
|
106
123
|
}
|
|
@@ -145,20 +162,20 @@ function buildAttributesMap(attrStr, jPath, tagName) {
|
|
|
145
162
|
if (this.options.transformAttributeName) {
|
|
146
163
|
aName = this.options.transformAttributeName(aName);
|
|
147
164
|
}
|
|
148
|
-
if(aName === "__proto__") aName
|
|
165
|
+
if (aName === "__proto__") aName = "#__proto__";
|
|
149
166
|
if (oldVal !== undefined) {
|
|
150
167
|
if (this.options.trimValues) {
|
|
151
168
|
oldVal = oldVal.trim();
|
|
152
169
|
}
|
|
153
|
-
oldVal = this.replaceEntitiesValue(oldVal);
|
|
170
|
+
oldVal = this.replaceEntitiesValue(oldVal, tagName, jPath);
|
|
154
171
|
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath);
|
|
155
|
-
if(newVal === null || newVal === undefined){
|
|
172
|
+
if (newVal === null || newVal === undefined) {
|
|
156
173
|
//don't parse
|
|
157
174
|
attrs[aName] = oldVal;
|
|
158
|
-
}else if(typeof newVal !== typeof oldVal || newVal !== oldVal){
|
|
175
|
+
} else if (typeof newVal !== typeof oldVal || newVal !== oldVal) {
|
|
159
176
|
//overwrite
|
|
160
177
|
attrs[aName] = newVal;
|
|
161
|
-
}else{
|
|
178
|
+
} else {
|
|
162
179
|
//parse
|
|
163
180
|
attrs[aName] = parseValue(
|
|
164
181
|
oldVal,
|
|
@@ -183,46 +200,52 @@ function buildAttributesMap(attrStr, jPath, tagName) {
|
|
|
183
200
|
}
|
|
184
201
|
}
|
|
185
202
|
|
|
186
|
-
const parseXml = function(xmlData) {
|
|
203
|
+
const parseXml = function (xmlData) {
|
|
187
204
|
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
|
|
188
205
|
const xmlObj = new xmlNode('!xml');
|
|
189
206
|
let currentNode = xmlObj;
|
|
190
207
|
let textData = "";
|
|
191
208
|
let jPath = "";
|
|
192
|
-
|
|
209
|
+
|
|
210
|
+
// Reset entity expansion counters for this document
|
|
211
|
+
this.entityExpansionCount = 0;
|
|
212
|
+
this.currentExpandedLength = 0;
|
|
213
|
+
|
|
214
|
+
const docTypeReader = new DocTypeReader(this.options.processEntities);
|
|
215
|
+
for (let i = 0; i < xmlData.length; i++) {//for each char in XML data
|
|
193
216
|
const ch = xmlData[i];
|
|
194
|
-
if(ch === '<'){
|
|
217
|
+
if (ch === '<') {
|
|
195
218
|
// const nextIndex = i+1;
|
|
196
219
|
// const _2ndChar = xmlData[nextIndex];
|
|
197
|
-
if(
|
|
220
|
+
if (xmlData[i + 1] === '/') {//Closing Tag
|
|
198
221
|
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
|
|
199
|
-
let tagName = xmlData.substring(i+2,closeIndex).trim();
|
|
222
|
+
let tagName = xmlData.substring(i + 2, closeIndex).trim();
|
|
200
223
|
|
|
201
|
-
if(this.options.removeNSPrefix){
|
|
224
|
+
if (this.options.removeNSPrefix) {
|
|
202
225
|
const colonIndex = tagName.indexOf(":");
|
|
203
|
-
if(colonIndex !== -1){
|
|
204
|
-
tagName = tagName.substr(colonIndex+1);
|
|
226
|
+
if (colonIndex !== -1) {
|
|
227
|
+
tagName = tagName.substr(colonIndex + 1);
|
|
205
228
|
}
|
|
206
229
|
}
|
|
207
230
|
|
|
208
|
-
if(this.options.transformTagName) {
|
|
231
|
+
if (this.options.transformTagName) {
|
|
209
232
|
tagName = this.options.transformTagName(tagName);
|
|
210
233
|
}
|
|
211
234
|
|
|
212
|
-
if(currentNode){
|
|
235
|
+
if (currentNode) {
|
|
213
236
|
textData = this.saveTextToParentTag(textData, currentNode, jPath);
|
|
214
237
|
}
|
|
215
238
|
|
|
216
239
|
//check if last tag of nested tag was unpaired tag
|
|
217
|
-
const lastTagName = jPath.substring(jPath.lastIndexOf(".")+1);
|
|
218
|
-
if(tagName && this.options.unpairedTags.indexOf(tagName) !== -1
|
|
240
|
+
const lastTagName = jPath.substring(jPath.lastIndexOf(".") + 1);
|
|
241
|
+
if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) {
|
|
219
242
|
throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
|
|
220
243
|
}
|
|
221
244
|
let propIndex = 0
|
|
222
|
-
if(lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1
|
|
223
|
-
propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.')-1)
|
|
245
|
+
if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) {
|
|
246
|
+
propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.') - 1)
|
|
224
247
|
this.tagsNodeStack.pop();
|
|
225
|
-
}else{
|
|
248
|
+
} else {
|
|
226
249
|
propIndex = jPath.lastIndexOf(".");
|
|
227
250
|
}
|
|
228
251
|
jPath = jPath.substring(0, propIndex);
|
|
@@ -230,74 +253,85 @@ const parseXml = function(xmlData) {
|
|
|
230
253
|
currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
|
|
231
254
|
textData = "";
|
|
232
255
|
i = closeIndex;
|
|
233
|
-
} else if(
|
|
256
|
+
} else if (xmlData[i + 1] === '?') {
|
|
234
257
|
|
|
235
|
-
let tagData = readTagExp(xmlData,i, false, "?>");
|
|
236
|
-
if(!tagData) throw new Error("Pi Tag is not closed.");
|
|
258
|
+
let tagData = readTagExp(xmlData, i, false, "?>");
|
|
259
|
+
if (!tagData) throw new Error("Pi Tag is not closed.");
|
|
237
260
|
|
|
238
261
|
textData = this.saveTextToParentTag(textData, currentNode, jPath);
|
|
239
|
-
if
|
|
262
|
+
if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) {
|
|
263
|
+
//do nothing
|
|
264
|
+
} else {
|
|
240
265
|
|
|
241
|
-
}else{
|
|
242
|
-
|
|
243
266
|
const childNode = new xmlNode(tagData.tagName);
|
|
244
267
|
childNode.add(this.options.textNodeName, "");
|
|
245
|
-
|
|
246
|
-
if(tagData.tagName !== tagData.tagExp && tagData.attrExpPresent){
|
|
268
|
+
|
|
269
|
+
if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
|
|
247
270
|
childNode[":@"] = this.buildAttributesMap(tagData.tagExp, jPath, tagData.tagName);
|
|
248
271
|
}
|
|
249
|
-
this.addChild(currentNode, childNode, jPath)
|
|
250
|
-
|
|
272
|
+
this.addChild(currentNode, childNode, jPath, i);
|
|
251
273
|
}
|
|
252
274
|
|
|
253
275
|
|
|
254
276
|
i = tagData.closeIndex + 1;
|
|
255
|
-
} else if(xmlData.substr(i + 1, 3) === '!--') {
|
|
256
|
-
const endIndex = findClosingIndex(xmlData, "-->", i+4, "Comment is not closed.")
|
|
257
|
-
if(this.options.commentPropName){
|
|
277
|
+
} else if (xmlData.substr(i + 1, 3) === '!--') {
|
|
278
|
+
const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.")
|
|
279
|
+
if (this.options.commentPropName) {
|
|
258
280
|
const comment = xmlData.substring(i + 4, endIndex - 2);
|
|
259
281
|
|
|
260
282
|
textData = this.saveTextToParentTag(textData, currentNode, jPath);
|
|
261
283
|
|
|
262
|
-
currentNode.add(this.options.commentPropName, [
|
|
284
|
+
currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]);
|
|
263
285
|
}
|
|
264
286
|
i = endIndex;
|
|
265
|
-
} else if(
|
|
266
|
-
const result = readDocType(xmlData, i);
|
|
287
|
+
} else if (xmlData.substr(i + 1, 2) === '!D') {
|
|
288
|
+
const result = docTypeReader.readDocType(xmlData, i);
|
|
267
289
|
this.docTypeEntities = result.entities;
|
|
268
290
|
i = result.i;
|
|
269
|
-
}else if(xmlData.substr(i + 1, 2) === '![') {
|
|
291
|
+
} else if (xmlData.substr(i + 1, 2) === '![') {
|
|
270
292
|
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
|
|
271
|
-
const tagExp = xmlData.substring(i + 9,closeIndex);
|
|
293
|
+
const tagExp = xmlData.substring(i + 9, closeIndex);
|
|
272
294
|
|
|
273
295
|
textData = this.saveTextToParentTag(textData, currentNode, jPath);
|
|
274
296
|
|
|
275
297
|
let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true, true);
|
|
276
|
-
if(val == undefined) val = "";
|
|
298
|
+
if (val == undefined) val = "";
|
|
277
299
|
|
|
278
300
|
//cdata should be set even if it is 0 length string
|
|
279
|
-
if(this.options.cdataPropName){
|
|
280
|
-
currentNode.add(this.options.cdataPropName, [
|
|
281
|
-
}else{
|
|
301
|
+
if (this.options.cdataPropName) {
|
|
302
|
+
currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]);
|
|
303
|
+
} else {
|
|
282
304
|
currentNode.add(this.options.textNodeName, val);
|
|
283
305
|
}
|
|
284
|
-
|
|
306
|
+
|
|
285
307
|
i = closeIndex + 2;
|
|
286
|
-
}else {//Opening tag
|
|
287
|
-
let result = readTagExp(xmlData,i, this.options.removeNSPrefix);
|
|
288
|
-
let tagName= result.tagName;
|
|
308
|
+
} else {//Opening tag
|
|
309
|
+
let result = readTagExp(xmlData, i, this.options.removeNSPrefix);
|
|
310
|
+
let tagName = result.tagName;
|
|
289
311
|
const rawTagName = result.rawTagName;
|
|
290
312
|
let tagExp = result.tagExp;
|
|
291
313
|
let attrExpPresent = result.attrExpPresent;
|
|
292
314
|
let closeIndex = result.closeIndex;
|
|
293
315
|
|
|
294
316
|
if (this.options.transformTagName) {
|
|
295
|
-
|
|
317
|
+
//console.log(tagExp, tagName)
|
|
318
|
+
const newTagName = this.options.transformTagName(tagName);
|
|
319
|
+
if (tagExp === tagName) {
|
|
320
|
+
tagExp = newTagName
|
|
321
|
+
}
|
|
322
|
+
tagName = newTagName;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
if (this.options.strictReservedNames &&
|
|
326
|
+
(tagName === this.options.commentPropName
|
|
327
|
+
|| tagName === this.options.cdataPropName
|
|
328
|
+
)) {
|
|
329
|
+
throw new Error(`Invalid tag name: ${tagName}`);
|
|
296
330
|
}
|
|
297
|
-
|
|
331
|
+
|
|
298
332
|
//save text as child node
|
|
299
333
|
if (currentNode && textData) {
|
|
300
|
-
if(currentNode.tagname !== '!xml'){
|
|
334
|
+
if (currentNode.tagname !== '!xml') {
|
|
301
335
|
//when nested tag is found
|
|
302
336
|
textData = this.saveTextToParentTag(textData, currentNode, jPath, false);
|
|
303
337
|
}
|
|
@@ -305,80 +339,99 @@ const parseXml = function(xmlData) {
|
|
|
305
339
|
|
|
306
340
|
//check if last tag was unpaired tag
|
|
307
341
|
const lastTag = currentNode;
|
|
308
|
-
if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1
|
|
342
|
+
if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) {
|
|
309
343
|
currentNode = this.tagsNodeStack.pop();
|
|
310
344
|
jPath = jPath.substring(0, jPath.lastIndexOf("."));
|
|
311
345
|
}
|
|
312
|
-
if(tagName !== xmlObj.tagname){
|
|
346
|
+
if (tagName !== xmlObj.tagname) {
|
|
313
347
|
jPath += jPath ? "." + tagName : tagName;
|
|
314
348
|
}
|
|
315
|
-
|
|
349
|
+
const startIndex = i;
|
|
350
|
+
if (this.isItStopNode(this.stopNodesExact, this.stopNodesWildcard, jPath, tagName)) {
|
|
316
351
|
let tagContent = "";
|
|
317
352
|
//self-closing tag
|
|
318
|
-
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
|
|
319
|
-
if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
|
|
353
|
+
if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
|
|
354
|
+
if (tagName[tagName.length - 1] === "/") { //remove trailing '/'
|
|
320
355
|
tagName = tagName.substr(0, tagName.length - 1);
|
|
321
356
|
jPath = jPath.substr(0, jPath.length - 1);
|
|
322
357
|
tagExp = tagName;
|
|
323
|
-
}else{
|
|
358
|
+
} else {
|
|
324
359
|
tagExp = tagExp.substr(0, tagExp.length - 1);
|
|
325
360
|
}
|
|
326
361
|
i = result.closeIndex;
|
|
327
362
|
}
|
|
328
363
|
//unpaired tag
|
|
329
|
-
else if(this.options.unpairedTags.indexOf(tagName) !== -1){
|
|
330
|
-
|
|
364
|
+
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {
|
|
365
|
+
|
|
331
366
|
i = result.closeIndex;
|
|
332
367
|
}
|
|
333
368
|
//normal tag
|
|
334
|
-
else{
|
|
369
|
+
else {
|
|
335
370
|
//read until closing tag is found
|
|
336
371
|
const result = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1);
|
|
337
|
-
if(!result) throw new Error(`Unexpected end of ${rawTagName}`);
|
|
372
|
+
if (!result) throw new Error(`Unexpected end of ${rawTagName}`);
|
|
338
373
|
i = result.i;
|
|
339
374
|
tagContent = result.tagContent;
|
|
340
375
|
}
|
|
341
376
|
|
|
342
377
|
const childNode = new xmlNode(tagName);
|
|
343
|
-
if(tagName !== tagExp && attrExpPresent){
|
|
378
|
+
if (tagName !== tagExp && attrExpPresent) {
|
|
344
379
|
childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
|
|
345
380
|
}
|
|
346
|
-
if(tagContent) {
|
|
381
|
+
if (tagContent) {
|
|
347
382
|
tagContent = this.parseTextData(tagContent, tagName, jPath, true, attrExpPresent, true, true);
|
|
348
383
|
}
|
|
349
|
-
|
|
384
|
+
|
|
350
385
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
351
386
|
childNode.add(this.options.textNodeName, tagContent);
|
|
352
|
-
|
|
353
|
-
this.addChild(currentNode, childNode, jPath)
|
|
354
|
-
}else{
|
|
355
|
-
|
|
356
|
-
if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){
|
|
357
|
-
if(tagName[tagName.length - 1] === "/"){ //remove trailing '/'
|
|
387
|
+
|
|
388
|
+
this.addChild(currentNode, childNode, jPath, startIndex);
|
|
389
|
+
} else {
|
|
390
|
+
//selfClosing tag
|
|
391
|
+
if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
|
|
392
|
+
if (tagName[tagName.length - 1] === "/") { //remove trailing '/'
|
|
358
393
|
tagName = tagName.substr(0, tagName.length - 1);
|
|
359
394
|
jPath = jPath.substr(0, jPath.length - 1);
|
|
360
395
|
tagExp = tagName;
|
|
361
|
-
}else{
|
|
396
|
+
} else {
|
|
362
397
|
tagExp = tagExp.substr(0, tagExp.length - 1);
|
|
363
398
|
}
|
|
364
|
-
|
|
365
|
-
if(this.options.transformTagName) {
|
|
366
|
-
|
|
399
|
+
|
|
400
|
+
if (this.options.transformTagName) {
|
|
401
|
+
const newTagName = this.options.transformTagName(tagName);
|
|
402
|
+
if (tagExp === tagName) {
|
|
403
|
+
tagExp = newTagName
|
|
404
|
+
}
|
|
405
|
+
tagName = newTagName;
|
|
367
406
|
}
|
|
368
407
|
|
|
369
408
|
const childNode = new xmlNode(tagName);
|
|
370
|
-
if(tagName !== tagExp && attrExpPresent){
|
|
409
|
+
if (tagName !== tagExp && attrExpPresent) {
|
|
371
410
|
childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
|
|
372
411
|
}
|
|
373
|
-
this.addChild(currentNode, childNode, jPath)
|
|
412
|
+
this.addChild(currentNode, childNode, jPath, startIndex);
|
|
413
|
+
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
414
|
+
}
|
|
415
|
+
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {//unpaired tag
|
|
416
|
+
const childNode = new xmlNode(tagName);
|
|
417
|
+
if (tagName !== tagExp && attrExpPresent) {
|
|
418
|
+
childNode[":@"] = this.buildAttributesMap(tagExp, jPath);
|
|
419
|
+
}
|
|
420
|
+
this.addChild(currentNode, childNode, jPath, startIndex);
|
|
374
421
|
jPath = jPath.substr(0, jPath.lastIndexOf("."));
|
|
422
|
+
i = result.closeIndex;
|
|
423
|
+
// Continue to next iteration without changing currentNode
|
|
424
|
+
continue;
|
|
375
425
|
}
|
|
376
|
-
|
|
377
|
-
else{
|
|
378
|
-
const childNode = new xmlNode(
|
|
426
|
+
//opening tag
|
|
427
|
+
else {
|
|
428
|
+
const childNode = new xmlNode(tagName);
|
|
429
|
+
if (this.tagsNodeStack.length > this.options.maxNestedTags) {
|
|
430
|
+
throw new Error("Maximum nested tags exceeded");
|
|
431
|
+
}
|
|
379
432
|
this.tagsNodeStack.push(currentNode);
|
|
380
|
-
|
|
381
|
-
if(tagName !== tagExp && attrExpPresent){
|
|
433
|
+
|
|
434
|
+
if (tagName !== tagExp && attrExpPresent) {
|
|
382
435
|
childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName);
|
|
383
436
|
}
|
|
384
437
|
this.addChild(currentNode, childNode, jPath)
|
|
@@ -388,58 +441,121 @@ const parseXml = function(xmlData) {
|
|
|
388
441
|
i = closeIndex;
|
|
389
442
|
}
|
|
390
443
|
}
|
|
391
|
-
}else{
|
|
444
|
+
} else {
|
|
392
445
|
textData += xmlData[i];
|
|
393
446
|
}
|
|
394
447
|
}
|
|
395
448
|
return xmlObj.child;
|
|
396
449
|
}
|
|
397
450
|
|
|
398
|
-
function addChild(currentNode, childNode, jPath){
|
|
451
|
+
function addChild(currentNode, childNode, jPath, startIndex) {
|
|
452
|
+
// unset startIndex if not requested
|
|
453
|
+
if (!this.options.captureMetaData) startIndex = undefined;
|
|
399
454
|
const result = this.options.updateTag(childNode.tagname, jPath, childNode[":@"])
|
|
400
|
-
if(result === false){
|
|
401
|
-
|
|
455
|
+
if (result === false) {
|
|
456
|
+
//do nothing
|
|
457
|
+
} else if (typeof result === "string") {
|
|
402
458
|
childNode.tagname = result
|
|
403
|
-
currentNode.addChild(childNode);
|
|
404
|
-
}else{
|
|
405
|
-
currentNode.addChild(childNode);
|
|
459
|
+
currentNode.addChild(childNode, startIndex);
|
|
460
|
+
} else {
|
|
461
|
+
currentNode.addChild(childNode, startIndex);
|
|
406
462
|
}
|
|
407
463
|
}
|
|
408
464
|
|
|
409
|
-
const replaceEntitiesValue = function(val){
|
|
465
|
+
const replaceEntitiesValue = function (val, tagName, jPath) {
|
|
466
|
+
// Performance optimization: Early return if no entities to replace
|
|
467
|
+
if (val.indexOf('&') === -1) {
|
|
468
|
+
return val;
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
const entityConfig = this.options.processEntities;
|
|
410
472
|
|
|
411
|
-
if(
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
473
|
+
if (!entityConfig.enabled) {
|
|
474
|
+
return val;
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
// Check tag-specific filtering
|
|
478
|
+
if (entityConfig.allowedTags) {
|
|
479
|
+
if (!entityConfig.allowedTags.includes(tagName)) {
|
|
480
|
+
return val; // Skip entity replacement for current tag as not set
|
|
415
481
|
}
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
if (entityConfig.tagFilter) {
|
|
485
|
+
if (!entityConfig.tagFilter(tagName, jPath)) {
|
|
486
|
+
return val; // Skip based on custom filter
|
|
419
487
|
}
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Replace DOCTYPE entities
|
|
491
|
+
for (let entityName in this.docTypeEntities) {
|
|
492
|
+
const entity = this.docTypeEntities[entityName];
|
|
493
|
+
const matches = val.match(entity.regx);
|
|
494
|
+
|
|
495
|
+
if (matches) {
|
|
496
|
+
// Track expansions
|
|
497
|
+
this.entityExpansionCount += matches.length;
|
|
498
|
+
|
|
499
|
+
// Check expansion limit
|
|
500
|
+
if (entityConfig.maxTotalExpansions &&
|
|
501
|
+
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
|
|
502
|
+
throw new Error(
|
|
503
|
+
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
|
|
504
|
+
);
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
// Store length before replacement
|
|
508
|
+
const lengthBefore = val.length;
|
|
509
|
+
val = val.replace(entity.regx, entity.val);
|
|
510
|
+
|
|
511
|
+
// Check expanded length immediately after replacement
|
|
512
|
+
if (entityConfig.maxExpandedLength) {
|
|
513
|
+
this.currentExpandedLength += (val.length - lengthBefore);
|
|
514
|
+
|
|
515
|
+
if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
|
|
516
|
+
throw new Error(
|
|
517
|
+
`Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
|
|
518
|
+
);
|
|
519
|
+
}
|
|
424
520
|
}
|
|
425
521
|
}
|
|
426
|
-
val = val.replace( this.ampEntity.regex, this.ampEntity.val);
|
|
427
522
|
}
|
|
523
|
+
if (val.indexOf('&') === -1) return val; // Early exit
|
|
524
|
+
|
|
525
|
+
// Replace standard entities
|
|
526
|
+
for (let entityName in this.lastEntities) {
|
|
527
|
+
const entity = this.lastEntities[entityName];
|
|
528
|
+
val = val.replace(entity.regex, entity.val);
|
|
529
|
+
}
|
|
530
|
+
if (val.indexOf('&') === -1) return val; // Early exit
|
|
531
|
+
|
|
532
|
+
// Replace HTML entities if enabled
|
|
533
|
+
if (this.options.htmlEntities) {
|
|
534
|
+
for (let entityName in this.htmlEntities) {
|
|
535
|
+
const entity = this.htmlEntities[entityName];
|
|
536
|
+
val = val.replace(entity.regex, entity.val);
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
// Replace ampersand entity last
|
|
541
|
+
val = val.replace(this.ampEntity.regex, this.ampEntity.val);
|
|
542
|
+
|
|
428
543
|
return val;
|
|
429
544
|
}
|
|
430
|
-
|
|
545
|
+
|
|
546
|
+
function saveTextToParentTag(textData, parentNode, jPath, isLeafNode) {
|
|
431
547
|
if (textData) { //store previously collected data as textNode
|
|
432
|
-
if(isLeafNode === undefined) isLeafNode =
|
|
433
|
-
|
|
548
|
+
if (isLeafNode === undefined) isLeafNode = parentNode.child.length === 0
|
|
549
|
+
|
|
434
550
|
textData = this.parseTextData(textData,
|
|
435
|
-
|
|
551
|
+
parentNode.tagname,
|
|
436
552
|
jPath,
|
|
437
553
|
false,
|
|
438
|
-
|
|
554
|
+
parentNode[":@"] ? Object.keys(parentNode[":@"]).length !== 0 : false,
|
|
439
555
|
isLeafNode);
|
|
440
556
|
|
|
441
557
|
if (textData !== undefined && textData !== "")
|
|
442
|
-
|
|
558
|
+
parentNode.add(this.options.textNodeName, textData);
|
|
443
559
|
textData = "";
|
|
444
560
|
}
|
|
445
561
|
return textData;
|
|
@@ -447,17 +563,14 @@ function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) {
|
|
|
447
563
|
|
|
448
564
|
//TODO: use jPath to simplify the logic
|
|
449
565
|
/**
|
|
450
|
-
*
|
|
451
|
-
* @param {
|
|
566
|
+
* @param {Set} stopNodesExact
|
|
567
|
+
* @param {Set} stopNodesWildcard
|
|
452
568
|
* @param {string} jPath
|
|
453
|
-
* @param {string} currentTagName
|
|
569
|
+
* @param {string} currentTagName
|
|
454
570
|
*/
|
|
455
|
-
function isItStopNode(
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
const stopNodeExp = stopNodes[stopNodePath];
|
|
459
|
-
if( allNodesExp === stopNodeExp || jPath === stopNodeExp ) return true;
|
|
460
|
-
}
|
|
571
|
+
function isItStopNode(stopNodesExact, stopNodesWildcard, jPath, currentTagName) {
|
|
572
|
+
if (stopNodesWildcard && stopNodesWildcard.has(currentTagName)) return true;
|
|
573
|
+
if (stopNodesExact && stopNodesExact.has(jPath)) return true;
|
|
461
574
|
return false;
|
|
462
575
|
}
|
|
463
576
|
|
|
@@ -467,24 +580,24 @@ function isItStopNode(stopNodes, jPath, currentTagName){
|
|
|
467
580
|
* @param {number} i starting index
|
|
468
581
|
* @returns
|
|
469
582
|
*/
|
|
470
|
-
function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){
|
|
583
|
+
function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
|
|
471
584
|
let attrBoundary;
|
|
472
585
|
let tagExp = "";
|
|
473
586
|
for (let index = i; index < xmlData.length; index++) {
|
|
474
587
|
let ch = xmlData[index];
|
|
475
588
|
if (attrBoundary) {
|
|
476
|
-
|
|
589
|
+
if (ch === attrBoundary) attrBoundary = "";//reset
|
|
477
590
|
} else if (ch === '"' || ch === "'") {
|
|
478
|
-
|
|
591
|
+
attrBoundary = ch;
|
|
479
592
|
} else if (ch === closingChar[0]) {
|
|
480
|
-
if(closingChar[1]){
|
|
481
|
-
if(xmlData[index + 1] === closingChar[1]){
|
|
593
|
+
if (closingChar[1]) {
|
|
594
|
+
if (xmlData[index + 1] === closingChar[1]) {
|
|
482
595
|
return {
|
|
483
596
|
data: tagExp,
|
|
484
597
|
index: index
|
|
485
598
|
}
|
|
486
599
|
}
|
|
487
|
-
}else{
|
|
600
|
+
} else {
|
|
488
601
|
return {
|
|
489
602
|
data: tagExp,
|
|
490
603
|
index: index
|
|
@@ -497,33 +610,33 @@ function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){
|
|
|
497
610
|
}
|
|
498
611
|
}
|
|
499
612
|
|
|
500
|
-
function findClosingIndex(xmlData, str, i, errMsg){
|
|
613
|
+
function findClosingIndex(xmlData, str, i, errMsg) {
|
|
501
614
|
const closingIndex = xmlData.indexOf(str, i);
|
|
502
|
-
if(closingIndex === -1){
|
|
615
|
+
if (closingIndex === -1) {
|
|
503
616
|
throw new Error(errMsg)
|
|
504
|
-
}else{
|
|
617
|
+
} else {
|
|
505
618
|
return closingIndex + str.length - 1;
|
|
506
619
|
}
|
|
507
620
|
}
|
|
508
621
|
|
|
509
|
-
function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){
|
|
510
|
-
const result = tagExpWithClosingIndex(xmlData, i+1, closingChar);
|
|
511
|
-
if(!result) return;
|
|
622
|
+
function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
|
|
623
|
+
const result = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
|
|
624
|
+
if (!result) return;
|
|
512
625
|
let tagExp = result.data;
|
|
513
626
|
const closeIndex = result.index;
|
|
514
627
|
const separatorIndex = tagExp.search(/\s/);
|
|
515
628
|
let tagName = tagExp;
|
|
516
629
|
let attrExpPresent = true;
|
|
517
|
-
if(separatorIndex !== -1){//separate tag name and attributes expression
|
|
630
|
+
if (separatorIndex !== -1) {//separate tag name and attributes expression
|
|
518
631
|
tagName = tagExp.substring(0, separatorIndex);
|
|
519
632
|
tagExp = tagExp.substring(separatorIndex + 1).trimStart();
|
|
520
633
|
}
|
|
521
634
|
|
|
522
635
|
const rawTagName = tagName;
|
|
523
|
-
if(removeNSPrefix){
|
|
636
|
+
if (removeNSPrefix) {
|
|
524
637
|
const colonIndex = tagName.indexOf(":");
|
|
525
|
-
if(colonIndex !== -1){
|
|
526
|
-
tagName = tagName.substr(colonIndex+1);
|
|
638
|
+
if (colonIndex !== -1) {
|
|
639
|
+
tagName = tagName.substr(colonIndex + 1);
|
|
527
640
|
attrExpPresent = tagName !== result.data.substr(colonIndex + 1);
|
|
528
641
|
}
|
|
529
642
|
}
|
|
@@ -542,47 +655,47 @@ function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){
|
|
|
542
655
|
* @param {string} tagName
|
|
543
656
|
* @param {number} i
|
|
544
657
|
*/
|
|
545
|
-
function readStopNodeData(xmlData, tagName, i){
|
|
658
|
+
function readStopNodeData(xmlData, tagName, i) {
|
|
546
659
|
const startIndex = i;
|
|
547
660
|
// Starting at 1 since we already have an open tag
|
|
548
661
|
let openTagCount = 1;
|
|
549
662
|
|
|
550
663
|
for (; i < xmlData.length; i++) {
|
|
551
|
-
if(
|
|
552
|
-
if (xmlData[i+1] === "/") {//close tag
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
}
|
|
664
|
+
if (xmlData[i] === "<") {
|
|
665
|
+
if (xmlData[i + 1] === "/") {//close tag
|
|
666
|
+
const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
|
|
667
|
+
let closeTagName = xmlData.substring(i + 2, closeIndex).trim();
|
|
668
|
+
if (closeTagName === tagName) {
|
|
669
|
+
openTagCount--;
|
|
670
|
+
if (openTagCount === 0) {
|
|
671
|
+
return {
|
|
672
|
+
tagContent: xmlData.substring(startIndex, i),
|
|
673
|
+
i: closeIndex
|
|
562
674
|
}
|
|
563
675
|
}
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
676
|
+
}
|
|
677
|
+
i = closeIndex;
|
|
678
|
+
} else if (xmlData[i + 1] === '?') {
|
|
679
|
+
const closeIndex = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.")
|
|
680
|
+
i = closeIndex;
|
|
681
|
+
} else if (xmlData.substr(i + 1, 3) === '!--') {
|
|
682
|
+
const closeIndex = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.")
|
|
683
|
+
i = closeIndex;
|
|
684
|
+
} else if (xmlData.substr(i + 1, 2) === '![') {
|
|
685
|
+
const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
|
|
686
|
+
i = closeIndex;
|
|
687
|
+
} else {
|
|
688
|
+
const tagData = readTagExp(xmlData, i, '>')
|
|
576
689
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
}
|
|
582
|
-
i=tagData.closeIndex;
|
|
690
|
+
if (tagData) {
|
|
691
|
+
const openTagName = tagData && tagData.tagName;
|
|
692
|
+
if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length - 1] !== "/") {
|
|
693
|
+
openTagCount++;
|
|
583
694
|
}
|
|
695
|
+
i = tagData.closeIndex;
|
|
584
696
|
}
|
|
585
697
|
}
|
|
698
|
+
}
|
|
586
699
|
}//end for loop
|
|
587
700
|
}
|
|
588
701
|
|
|
@@ -590,8 +703,8 @@ function parseValue(val, shouldParse, options) {
|
|
|
590
703
|
if (shouldParse && typeof val === 'string') {
|
|
591
704
|
//console.log(options)
|
|
592
705
|
const newval = val.trim();
|
|
593
|
-
if(newval === 'true'
|
|
594
|
-
else if(newval === 'false'
|
|
706
|
+
if (newval === 'true') return true;
|
|
707
|
+
else if (newval === 'false') return false;
|
|
595
708
|
else return toNumber(val, options);
|
|
596
709
|
} else {
|
|
597
710
|
if (util.isExist(val)) {
|
|
@@ -602,5 +715,14 @@ function parseValue(val, shouldParse, options) {
|
|
|
602
715
|
}
|
|
603
716
|
}
|
|
604
717
|
|
|
718
|
+
function fromCodePoint(str, base, prefix) {
|
|
719
|
+
const codePoint = Number.parseInt(str, base);
|
|
720
|
+
|
|
721
|
+
if (codePoint >= 0 && codePoint <= 0x10FFFF) {
|
|
722
|
+
return String.fromCodePoint(codePoint);
|
|
723
|
+
} else {
|
|
724
|
+
return prefix + str + ";";
|
|
725
|
+
}
|
|
726
|
+
}
|
|
605
727
|
|
|
606
728
|
module.exports = OrderedObjParser;
|