fast-xml-parser 5.5.9 → 5.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +8 -28
- package/lib/fxbuilder.min.js +1 -1
- package/lib/fxbuilder.min.js.map +1 -1
- package/lib/fxp.cjs +1 -1
- package/lib/fxp.min.js +1 -1
- package/lib/fxp.min.js.map +1 -1
- package/lib/fxparser.min.js +1 -1
- package/lib/fxparser.min.js.map +1 -1
- package/package.json +4 -4
- package/src/xmlparser/OptionsBuilder.js +4 -4
- package/src/xmlparser/OrderedObjParser.js +157 -146
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "fast-xml-parser",
|
|
3
|
-
"version": "5.5.
|
|
3
|
+
"version": "5.5.11",
|
|
4
4
|
"description": "Validate XML, Parse XML, Build XML without C/C++ based libraries",
|
|
5
5
|
"main": "./lib/fxp.cjs",
|
|
6
6
|
"type": "module",
|
|
@@ -88,7 +88,7 @@
|
|
|
88
88
|
],
|
|
89
89
|
"dependencies": {
|
|
90
90
|
"fast-xml-builder": "^1.1.4",
|
|
91
|
-
"path-expression-matcher": "^1.
|
|
92
|
-
"strnum": "^2.2.
|
|
91
|
+
"path-expression-matcher": "^1.4.0",
|
|
92
|
+
"strnum": "^2.2.3"
|
|
93
93
|
}
|
|
94
|
-
}
|
|
94
|
+
}
|
|
@@ -105,10 +105,10 @@ function normalizeProcessEntities(value) {
|
|
|
105
105
|
return {
|
|
106
106
|
enabled: value.enabled !== false,
|
|
107
107
|
maxEntitySize: Math.max(1, value.maxEntitySize ?? 10000),
|
|
108
|
-
maxExpansionDepth: Math.max(1, value.maxExpansionDepth ??
|
|
109
|
-
maxTotalExpansions: Math.max(1, value.maxTotalExpansions ??
|
|
108
|
+
maxExpansionDepth: Math.max(1, value.maxExpansionDepth ?? 10000),
|
|
109
|
+
maxTotalExpansions: Math.max(1, value.maxTotalExpansions ?? Infinity),
|
|
110
110
|
maxExpandedLength: Math.max(1, value.maxExpandedLength ?? 100000),
|
|
111
|
-
maxEntityCount: Math.max(1, value.maxEntityCount ??
|
|
111
|
+
maxEntityCount: Math.max(1, value.maxEntityCount ?? 1000),
|
|
112
112
|
allowedTags: value.allowedTags ?? null,
|
|
113
113
|
tagFilter: value.tagFilter ?? null
|
|
114
114
|
};
|
|
@@ -142,7 +142,7 @@ export const buildOptions = function (options) {
|
|
|
142
142
|
|
|
143
143
|
// Always normalize processEntities for backward compatibility and validation
|
|
144
144
|
built.processEntities = normalizeProcessEntities(built.processEntities);
|
|
145
|
-
|
|
145
|
+
built.unpairedTagsSet = new Set(built.unpairedTags);
|
|
146
146
|
// Convert old-style stopNodes for backward compatibility
|
|
147
147
|
if (built.stopNodes && Array.isArray(built.stopNodes)) {
|
|
148
148
|
built.stopNodes = built.stopNodes.map(node => {
|
|
@@ -7,6 +7,7 @@ import DocTypeReader from './DocTypeReader.js';
|
|
|
7
7
|
import toNumber from "strnum";
|
|
8
8
|
import getIgnoreAttributesFn from "../ignoreAttributes.js";
|
|
9
9
|
import { Expression, Matcher } from 'path-expression-matcher';
|
|
10
|
+
import { ExpressionSet } from 'path-expression-matcher';
|
|
10
11
|
|
|
11
12
|
// const regx =
|
|
12
13
|
// '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
|
|
@@ -121,18 +122,20 @@ export default class OrderedObjParser {
|
|
|
121
122
|
this.isCurrentNodeStopNode = false;
|
|
122
123
|
|
|
123
124
|
// Pre-compile stopNodes expressions
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
125
|
+
this.stopNodeExpressionsSet = new ExpressionSet();
|
|
126
|
+
const stopNodesOpts = this.options.stopNodes;
|
|
127
|
+
if (stopNodesOpts && stopNodesOpts.length > 0) {
|
|
128
|
+
for (let i = 0; i < stopNodesOpts.length; i++) {
|
|
129
|
+
const stopNodeExp = stopNodesOpts[i];
|
|
128
130
|
if (typeof stopNodeExp === 'string') {
|
|
129
131
|
// Convert string to Expression object
|
|
130
|
-
this.
|
|
132
|
+
this.stopNodeExpressionsSet.add(new Expression(stopNodeExp));
|
|
131
133
|
} else if (stopNodeExp instanceof Expression) {
|
|
132
134
|
// Already an Expression object
|
|
133
|
-
this.
|
|
135
|
+
this.stopNodeExpressionsSet.add(stopNodeExp);
|
|
134
136
|
}
|
|
135
137
|
}
|
|
138
|
+
this.stopNodeExpressionsSet.seal();
|
|
136
139
|
}
|
|
137
140
|
}
|
|
138
141
|
|
|
@@ -160,28 +163,29 @@ function addExternalEntities(externalEntities) {
|
|
|
160
163
|
* @param {boolean} escapeEntities
|
|
161
164
|
*/
|
|
162
165
|
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode, escapeEntities) {
|
|
166
|
+
const options = this.options;
|
|
163
167
|
if (val !== undefined) {
|
|
164
|
-
if (
|
|
168
|
+
if (options.trimValues && !dontTrim) {
|
|
165
169
|
val = val.trim();
|
|
166
170
|
}
|
|
167
171
|
if (val.length > 0) {
|
|
168
172
|
if (!escapeEntities) val = this.replaceEntitiesValue(val, tagName, jPath);
|
|
169
173
|
|
|
170
174
|
// Pass jPath string or matcher based on options.jPath setting
|
|
171
|
-
const jPathOrMatcher =
|
|
172
|
-
const newval =
|
|
175
|
+
const jPathOrMatcher = options.jPath ? jPath.toString() : jPath;
|
|
176
|
+
const newval = options.tagValueProcessor(tagName, val, jPathOrMatcher, hasAttributes, isLeafNode);
|
|
173
177
|
if (newval === null || newval === undefined) {
|
|
174
178
|
//don't parse
|
|
175
179
|
return val;
|
|
176
180
|
} else if (typeof newval !== typeof val || newval !== val) {
|
|
177
181
|
//overwrite
|
|
178
182
|
return newval;
|
|
179
|
-
} else if (
|
|
180
|
-
return parseValue(val,
|
|
183
|
+
} else if (options.trimValues) {
|
|
184
|
+
return parseValue(val, options.parseTagValue, options.numberParseOptions);
|
|
181
185
|
} else {
|
|
182
186
|
const trimmedVal = val.trim();
|
|
183
187
|
if (trimmedVal === val) {
|
|
184
|
-
return parseValue(val,
|
|
188
|
+
return parseValue(val, options.parseTagValue, options.numberParseOptions);
|
|
185
189
|
} else {
|
|
186
190
|
return val;
|
|
187
191
|
}
|
|
@@ -209,7 +213,8 @@ function resolveNameSpace(tagname) {
|
|
|
209
213
|
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
|
|
210
214
|
|
|
211
215
|
function buildAttributesMap(attrStr, jPath, tagName) {
|
|
212
|
-
|
|
216
|
+
const options = this.options;
|
|
217
|
+
if (options.ignoreAttributes !== true && typeof attrStr === 'string') {
|
|
213
218
|
// attrStr = attrStr.replace(/\r?\n/g, ' ');
|
|
214
219
|
//attrStr = attrStr || attrStr.trim();
|
|
215
220
|
|
|
@@ -217,89 +222,80 @@ function buildAttributesMap(attrStr, jPath, tagName) {
|
|
|
217
222
|
const len = matches.length; //don't make it inline
|
|
218
223
|
const attrs = {};
|
|
219
224
|
|
|
220
|
-
//
|
|
221
|
-
//
|
|
225
|
+
// Pre-process values once: trim + entity replacement
|
|
226
|
+
// Reused in both matcher update and second pass
|
|
227
|
+
const processedVals = new Array(len);
|
|
228
|
+
let hasRawAttrs = false;
|
|
222
229
|
const rawAttrsForMatcher = {};
|
|
230
|
+
|
|
223
231
|
for (let i = 0; i < len; i++) {
|
|
224
232
|
const attrName = this.resolveNameSpace(matches[i][1]);
|
|
225
233
|
const oldVal = matches[i][4];
|
|
226
234
|
|
|
227
235
|
if (attrName.length && oldVal !== undefined) {
|
|
228
|
-
let
|
|
229
|
-
if (
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
rawAttrsForMatcher[attrName] =
|
|
236
|
+
let val = oldVal;
|
|
237
|
+
if (options.trimValues) val = val.trim();
|
|
238
|
+
val = this.replaceEntitiesValue(val, tagName, this.readonlyMatcher);
|
|
239
|
+
processedVals[i] = val;
|
|
240
|
+
|
|
241
|
+
rawAttrsForMatcher[attrName] = val;
|
|
242
|
+
hasRawAttrs = true;
|
|
234
243
|
}
|
|
235
244
|
}
|
|
236
245
|
|
|
237
|
-
// Update matcher
|
|
238
|
-
if (
|
|
246
|
+
// Update matcher ONCE before second pass, if applicable
|
|
247
|
+
if (hasRawAttrs && typeof jPath === 'object' && jPath.updateCurrent) {
|
|
239
248
|
jPath.updateCurrent(rawAttrsForMatcher);
|
|
240
249
|
}
|
|
241
250
|
|
|
242
|
-
//
|
|
251
|
+
// Hoist toString() once — path doesn't change during attribute processing
|
|
252
|
+
const jPathStr = options.jPath ? jPath.toString() : this.readonlyMatcher;
|
|
253
|
+
|
|
254
|
+
// Second pass: apply processors, build final attrs
|
|
255
|
+
let hasAttrs = false;
|
|
243
256
|
for (let i = 0; i < len; i++) {
|
|
244
257
|
const attrName = this.resolveNameSpace(matches[i][1]);
|
|
245
258
|
|
|
246
|
-
|
|
247
|
-
const jPathStr = this.options.jPath ? jPath.toString() : this.readonlyMatcher;
|
|
248
|
-
if (this.ignoreAttributesFn(attrName, jPathStr)) {
|
|
249
|
-
continue
|
|
250
|
-
}
|
|
259
|
+
if (this.ignoreAttributesFn(attrName, jPathStr)) continue;
|
|
251
260
|
|
|
252
|
-
let
|
|
253
|
-
let aName = this.options.attributeNamePrefix + attrName;
|
|
261
|
+
let aName = options.attributeNamePrefix + attrName;
|
|
254
262
|
|
|
255
263
|
if (attrName.length) {
|
|
256
|
-
if (
|
|
257
|
-
aName =
|
|
264
|
+
if (options.transformAttributeName) {
|
|
265
|
+
aName = options.transformAttributeName(aName);
|
|
258
266
|
}
|
|
259
|
-
|
|
260
|
-
aName = sanitizeName(aName, this.options);
|
|
267
|
+
aName = sanitizeName(aName, options);
|
|
261
268
|
|
|
262
|
-
if (
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
}
|
|
266
|
-
oldVal = this.replaceEntitiesValue(oldVal, tagName, this.readonlyMatcher);
|
|
269
|
+
if (matches[i][4] !== undefined) {
|
|
270
|
+
// Reuse already-processed value — no double entity replacement
|
|
271
|
+
const oldVal = processedVals[i];
|
|
267
272
|
|
|
268
|
-
|
|
269
|
-
const jPathOrMatcher = this.options.jPath ? jPath.toString() : this.readonlyMatcher;
|
|
270
|
-
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPathOrMatcher);
|
|
273
|
+
const newVal = options.attributeValueProcessor(attrName, oldVal, jPathStr);
|
|
271
274
|
if (newVal === null || newVal === undefined) {
|
|
272
|
-
//don't parse
|
|
273
275
|
attrs[aName] = oldVal;
|
|
274
276
|
} else if (typeof newVal !== typeof oldVal || newVal !== oldVal) {
|
|
275
|
-
//overwrite
|
|
276
277
|
attrs[aName] = newVal;
|
|
277
278
|
} else {
|
|
278
|
-
|
|
279
|
-
attrs[aName] = parseValue(
|
|
280
|
-
oldVal,
|
|
281
|
-
this.options.parseAttributeValue,
|
|
282
|
-
this.options.numberParseOptions
|
|
283
|
-
);
|
|
279
|
+
attrs[aName] = parseValue(oldVal, options.parseAttributeValue, options.numberParseOptions);
|
|
284
280
|
}
|
|
285
|
-
|
|
281
|
+
hasAttrs = true;
|
|
282
|
+
} else if (options.allowBooleanAttributes) {
|
|
286
283
|
attrs[aName] = true;
|
|
284
|
+
hasAttrs = true;
|
|
287
285
|
}
|
|
288
286
|
}
|
|
289
287
|
}
|
|
290
288
|
|
|
291
|
-
if (!
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
if (this.options.attributesGroupName) {
|
|
289
|
+
if (!hasAttrs) return;
|
|
290
|
+
|
|
291
|
+
if (options.attributesGroupName) {
|
|
295
292
|
const attrCollection = {};
|
|
296
|
-
attrCollection[
|
|
293
|
+
attrCollection[options.attributesGroupName] = attrs;
|
|
297
294
|
return attrCollection;
|
|
298
295
|
}
|
|
299
|
-
return attrs
|
|
296
|
+
return attrs;
|
|
300
297
|
}
|
|
301
298
|
}
|
|
302
|
-
|
|
303
299
|
const parseXml = function (xmlData) {
|
|
304
300
|
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
|
|
305
301
|
const xmlObj = new xmlNode('!xml');
|
|
@@ -312,25 +308,30 @@ const parseXml = function (xmlData) {
|
|
|
312
308
|
// Reset entity expansion counters for this document
|
|
313
309
|
this.entityExpansionCount = 0;
|
|
314
310
|
this.currentExpandedLength = 0;
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
311
|
+
this.docTypeEntitiesKeys = [];
|
|
312
|
+
this.lastEntitiesKeys = Object.keys(this.lastEntities);
|
|
313
|
+
this.htmlEntitiesKeys = this.options.htmlEntities ? Object.keys(this.htmlEntities) : [];
|
|
314
|
+
const options = this.options;
|
|
315
|
+
const docTypeReader = new DocTypeReader(options.processEntities);
|
|
316
|
+
const xmlLen = xmlData.length;
|
|
317
|
+
for (let i = 0; i < xmlLen; i++) {//for each char in XML data
|
|
318
318
|
const ch = xmlData[i];
|
|
319
319
|
if (ch === '<') {
|
|
320
320
|
// const nextIndex = i+1;
|
|
321
321
|
// const _2ndChar = xmlData[nextIndex];
|
|
322
|
-
|
|
322
|
+
const c1 = xmlData.charCodeAt(i + 1);
|
|
323
|
+
if (c1 === 47) {//Closing Tag '/'
|
|
323
324
|
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
|
|
324
325
|
let tagName = xmlData.substring(i + 2, closeIndex).trim();
|
|
325
326
|
|
|
326
|
-
if (
|
|
327
|
+
if (options.removeNSPrefix) {
|
|
327
328
|
const colonIndex = tagName.indexOf(":");
|
|
328
329
|
if (colonIndex !== -1) {
|
|
329
330
|
tagName = tagName.substr(colonIndex + 1);
|
|
330
331
|
}
|
|
331
332
|
}
|
|
332
333
|
|
|
333
|
-
tagName = transformTagName(
|
|
334
|
+
tagName = transformTagName(options.transformTagName, tagName, "", options).tagName;
|
|
334
335
|
|
|
335
336
|
if (currentNode) {
|
|
336
337
|
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
|
|
@@ -338,10 +339,10 @@ const parseXml = function (xmlData) {
|
|
|
338
339
|
|
|
339
340
|
//check if last tag of nested tag was unpaired tag
|
|
340
341
|
const lastTagName = this.matcher.getCurrentTag();
|
|
341
|
-
if (tagName &&
|
|
342
|
+
if (tagName && options.unpairedTagsSet.has(tagName)) {
|
|
342
343
|
throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
|
|
343
344
|
}
|
|
344
|
-
if (lastTagName &&
|
|
345
|
+
if (lastTagName && options.unpairedTagsSet.has(lastTagName)) {
|
|
345
346
|
// Pop the unpaired tag
|
|
346
347
|
this.matcher.pop();
|
|
347
348
|
this.tagsNodeStack.pop();
|
|
@@ -353,18 +354,18 @@ const parseXml = function (xmlData) {
|
|
|
353
354
|
currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
|
|
354
355
|
textData = "";
|
|
355
356
|
i = closeIndex;
|
|
356
|
-
} else if (
|
|
357
|
+
} else if (c1 === 63) { //'?'
|
|
357
358
|
|
|
358
359
|
let tagData = readTagExp(xmlData, i, false, "?>");
|
|
359
360
|
if (!tagData) throw new Error("Pi Tag is not closed.");
|
|
360
361
|
|
|
361
362
|
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
|
|
362
|
-
if ((
|
|
363
|
+
if ((options.ignoreDeclaration && tagData.tagName === "?xml") || options.ignorePiTags) {
|
|
363
364
|
//do nothing
|
|
364
365
|
} else {
|
|
365
366
|
|
|
366
367
|
const childNode = new xmlNode(tagData.tagName);
|
|
367
|
-
childNode.add(
|
|
368
|
+
childNode.add(options.textNodeName, "");
|
|
368
369
|
|
|
369
370
|
if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
|
|
370
371
|
childNode[":@"] = this.buildAttributesMap(tagData.tagExp, this.matcher, tagData.tagName);
|
|
@@ -374,21 +375,26 @@ const parseXml = function (xmlData) {
|
|
|
374
375
|
|
|
375
376
|
|
|
376
377
|
i = tagData.closeIndex + 1;
|
|
377
|
-
} else if (
|
|
378
|
+
} else if (c1 === 33
|
|
379
|
+
&& xmlData.charCodeAt(i + 2) === 45
|
|
380
|
+
&& xmlData.charCodeAt(i + 3) === 45) { //'!--'
|
|
378
381
|
const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.")
|
|
379
|
-
if (
|
|
382
|
+
if (options.commentPropName) {
|
|
380
383
|
const comment = xmlData.substring(i + 4, endIndex - 2);
|
|
381
384
|
|
|
382
385
|
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
|
|
383
386
|
|
|
384
|
-
currentNode.add(
|
|
387
|
+
currentNode.add(options.commentPropName, [{ [options.textNodeName]: comment }]);
|
|
385
388
|
}
|
|
386
389
|
i = endIndex;
|
|
387
|
-
} else if (
|
|
390
|
+
} else if (c1 === 33
|
|
391
|
+
&& xmlData.charCodeAt(i + 2) === 68) { //'!D'
|
|
388
392
|
const result = docTypeReader.readDocType(xmlData, i);
|
|
389
393
|
this.docTypeEntities = result.entities;
|
|
394
|
+
this.docTypeEntitiesKeys = Object.keys(this.docTypeEntities) || []
|
|
390
395
|
i = result.i;
|
|
391
|
-
} else if (
|
|
396
|
+
} else if (c1 === 33
|
|
397
|
+
&& xmlData.charCodeAt(i + 2) === 91) { // '!['
|
|
392
398
|
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
|
|
393
399
|
const tagExp = xmlData.substring(i + 9, closeIndex);
|
|
394
400
|
|
|
@@ -398,20 +404,20 @@ const parseXml = function (xmlData) {
|
|
|
398
404
|
if (val == undefined) val = "";
|
|
399
405
|
|
|
400
406
|
//cdata should be set even if it is 0 length string
|
|
401
|
-
if (
|
|
402
|
-
currentNode.add(
|
|
407
|
+
if (options.cdataPropName) {
|
|
408
|
+
currentNode.add(options.cdataPropName, [{ [options.textNodeName]: tagExp }]);
|
|
403
409
|
} else {
|
|
404
|
-
currentNode.add(
|
|
410
|
+
currentNode.add(options.textNodeName, val);
|
|
405
411
|
}
|
|
406
412
|
|
|
407
413
|
i = closeIndex + 2;
|
|
408
414
|
} else {//Opening tag
|
|
409
|
-
let result = readTagExp(xmlData, i,
|
|
415
|
+
let result = readTagExp(xmlData, i, options.removeNSPrefix);
|
|
410
416
|
|
|
411
417
|
// Safety check: readTagExp can return undefined
|
|
412
418
|
if (!result) {
|
|
413
419
|
// Log context for debugging
|
|
414
|
-
const context = xmlData.substring(Math.max(0, i - 50), Math.min(
|
|
420
|
+
const context = xmlData.substring(Math.max(0, i - 50), Math.min(xmlLen, i + 50));
|
|
415
421
|
throw new Error(`readTagExp returned undefined at position ${i}. Context: "${context}"`);
|
|
416
422
|
}
|
|
417
423
|
|
|
@@ -421,13 +427,13 @@ const parseXml = function (xmlData) {
|
|
|
421
427
|
let attrExpPresent = result.attrExpPresent;
|
|
422
428
|
let closeIndex = result.closeIndex;
|
|
423
429
|
|
|
424
|
-
({ tagName, tagExp } = transformTagName(
|
|
430
|
+
({ tagName, tagExp } = transformTagName(options.transformTagName, tagName, tagExp, options));
|
|
425
431
|
|
|
426
|
-
if (
|
|
427
|
-
(tagName ===
|
|
428
|
-
|| tagName ===
|
|
429
|
-
|| tagName ===
|
|
430
|
-
|| tagName ===
|
|
432
|
+
if (options.strictReservedNames &&
|
|
433
|
+
(tagName === options.commentPropName
|
|
434
|
+
|| tagName === options.cdataPropName
|
|
435
|
+
|| tagName === options.textNodeName
|
|
436
|
+
|| tagName === options.attributesGroupName
|
|
431
437
|
)) {
|
|
432
438
|
throw new Error(`Invalid tag name: ${tagName}`);
|
|
433
439
|
}
|
|
@@ -442,7 +448,7 @@ const parseXml = function (xmlData) {
|
|
|
442
448
|
|
|
443
449
|
//check if last tag was unpaired tag
|
|
444
450
|
const lastTag = currentNode;
|
|
445
|
-
if (lastTag &&
|
|
451
|
+
if (lastTag && options.unpairedTagsSet.has(lastTag.tagname)) {
|
|
446
452
|
currentNode = this.tagsNodeStack.pop();
|
|
447
453
|
this.matcher.pop();
|
|
448
454
|
}
|
|
@@ -484,13 +490,13 @@ const parseXml = function (xmlData) {
|
|
|
484
490
|
|
|
485
491
|
if (prefixedAttrs) {
|
|
486
492
|
// Extract raw attributes (without prefix) for our use
|
|
487
|
-
rawAttrs = extractRawAttributes(prefixedAttrs,
|
|
493
|
+
rawAttrs = extractRawAttributes(prefixedAttrs, options);
|
|
488
494
|
}
|
|
489
495
|
}
|
|
490
496
|
|
|
491
497
|
// Now check if this is a stop node (after attributes are set)
|
|
492
498
|
if (tagName !== xmlObj.tagname) {
|
|
493
|
-
this.isCurrentNodeStopNode = this.isItStopNode(
|
|
499
|
+
this.isCurrentNodeStopNode = this.isItStopNode();
|
|
494
500
|
}
|
|
495
501
|
|
|
496
502
|
const startIndex = i;
|
|
@@ -502,7 +508,7 @@ const parseXml = function (xmlData) {
|
|
|
502
508
|
i = result.closeIndex;
|
|
503
509
|
}
|
|
504
510
|
//unpaired tag
|
|
505
|
-
else if (
|
|
511
|
+
else if (options.unpairedTagsSet.has(tagName)) {
|
|
506
512
|
i = result.closeIndex;
|
|
507
513
|
}
|
|
508
514
|
//normal tag
|
|
@@ -521,7 +527,7 @@ const parseXml = function (xmlData) {
|
|
|
521
527
|
}
|
|
522
528
|
|
|
523
529
|
// For stop nodes, store raw content as-is without any processing
|
|
524
|
-
childNode.add(
|
|
530
|
+
childNode.add(options.textNodeName, tagContent);
|
|
525
531
|
|
|
526
532
|
this.matcher.pop(); // Pop the stop node tag
|
|
527
533
|
this.isCurrentNodeStopNode = false; // Reset flag
|
|
@@ -530,7 +536,7 @@ const parseXml = function (xmlData) {
|
|
|
530
536
|
} else {
|
|
531
537
|
//selfClosing tag
|
|
532
538
|
if (isSelfClosing) {
|
|
533
|
-
({ tagName, tagExp } = transformTagName(
|
|
539
|
+
({ tagName, tagExp } = transformTagName(options.transformTagName, tagName, tagExp, options));
|
|
534
540
|
|
|
535
541
|
const childNode = new xmlNode(tagName);
|
|
536
542
|
if (prefixedAttrs) {
|
|
@@ -540,7 +546,7 @@ const parseXml = function (xmlData) {
|
|
|
540
546
|
this.matcher.pop(); // Pop self-closing tag
|
|
541
547
|
this.isCurrentNodeStopNode = false; // Reset flag
|
|
542
548
|
}
|
|
543
|
-
else if (
|
|
549
|
+
else if (options.unpairedTagsSet.has(tagName)) {//unpaired tag
|
|
544
550
|
const childNode = new xmlNode(tagName);
|
|
545
551
|
if (prefixedAttrs) {
|
|
546
552
|
childNode[":@"] = prefixedAttrs;
|
|
@@ -555,7 +561,7 @@ const parseXml = function (xmlData) {
|
|
|
555
561
|
//opening tag
|
|
556
562
|
else {
|
|
557
563
|
const childNode = new xmlNode(tagName);
|
|
558
|
-
if (this.tagsNodeStack.length >
|
|
564
|
+
if (this.tagsNodeStack.length > options.maxNestedTags) {
|
|
559
565
|
throw new Error("Maximum nested tags exceeded");
|
|
560
566
|
}
|
|
561
567
|
this.tagsNodeStack.push(currentNode);
|
|
@@ -627,7 +633,7 @@ function replaceEntitiesValue(val, tagName, jPath) {
|
|
|
627
633
|
}
|
|
628
634
|
|
|
629
635
|
// Replace DOCTYPE entities
|
|
630
|
-
for (const entityName of
|
|
636
|
+
for (const entityName of this.docTypeEntitiesKeys) {
|
|
631
637
|
const entity = this.docTypeEntities[entityName];
|
|
632
638
|
const matches = val.match(entity.regx);
|
|
633
639
|
|
|
@@ -659,8 +665,9 @@ function replaceEntitiesValue(val, tagName, jPath) {
|
|
|
659
665
|
}
|
|
660
666
|
}
|
|
661
667
|
}
|
|
668
|
+
if (val.indexOf('&') === -1) return val;
|
|
662
669
|
// Replace standard entities
|
|
663
|
-
for (const entityName of
|
|
670
|
+
for (const entityName of this.lastEntitiesKeys) {
|
|
664
671
|
const entity = this.lastEntities[entityName];
|
|
665
672
|
const matches = val.match(entity.regex);
|
|
666
673
|
if (matches) {
|
|
@@ -677,22 +684,20 @@ function replaceEntitiesValue(val, tagName, jPath) {
|
|
|
677
684
|
if (val.indexOf('&') === -1) return val;
|
|
678
685
|
|
|
679
686
|
// Replace HTML entities if enabled
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
);
|
|
692
|
-
}
|
|
687
|
+
for (const entityName of this.htmlEntitiesKeys) {
|
|
688
|
+
const entity = this.htmlEntities[entityName];
|
|
689
|
+
const matches = val.match(entity.regex);
|
|
690
|
+
if (matches) {
|
|
691
|
+
//console.log(matches);
|
|
692
|
+
this.entityExpansionCount += matches.length;
|
|
693
|
+
if (entityConfig.maxTotalExpansions &&
|
|
694
|
+
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
|
|
695
|
+
throw new Error(
|
|
696
|
+
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
|
|
697
|
+
);
|
|
693
698
|
}
|
|
694
|
-
val = val.replace(entity.regex, entity.val);
|
|
695
699
|
}
|
|
700
|
+
val = val.replace(entity.regex, entity.val);
|
|
696
701
|
}
|
|
697
702
|
|
|
698
703
|
// Replace ampersand entity last
|
|
@@ -720,20 +725,14 @@ function saveTextToParentTag(textData, parentNode, matcher, isLeafNode) {
|
|
|
720
725
|
return textData;
|
|
721
726
|
}
|
|
722
727
|
|
|
723
|
-
//TODO: use jPath to simplify the logic
|
|
724
728
|
/**
|
|
725
729
|
* @param {Array<Expression>} stopNodeExpressions - Array of compiled Expression objects
|
|
726
730
|
* @param {Matcher} matcher - Current path matcher
|
|
727
731
|
*/
|
|
728
|
-
function isItStopNode(
|
|
729
|
-
if (
|
|
732
|
+
function isItStopNode() {
|
|
733
|
+
if (this.stopNodeExpressionsSet.size === 0) return false;
|
|
730
734
|
|
|
731
|
-
|
|
732
|
-
if (matcher.matches(stopNodeExpressions[i])) {
|
|
733
|
-
return true;
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
|
-
return false;
|
|
735
|
+
return this.matcher.matchesAny(this.stopNodeExpressionsSet);
|
|
737
736
|
}
|
|
738
737
|
|
|
739
738
|
/**
|
|
@@ -743,32 +742,33 @@ function isItStopNode(stopNodeExpressions, matcher) {
|
|
|
743
742
|
* @returns
|
|
744
743
|
*/
|
|
745
744
|
function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
|
|
746
|
-
let attrBoundary;
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
745
|
+
let attrBoundary = 0;
|
|
746
|
+
const chars = [];
|
|
747
|
+
const len = xmlData.length;
|
|
748
|
+
const closeCode0 = closingChar.charCodeAt(0);
|
|
749
|
+
const closeCode1 = closingChar.length > 1 ? closingChar.charCodeAt(1) : -1;
|
|
750
|
+
|
|
751
|
+
for (let index = i; index < len; index++) {
|
|
752
|
+
const code = xmlData.charCodeAt(index);
|
|
753
|
+
|
|
750
754
|
if (attrBoundary) {
|
|
751
|
-
if (
|
|
752
|
-
} else if (
|
|
753
|
-
attrBoundary =
|
|
754
|
-
} else if (
|
|
755
|
-
if (
|
|
756
|
-
if (xmlData
|
|
757
|
-
return {
|
|
758
|
-
data: tagExp,
|
|
759
|
-
index: index
|
|
760
|
-
}
|
|
755
|
+
if (code === attrBoundary) attrBoundary = 0;
|
|
756
|
+
} else if (code === 34 || code === 39) { // " or '
|
|
757
|
+
attrBoundary = code;
|
|
758
|
+
} else if (code === closeCode0) {
|
|
759
|
+
if (closeCode1 !== -1) {
|
|
760
|
+
if (xmlData.charCodeAt(index + 1) === closeCode1) {
|
|
761
|
+
return { data: String.fromCharCode(...chars), index };
|
|
761
762
|
}
|
|
762
763
|
} else {
|
|
763
|
-
return {
|
|
764
|
-
data: tagExp,
|
|
765
|
-
index: index
|
|
766
|
-
}
|
|
764
|
+
return { data: String.fromCharCode(...chars), index };
|
|
767
765
|
}
|
|
768
|
-
} else if (
|
|
769
|
-
|
|
766
|
+
} else if (code === 9) { // \t
|
|
767
|
+
chars.push(32); // space
|
|
768
|
+
continue;
|
|
770
769
|
}
|
|
771
|
-
|
|
770
|
+
|
|
771
|
+
chars.push(code);
|
|
772
772
|
}
|
|
773
773
|
}
|
|
774
774
|
|
|
@@ -781,6 +781,12 @@ function findClosingIndex(xmlData, str, i, errMsg) {
|
|
|
781
781
|
}
|
|
782
782
|
}
|
|
783
783
|
|
|
784
|
+
function findClosingChar(xmlData, char, i, errMsg) {
|
|
785
|
+
const closingIndex = xmlData.indexOf(char, i);
|
|
786
|
+
if (closingIndex === -1) throw new Error(errMsg);
|
|
787
|
+
return closingIndex; // no offset needed
|
|
788
|
+
}
|
|
789
|
+
|
|
784
790
|
function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
|
|
785
791
|
const result = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
|
|
786
792
|
if (!result) return;
|
|
@@ -822,10 +828,12 @@ function readStopNodeData(xmlData, tagName, i) {
|
|
|
822
828
|
// Starting at 1 since we already have an open tag
|
|
823
829
|
let openTagCount = 1;
|
|
824
830
|
|
|
825
|
-
|
|
831
|
+
const xmllen = xmlData.length;
|
|
832
|
+
for (; i < xmllen; i++) {
|
|
826
833
|
if (xmlData[i] === "<") {
|
|
827
|
-
|
|
828
|
-
|
|
834
|
+
const c1 = xmlData.charCodeAt(i + 1);
|
|
835
|
+
if (c1 === 47) {//close tag '/'
|
|
836
|
+
const closeIndex = findClosingChar(xmlData, ">", i, `${tagName} is not closed`);
|
|
829
837
|
let closeTagName = xmlData.substring(i + 2, closeIndex).trim();
|
|
830
838
|
if (closeTagName === tagName) {
|
|
831
839
|
openTagCount--;
|
|
@@ -837,13 +845,16 @@ function readStopNodeData(xmlData, tagName, i) {
|
|
|
837
845
|
}
|
|
838
846
|
}
|
|
839
847
|
i = closeIndex;
|
|
840
|
-
} else if (
|
|
848
|
+
} else if (c1 === 63) { //?
|
|
841
849
|
const closeIndex = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.")
|
|
842
850
|
i = closeIndex;
|
|
843
|
-
} else if (
|
|
851
|
+
} else if (c1 === 33
|
|
852
|
+
&& xmlData.charCodeAt(i + 2) === 45
|
|
853
|
+
&& xmlData.charCodeAt(i + 3) === 45) { // '!--'
|
|
844
854
|
const closeIndex = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.")
|
|
845
855
|
i = closeIndex;
|
|
846
|
-
} else if (
|
|
856
|
+
} else if (c1 === 33
|
|
857
|
+
&& xmlData.charCodeAt(i + 2) === 91) { // '!['
|
|
847
858
|
const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
|
|
848
859
|
i = closeIndex;
|
|
849
860
|
} else {
|