@nodable/flexible-xml-parser 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/package.json +4 -3
- package/src/AttributeProcessor.js +85 -32
- package/src/DocTypeReader.js +6 -6
- package/src/OptionsBuilder.js +1 -0
- package/src/XMLParser.js +42 -12
- package/src/Xml2JsParser.js +23 -5
- package/src/XmlPartReader.js +10 -7
- package/src/XmlSpecialTagsReader.js +15 -5
- package/src/util.js +9 -10
package/CHANGELOG.md
CHANGED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
|
|
2
|
+
**1.2.0 (2026-05-13)**
|
|
3
|
+
- fix: Tag name can be separated from the rest of the tag expression by any kind of whitespace.
|
|
4
|
+
- fix: parser should not fail when tag expression is very long
|
|
5
|
+
- fix: stop node with namespace should work
|
|
6
|
+
- support `feedable.bufferSize` option to improve/speed up feed method.
|
|
7
|
+
- integrate `xml-naming` library that would also consider xml version
|
|
8
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nodable/flexible-xml-parser",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Fastest XML parser in pure JS with fully customizable ouput",
|
|
5
5
|
"main": "./lib/fxp.cjs",
|
|
6
6
|
"type": "module",
|
|
@@ -45,15 +45,16 @@
|
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
47
|
"@nodable/base-output-builder": "^1.0.5",
|
|
48
|
-
"@nodable/compact-builder": "^1.0.
|
|
48
|
+
"@nodable/compact-builder": "^1.0.8",
|
|
49
49
|
"path-expression-matcher": "^1.5.0",
|
|
50
|
-
"
|
|
50
|
+
"xml-naming": "^0.1.0"
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
|
53
53
|
"@babel/core": "^7.29.0",
|
|
54
54
|
"@babel/plugin-transform-runtime": "^7.29.0",
|
|
55
55
|
"@babel/preset-env": "^7.29.2",
|
|
56
56
|
"@babel/register": "^7.28.6",
|
|
57
|
+
"@byspec/xml": "^0.1.0",
|
|
57
58
|
"@nodable/entities": "^2.1.0",
|
|
58
59
|
"@types/node": "^20.19.37",
|
|
59
60
|
"babel-loader": "^10.1.1",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
import { ParseError, ErrorCode } from './ParseError.js';
|
|
3
|
+
import { isSpaceCode } from "./util.js"
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
6
|
* AttributeProcessor — owns all attribute parsing logic.
|
|
@@ -21,9 +22,87 @@ import { ParseError, ErrorCode } from './ParseError.js';
|
|
|
21
22
|
* complete attribute context when value parsers execute.
|
|
22
23
|
*/
|
|
23
24
|
|
|
24
|
-
// Module-level regex
|
|
25
|
-
//
|
|
26
|
-
|
|
25
|
+
// Module-level regex kept for reference only — no longer called from this
|
|
26
|
+
// module. parseAttributes() below replaces it with an O(n) linear scanner
|
|
27
|
+
// that is immune to catastrophic backtracking and stack overflow.
|
|
28
|
+
// const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
|
|
29
|
+
|
|
30
|
+
/**
 * Parse an attribute expression string into an array of match tuples.
 *
 * Each tuple has the shape of the capture groups produced by the regex this
 * scanner replaces, so callers that index into the tuple are unchanged:
 *   [fullMatch, name, '=' + quoted value | undefined, quote | undefined, value | undefined]
 * Every tuple also carries a `startIndex` property: the offset of the
 * attribute name inside `attrStr`.
 *
 * The scan is a single left-to-right pass over char codes, with no regex and
 * no recursion, so the work is linear in the length of `attrStr`.
 *
 * Handling of irregular input:
 *   - A name with no '=' yields a boolean tuple
 *     `[name, name, undefined, undefined, undefined]`.
 *   - A name followed by '=' but no opening quote (e.g. `a=b` or a trailing
 *     `a=`) also yields a boolean tuple for the name instead of being
 *     dropped; scanning then resumes at the unquoted text.
 *   - An unterminated quoted value runs to the end of the string.
 *   - Inside a quoted value every '\n' and every '\r' is replaced by one
 *     space. `fullMatch` is rebuilt from the normalized parts, so it is not
 *     necessarily a verbatim slice of the input.
 *
 * @param {string} attrStr - attribute portion of a tag expression
 * @returns {Array} array of match tuples (see shape above)
 */
function parseAttributes(attrStr) {
  const results = [];
  const len = attrStr.length;
  let i = 0;

  // Record an attribute that has no usable quoted value.
  const pushBoolean = (name, startIndex) => {
    const m = [name, name, undefined, undefined, undefined];
    m.startIndex = startIndex;
    results.push(m);
  };

  while (i < len) {
    // Skip whitespace between attributes
    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;
    if (i >= len) break;

    // Read name: everything up to whitespace or '=' (char code 61)
    const nameStart = i;
    while (i < len && attrStr.charCodeAt(i) !== 61 && !isSpaceCode(attrStr.charCodeAt(i))) i++;
    const name = attrStr.substring(nameStart, i);

    // Skip whitespace before '='
    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;

    if (i >= len || attrStr.charCodeAt(i) !== 61) {
      // Boolean attribute — no '='
      pushBoolean(name, nameStart);
      continue;
    }

    i++; // skip '='

    // Skip whitespace after '='
    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;

    // charCodeAt() past the end returns NaN, which fails both comparisons.
    const quote = attrStr.charCodeAt(i);
    if (quote !== 34 && quote !== 39) { // neither " nor '
      // '=' without a quoted value: keep the name rather than losing it.
      // An empty name (input such as "==") is still skipped.
      if (name.length > 0) pushBoolean(name, nameStart);
      continue;
    }

    i++; // skip opening quote

    // Accumulate the value in segments so line breaks can be swapped for
    // spaces without copying one character at a time.
    let value = '';
    let segStart = i;
    while (i < len && attrStr.charCodeAt(i) !== quote) {
      const c = attrStr.charCodeAt(i);
      if (c === 10 || c === 13) { // \n or \r → space
        value += attrStr.substring(segStart, i) + ' ';
        segStart = i + 1;
      }
      i++;
    }
    value += attrStr.substring(segStart, i);
    i++; // skip closing quote (or step past the end if unterminated)

    const quoteChar = String.fromCharCode(quote);
    const quoted = quoteChar + value + quoteChar;
    const m = [name + '=' + quoted, name, '=' + quoted, quoteChar, value];
    m.startIndex = nameStart;
    results.push(m);
  }

  return results;
}
|
|
27
106
|
|
|
28
107
|
/**
|
|
29
108
|
* Pass 1: extract raw (unparsed) attribute values into rawAttributes.
|
|
@@ -33,9 +112,9 @@ const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm
|
|
|
33
112
|
* @param {object} tagExp - tagExp object to populate rawAttributes (Object.create(null))
|
|
34
113
|
*/
|
|
35
114
|
export function collectRawAttributes(attrStr, parser, tagExp) {
|
|
36
|
-
|
|
37
115
|
if (!attrStr || attrStr.length === 0) return;
|
|
38
|
-
|
|
116
|
+
|
|
117
|
+
const matches = parseAttributes(attrStr);
|
|
39
118
|
const len = matches.length;
|
|
40
119
|
let count = 0;
|
|
41
120
|
for (let i = 0; i < len; i++) {
|
|
@@ -56,7 +135,7 @@ export function collectRawAttributes(attrStr, parser, tagExp) {
|
|
|
56
135
|
*/
|
|
57
136
|
export function flushAttributes(attrStr, parser) {
|
|
58
137
|
if (!attrStr || attrStr.length === 0) return;
|
|
59
|
-
const matches =
|
|
138
|
+
const matches = parseAttributes(attrStr);
|
|
60
139
|
const len = matches.length;
|
|
61
140
|
|
|
62
141
|
const maxAttrs = parser.options.limits?.maxAttributesPerTag;
|
|
@@ -78,30 +157,4 @@ export function flushAttributes(attrStr, parser) {
|
|
|
78
157
|
|
|
79
158
|
parser.outputBuilder.addAttribute(attrName, attrVal, parser.readonlyMatcher);
|
|
80
159
|
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* Run the regex against the string and return all capture groups.
|
|
85
|
-
* lastIndex is always reset to 0 before iterating so the module-level
|
|
86
|
-
* stateful regex is safe to share across calls.
|
|
87
|
-
*
|
|
88
|
-
* @param {string} string
|
|
89
|
-
* @param {RegExp} regex
|
|
90
|
-
* @returns {Array}
|
|
91
|
-
*/
|
|
92
|
-
function getAllMatches(string, regex) {
|
|
93
|
-
regex.lastIndex = 0;
|
|
94
|
-
const matches = [];
|
|
95
|
-
let match = regex.exec(string);
|
|
96
|
-
while (match) {
|
|
97
|
-
const allmatches = [];
|
|
98
|
-
allmatches.startIndex = regex.lastIndex - match[0].length;
|
|
99
|
-
const len = match.length;
|
|
100
|
-
for (let index = 0; index < len; index++) {
|
|
101
|
-
allmatches.push(match[index]);
|
|
102
|
-
}
|
|
103
|
-
matches.push(allmatches);
|
|
104
|
-
match = regex.exec(string);
|
|
105
|
-
}
|
|
106
|
-
return matches;
|
|
107
160
|
}
|
package/src/DocTypeReader.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { isName } from './util.js';
|
|
2
1
|
import { ParseError, ErrorCode } from './ParseError.js';
|
|
2
|
+
import { name as isName, qName as isQName } from 'xml-naming';
|
|
3
3
|
|
|
4
4
|
export function readDocType(parser) {
|
|
5
5
|
parser.source.markTokenStart(1);
|
|
@@ -267,7 +267,7 @@ function readEntityExp(parser) {
|
|
|
267
267
|
{ line: source.line, col: source.cols, index: source.startIndex });
|
|
268
268
|
}
|
|
269
269
|
|
|
270
|
-
validateEntityName(entityName);
|
|
270
|
+
validateEntityName(entityName, parser.xmlVersion);
|
|
271
271
|
skipSourceWhitespace(source);
|
|
272
272
|
|
|
273
273
|
if (!source.canRead()) {
|
|
@@ -346,7 +346,7 @@ function readElementExp(parser) {
|
|
|
346
346
|
{ line: source.line, col: source.cols, index: source.startIndex });
|
|
347
347
|
}
|
|
348
348
|
|
|
349
|
-
if (!isName(elementName)) {
|
|
349
|
+
if (!isName(elementName, parser.xmlVersion)) {
|
|
350
350
|
throw new ParseError(`Invalid element name: "${elementName}"`,
|
|
351
351
|
ErrorCode.INVALID_TAG,
|
|
352
352
|
{ line: source.line, col: source.cols, index: source.startIndex });
|
|
@@ -434,7 +434,7 @@ function readNotationExp(parser) {
|
|
|
434
434
|
{ line: source.line, col: source.cols, index: source.startIndex });
|
|
435
435
|
}
|
|
436
436
|
|
|
437
|
-
validateEntityName(notationName);
|
|
437
|
+
validateEntityName(notationName, parser.xmlVersion);
|
|
438
438
|
skipSourceWhitespace(source);
|
|
439
439
|
|
|
440
440
|
// Need all 6 chars of "SYSTEM" / "PUBLIC" before we can classify
|
|
@@ -512,8 +512,8 @@ function skipSourceWhitespace(source) {
|
|
|
512
512
|
}
|
|
513
513
|
}
|
|
514
514
|
|
|
515
|
-
function validateEntityName(name) {
|
|
516
|
-
if (isName(name)) return name;
|
|
515
|
+
function validateEntityName(name, xmlVersion) {
|
|
516
|
+
if (isName(name, xmlVersion)) return name;
|
|
517
517
|
throw new ParseError(
|
|
518
518
|
`Invalid entity name "${name}"`,
|
|
519
519
|
ErrorCode.ENTITY_INVALID_KEY,
|
package/src/OptionsBuilder.js
CHANGED
package/src/XMLParser.js
CHANGED
|
@@ -13,6 +13,10 @@ export default class XMLParser {
|
|
|
13
13
|
this._feedParser = null;
|
|
14
14
|
this._feedSource = null;
|
|
15
15
|
this._isFeeding = false;
|
|
16
|
+
|
|
17
|
+
// ── Batching state ──────────────────────────────────
|
|
18
|
+
this._pendingBytes = 0;
|
|
19
|
+
this._batchThreshold = this.options.feedable?.bufferSize;
|
|
16
20
|
}
|
|
17
21
|
|
|
18
22
|
// ─── One-shot parse methods ───────────────────────────────────────────────
|
|
@@ -126,6 +130,37 @@ export default class XMLParser {
|
|
|
126
130
|
|
|
127
131
|
// ─── Incremental feed()/end() API ────────────────────────────────────────
|
|
128
132
|
|
|
133
|
+
_runParse() {
|
|
134
|
+
if (!this._feedParser) return;
|
|
135
|
+
|
|
136
|
+
const beforePos = this._feedSource.startIndex; // bytes consumed so far
|
|
137
|
+
|
|
138
|
+
try {
|
|
139
|
+
this._feedParser.parseXml();
|
|
140
|
+
} catch (err) {
|
|
141
|
+
if (err.code === ErrorCode.UNEXPECTED_END) {
|
|
142
|
+
this._feedSource.rewindToMark();
|
|
143
|
+
} else {
|
|
144
|
+
throw err;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const afterPos = this._feedSource.startIndex;
|
|
149
|
+
const didAdvance = afterPos > beforePos;
|
|
150
|
+
|
|
151
|
+
if (didAdvance) {
|
|
152
|
+
// Real progress made — reset threshold normally
|
|
153
|
+
this._pendingBytes = 0;
|
|
154
|
+
} else {
|
|
155
|
+
// Parser is stuck mid-token — grow the threshold to avoid
|
|
156
|
+
// hammering parseXml() until significantly more data arrives
|
|
157
|
+
this._batchThreshold = Math.min(
|
|
158
|
+
this._batchThreshold * 2,
|
|
159
|
+
this.options.feedable.maxBufferSize
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
129
164
|
/**
|
|
130
165
|
* Feed an XML data chunk for incremental parsing.
|
|
131
166
|
*
|
|
@@ -160,20 +195,12 @@ export default class XMLParser {
|
|
|
160
195
|
}
|
|
161
196
|
|
|
162
197
|
this._feedSource.feed(str);
|
|
198
|
+
this._pendingBytes += str.length;
|
|
163
199
|
|
|
164
|
-
|
|
165
|
-
this.
|
|
166
|
-
} catch (err) {
|
|
167
|
-
if (err.code === ErrorCode.UNEXPECTED_END) {
|
|
168
|
-
// Chunk boundary fell mid-token. Rewind to the token start so the
|
|
169
|
-
// incomplete bytes are re-parsed when the next chunk arrives.
|
|
170
|
-
this._feedSource.rewindToMark();
|
|
171
|
-
} else {
|
|
172
|
-
// Real parse error — clean up and propagate.
|
|
173
|
-
this._cleanupFeedSession();
|
|
174
|
-
throw err;
|
|
175
|
-
}
|
|
200
|
+
if (this._pendingBytes >= this._batchThreshold) {
|
|
201
|
+
this._runParse();
|
|
176
202
|
}
|
|
203
|
+
// Otherwise, delay parsing until next feed() or end()
|
|
177
204
|
|
|
178
205
|
return this;
|
|
179
206
|
}
|
|
@@ -201,6 +228,9 @@ export default class XMLParser {
|
|
|
201
228
|
throw new ParseError('No data fed. Call feed() before end().', ErrorCode.NOT_STREAMING);
|
|
202
229
|
}
|
|
203
230
|
|
|
231
|
+
// Force a final parse (any pending bytes are now processed)
|
|
232
|
+
this._runParse();
|
|
233
|
+
|
|
204
234
|
try {
|
|
205
235
|
// Mark the source as complete so readers know there is no more data.
|
|
206
236
|
this._feedSource.end();
|
package/src/Xml2JsParser.js
CHANGED
|
@@ -5,9 +5,10 @@ import { StopNodeProcessor } from './StopNodeProcessor.js';
|
|
|
5
5
|
import { readComment, readCdata, readPiTag } from './XmlSpecialTagsReader.js';
|
|
6
6
|
import { Expression, ExpressionSet, Matcher } from 'path-expression-matcher';
|
|
7
7
|
import { readDocType } from './DocTypeReader.js';
|
|
8
|
-
import {
|
|
8
|
+
import { DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
|
|
9
9
|
import AutoCloseHandler from './AutoCloseHandler.js';
|
|
10
10
|
import { ParseError, ErrorCode } from './ParseError.js';
|
|
11
|
+
import { name as isName, qName as isQName } from 'xml-naming';
|
|
11
12
|
|
|
12
13
|
class TagDetail {
|
|
13
14
|
/**
|
|
@@ -60,6 +61,7 @@ export default class Xml2JsParser {
|
|
|
60
61
|
this.tagsStack = [];
|
|
61
62
|
this._stopNodeProcessor = null;
|
|
62
63
|
this._exitIfTriggered = false;
|
|
64
|
+
this.xmlVersion = '1.0';
|
|
63
65
|
|
|
64
66
|
if (!this.matcher) {
|
|
65
67
|
this.matcher = new Matcher();
|
|
@@ -283,6 +285,18 @@ export default class Xml2JsParser {
|
|
|
283
285
|
this.source.startIndex,
|
|
284
286
|
);
|
|
285
287
|
|
|
288
|
+
// Extract namespace prefix and local name from raw tag name (e.g. "ns:tag" → "ns", "tag").
|
|
289
|
+
// Always done from the raw name (tagExp.tagName), before processTagName strips the prefix,
|
|
290
|
+
// so these values are stable regardless of skip.nsPrefix.
|
|
291
|
+
const colonIdx = tagExp.tagName.indexOf(':');
|
|
292
|
+
const tagNamespace = colonIdx !== -1 ? tagExp.tagName.slice(0, colonIdx) : undefined;
|
|
293
|
+
// Local name for the matcher: prefix-free always (e.g. "code" from "ns:code").
|
|
294
|
+
// The matcher library tracks namespace separately via the 3rd push() argument —
|
|
295
|
+
// passing the full "ns:code" as the tag name would break ns::code expression matching.
|
|
296
|
+
const matcherTagName = tagNamespace !== undefined
|
|
297
|
+
? tagExp.tagName.slice(colonIdx + 1)
|
|
298
|
+
: processedTagName;
|
|
299
|
+
|
|
286
300
|
// ── Limit: maxNestedTags ─────────────────────────────────────────────────
|
|
287
301
|
const maxNested = options.limits?.maxNestedTags;
|
|
288
302
|
if (maxNested !== undefined && maxNested !== null) {
|
|
@@ -304,7 +318,7 @@ export default class Xml2JsParser {
|
|
|
304
318
|
raeAttrLen = tagExp.rawAttributesLen;
|
|
305
319
|
}
|
|
306
320
|
|
|
307
|
-
this.matcher.push(
|
|
321
|
+
this.matcher.push(matcherTagName, {}, tagNamespace);
|
|
308
322
|
if (raeAttrLen > 0) {
|
|
309
323
|
this.matcher.updateCurrent(rawAttributes);
|
|
310
324
|
}
|
|
@@ -334,7 +348,10 @@ export default class Xml2JsParser {
|
|
|
334
348
|
this.matcher.pop();
|
|
335
349
|
} else if (stopNodeConfig) {
|
|
336
350
|
// Create a fresh processor with the matching nested + skipEnclosures config.
|
|
337
|
-
|
|
351
|
+
// Raw tag name (tagExp.tagName) is used — the processor scans the source
|
|
352
|
+
// character-by-character and must match the prefix-as-written (e.g. "ns:code"),
|
|
353
|
+
// independent of what skip.nsPrefix does to the processed output name.
|
|
354
|
+
this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
|
|
338
355
|
nested: stopNodeConfig.nested,
|
|
339
356
|
skipEnclosures: stopNodeConfig.skipEnclosures,
|
|
340
357
|
});
|
|
@@ -351,7 +368,8 @@ export default class Xml2JsParser {
|
|
|
351
368
|
} else if (skipTagConfig) {
|
|
352
369
|
// Skip tag: collect raw content (to advance the source past the closing tag)
|
|
353
370
|
// but call no output builder methods — the tag is silently dropped.
|
|
354
|
-
|
|
371
|
+
// Raw tag name used for the same reason as the stop-node branch above.
|
|
372
|
+
this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
|
|
355
373
|
nested: skipTagConfig.nested,
|
|
356
374
|
skipEnclosures: skipTagConfig.skipEnclosures,
|
|
357
375
|
});
|
|
@@ -460,7 +478,7 @@ export default class Xml2JsParser {
|
|
|
460
478
|
processAttrName(attrName) {
|
|
461
479
|
const options = this.options;
|
|
462
480
|
attrName = resolveNsPrefix(attrName, options.skip.nsPrefix);
|
|
463
|
-
if (!
|
|
481
|
+
if (!isQName(attrName, this.xmlVersion)) { //TODO: make it optional
|
|
464
482
|
throw new ParseError(`Invalid attribute name: ${attrName}`, ErrorCode.INVALID_ATTRIBUTE_NAME);
|
|
465
483
|
}
|
|
466
484
|
attrName = sanitizeName(attrName, options.onDangerousProperty);
|
package/src/XmlPartReader.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
import { ParseError, ErrorCode } from './ParseError.js';
|
|
3
3
|
import { collectRawAttributes } from './AttributeProcessor.js';
|
|
4
|
-
import {
|
|
4
|
+
import { isSpace } from "./util.js"
|
|
5
|
+
import { name as isName, qName as isQName } from 'xml-naming';
|
|
5
6
|
// Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can
|
|
6
7
|
// continue to import it from here without changing their import lines.
|
|
7
8
|
export { flushAttributes } from './AttributeProcessor.js';
|
|
@@ -157,19 +158,20 @@ function buildTagExpObj(exp, parser) {
|
|
|
157
158
|
let attrsExp = "";
|
|
158
159
|
let i = 0;
|
|
159
160
|
|
|
160
|
-
for (; i <
|
|
161
|
-
|
|
161
|
+
for (; i < exp.length; i++) {
|
|
162
|
+
const c = exp[i];
|
|
163
|
+
if (isSpace(c)) {
|
|
162
164
|
tagExp.tagName = exp.substring(0, i);
|
|
163
165
|
attrsExp = exp.substring(i + 1);
|
|
164
166
|
break;
|
|
165
167
|
}
|
|
166
168
|
}
|
|
167
169
|
//only tag
|
|
168
|
-
if (tagExp.tagName.length === 0 && i ===
|
|
170
|
+
if (tagExp.tagName.length === 0 && i === exp.length) tagExp.tagName = exp;
|
|
169
171
|
tagExp.tagName = tagExp.tagName.trimEnd();
|
|
170
172
|
tagExp._attrsExp = attrsExp;
|
|
171
173
|
|
|
172
|
-
if (!
|
|
174
|
+
if (!isQName(tagExp.tagName, parser.xmlVersion)) {
|
|
173
175
|
throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);
|
|
174
176
|
}
|
|
175
177
|
|
|
@@ -178,6 +180,7 @@ function buildTagExpObj(exp, parser) {
|
|
|
178
180
|
if (!parser.options.skip.attributes && attrsExp.length > 0) {
|
|
179
181
|
collectRawAttributes(attrsExp, parser, tagExp);
|
|
180
182
|
}
|
|
181
|
-
|
|
183
|
+
// console.log(tagExp)
|
|
182
184
|
return tagExp;
|
|
183
|
-
}
|
|
185
|
+
}
|
|
186
|
+
|
|
@@ -36,11 +36,21 @@ export function readPiTag(parser) {
|
|
|
36
36
|
parser.source.markTokenStart(1);
|
|
37
37
|
//<? already consumed
|
|
38
38
|
let tagExp = readPiExp(parser, "?>");
|
|
39
|
-
if (!tagExp)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
if (!tagExp) {
|
|
40
|
+
throw new ParseError(
|
|
41
|
+
"Invalid Pi Tag expression.",
|
|
42
|
+
ErrorCode.INVALID_TAG,
|
|
43
|
+
{ line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
|
|
44
|
+
)
|
|
45
|
+
} else if (tagExp.tagName === "xml") {
|
|
46
|
+
// Read version from the declaration and store it on the parser for validators.
|
|
47
|
+
const version = tagExp.rawAttributes?.version;
|
|
48
|
+
if (version === '1.1') {
|
|
49
|
+
parser.xmlVersion = 1.1;
|
|
50
|
+
} else {
|
|
51
|
+
parser.xmlVersion = 1.0; // default
|
|
52
|
+
}
|
|
53
|
+
}
|
|
44
54
|
|
|
45
55
|
// Flush attributes into the output builder's this.attributes accumulator
|
|
46
56
|
// so addDeclaration() / addInstruction() pick them up, mirroring what readOpeningTag
|
package/src/util.js
CHANGED
|
@@ -1,10 +1,3 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';
|
|
4
|
-
const nameChar = nameStartChar + '\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040';
|
|
5
|
-
export const nameRegexp = '[' + nameStartChar + '][' + nameChar + ']*';
|
|
6
|
-
const regexName = new RegExp('^' + nameRegexp + '$');
|
|
7
|
-
|
|
8
1
|
export function getAllMatches(string, regex) {
|
|
9
2
|
const matches = [];
|
|
10
3
|
let match = regex.exec(string);
|
|
@@ -21,9 +14,15 @@ export function getAllMatches(string, regex) {
|
|
|
21
14
|
return matches;
|
|
22
15
|
}
|
|
23
16
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
/**
 * Tell whether a one-character string is one of the whitespace characters
 * this parser recognises: space, tab, line feed, carriage return or form feed.
 *
 * @param {string} char - a single character
 * @returns {boolean} true when `char` is exactly one of those characters
 */
export function isSpace(char) {
  switch (char) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
    case "\f":
      return true;
    default:
      return false;
  }
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Char-code counterpart of isSpace(): tell whether a UTF-16 code unit is
 * space (32), tab (9), line feed (10), carriage return (13) or form feed (12).
 *
 * @param {number} code - value as returned by String.prototype.charCodeAt()
 * @returns {boolean} true when `code` is one of the five codes above
 */
export function isSpaceCode(code) {
  switch (code) {
    case 32: // space
    case 9:  // \t
    case 10: // \n
    case 13: // \r
    case 12: // \f
      return true;
    default:
      return false;
  }
}
|
|
28
27
|
|
|
29
28
|
export function isExist(v) {
|