@nodable/flexible-xml-parser 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/lib/fxp.cjs +1 -1
- package/package.json +2 -2
- package/src/AttributeProcessor.js +50 -35
- package/src/DocTypeReader.js +6 -6
- package/src/InputSource/BufferSource.js +28 -14
- package/src/InputSource/FeedableSource.js +93 -4
- package/src/InputSource/StreamSource.js +5 -1
- package/src/InputSource/StringSource.js +35 -2
- package/src/XMLParser.js +9 -13
- package/src/Xml2JsParser.js +43 -8
- package/src/XmlPartReader.js +15 -27
- package/src/XmlSpecialTagsReader.js +14 -1
- package/src/CharsSymbol.js +0 -16
package/src/XmlPartReader.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
import { ParseError, ErrorCode } from './ParseError.js';
|
|
3
3
|
import { collectRawAttributes } from './AttributeProcessor.js';
|
|
4
4
|
import { isSpace } from "./util.js"
|
|
5
|
-
import { name as isName, qName as isQName } from 'xml-naming';
|
|
6
5
|
// Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can
|
|
7
6
|
// continue to import it from here without changing their import lines.
|
|
8
7
|
export { flushAttributes } from './AttributeProcessor.js';
|
|
@@ -52,35 +51,24 @@ export function readTagExp(parser) {
|
|
|
52
51
|
// begins — captured before any reads so buildTagExpObj can compute each
|
|
53
52
|
// attribute's absolute document position from its offset within attrsExp.
|
|
54
53
|
const expStart = parser.source.startIndex;
|
|
55
|
-
let inSingleQuotes = false;
|
|
56
|
-
let inDoubleQuotes = false;
|
|
57
|
-
let i;
|
|
58
|
-
let EOE = false;
|
|
59
54
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if (!EOE) {
|
|
74
|
-
// Buffer exhausted before '>' — chunk boundary mid-tag. Throw UNEXPECTED_END
|
|
75
|
-
// so feed()/parseStream() rewinds to the level-0 outer mark and retries.
|
|
55
|
+
const relEnd = parser.source.scanTagExpEnd();
|
|
56
|
+
|
|
57
|
+
if (relEnd === -1) {
|
|
58
|
+
// Buffer exhausted before an unquoted '>' was found — chunk boundary
|
|
59
|
+
// mid-tag. Throw UNEXPECTED_END so feed()/parseStream() rewinds to the
|
|
60
|
+
// level-0 outer mark and retries. (Note: scanTagExpEnd() only returns a
|
|
61
|
+
// non-negative index once both quote flags are already balanced-closed —
|
|
62
|
+
// by construction, not by a separate post-scan check — so there is no
|
|
63
|
+
// longer a distinct "unclosed quote but '>' was found" case to detect;
|
|
64
|
+
// the old UNCLOSED_QUOTE branch here was checking the same two flags
|
|
65
|
+
// immediately after the only code path that requires them both false,
|
|
66
|
+
// making it permanently unreachable.)
|
|
76
67
|
throw new ParseError("Unexpected closing of source waiting for '>'", ErrorCode.UNEXPECTED_END);
|
|
77
|
-
} else if (inSingleQuotes || inDoubleQuotes) {
|
|
78
|
-
// '>' found but a quote was never closed — real syntax error.
|
|
79
|
-
throw new ParseError("Invalid attribute expression. Quote is not properly closed", ErrorCode.UNCLOSED_QUOTE);
|
|
80
68
|
}
|
|
81
69
|
|
|
82
|
-
const exp = parser.source.readStr(
|
|
83
|
-
parser.source.updateBufferBoundary(
|
|
70
|
+
const exp = parser.source.readStr(relEnd);
|
|
71
|
+
parser.source.updateBufferBoundary(relEnd + 1);
|
|
84
72
|
return buildTagExpObj(exp, parser, expStart);
|
|
85
73
|
}
|
|
86
74
|
|
|
@@ -183,7 +171,7 @@ function buildTagExpObj(exp, parser, expStart, forceToReadAttrs = false) {
|
|
|
183
171
|
tagExp.tagName = tagExp.tagName.trimEnd();
|
|
184
172
|
tagExp._attrsExp = attrsExp;
|
|
185
173
|
|
|
186
|
-
if (!
|
|
174
|
+
if (!parser.getNameValidator('qName')(tagExp.tagName)) {
|
|
187
175
|
throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);
|
|
188
176
|
}
|
|
189
177
|
|
|
package/src/XmlSpecialTagsReader.js
CHANGED
|
@@ -48,6 +48,19 @@ export function readPiTag(parser) {
|
|
|
48
48
|
if (version === '1.1') {
|
|
49
49
|
parser.xmlDec.version = 1.1;
|
|
50
50
|
}
|
|
51
|
+
parser.xmlDec.encoding = tagExp.rawAttributes?.encoding;
|
|
52
|
+
parser.xmlDec.standalone = tagExp.rawAttributes?.standalone;
|
|
53
|
+
|
|
54
|
+
// BUG FIX: getNameValidator('qName') was already called (and memoized)
|
|
55
|
+
// above, the moment this PI tag's own name ("xml") got validated — before
|
|
56
|
+
// xmlDec.version was known, so it was always cached with the '1.0'
|
|
57
|
+
// default. Every subsequent tag/attribute name in the document —
|
|
58
|
+
// including the root element — would silently be checked against XML
|
|
59
|
+
// 1.0 rules even for a document declaring version="1.1". Reset the
|
|
60
|
+
// cache now that the real version is known; this runs at most once per
|
|
61
|
+
// document (a <?xml?> declaration can only appear once), so the cost is
|
|
62
|
+
// negligible.
|
|
63
|
+
parser._nameValidators = Object.create(null);
|
|
51
64
|
}
|
|
52
65
|
|
|
53
66
|
// Flush attributes into the output builder's this.attributes accumulator
|
|
@@ -55,7 +68,7 @@ export function readPiTag(parser) {
|
|
|
55
68
|
// does for regular tags. PI tags are not pushed onto the matcher, so no
|
|
56
69
|
// updateCurrent() call is needed here.
|
|
57
70
|
if (!skipOptions.attributes) {
|
|
58
|
-
flushAttributes(tagExp.
|
|
71
|
+
flushAttributes(tagExp._parsedAttrs, parser, tagExp._attrsExpStart, tagExp._rawAttrMatchCount);
|
|
59
72
|
}
|
|
60
73
|
|
|
61
74
|
if (tagExp.tagName === "xml") {
|
package/src/CharsSymbol.js
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
export default {
|
|
2
|
-
"<" : "<", //tag start
|
|
3
|
-
">" : ">", //tag end
|
|
4
|
-
"/" : "/", //close tag
|
|
5
|
-
"!" : "!", //comment or docttype
|
|
6
|
-
"!--" : "!--", //comment
|
|
7
|
-
"-->" : "-->", //comment end
|
|
8
|
-
"?" : "?", //pi
|
|
9
|
-
"?>" : "?>", //pi end
|
|
10
|
-
"?xml" : "?xml", //pi end
|
|
11
|
-
"![" : "![", //cdata
|
|
12
|
-
"]]>" : "]]>", //cdata end
|
|
13
|
-
"[" : "[",
|
|
14
|
-
"-" : "-",
|
|
15
|
-
"D" : "D",
|
|
16
|
-
}
|