@nodable/flexible-xml-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -0
- package/LICENSE +21 -0
- package/README.md +284 -0
- package/lib/fxp.d.cts +652 -0
- package/package.json +80 -0
- package/src/AttributeProcessor.js +107 -0
- package/src/AutoCloseHandler.js +257 -0
- package/src/CharsSymbol.js +16 -0
- package/src/DocTypeReader.js +522 -0
- package/src/InputSource/BufferSource.js +228 -0
- package/src/InputSource/FeedableSource.js +340 -0
- package/src/InputSource/StreamSource.js +49 -0
- package/src/InputSource/StringSource.js +225 -0
- package/src/OptionsBuilder.js +400 -0
- package/src/ParseError.js +91 -0
- package/src/StopNodeProcessor.js +573 -0
- package/src/XMLParser.js +293 -0
- package/src/Xml2JsParser.js +573 -0
- package/src/XmlPartReader.js +183 -0
- package/src/XmlSpecialTagsReader.js +82 -0
- package/src/fxp.d.ts +619 -0
- package/src/fxp.js +8 -0
- package/src/util.js +58 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
import { ParseError, ErrorCode } from './ParseError.js';
|
|
3
|
+
import { collectRawAttributes } from './AttributeProcessor.js';
|
|
4
|
+
import { isName } from "./util.js"
|
|
5
|
+
// Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can
|
|
6
|
+
// continue to import it from here without changing their import lines.
|
|
7
|
+
export { flushAttributes } from './AttributeProcessor.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Consume a closing tag up to and including its '>' and return the tag name.
 *
 * The source is marked at level 1 (inner mark) before reading, so flush()
 * has a safe trim boundary while this reader is running. The level-0 outer
 * mark placed by parseXml()'s loop is left untouched; that is the mark
 * rewindToMark() restores to.
 *
 * @param {Source} source - input positioned just after '</'
 * @returns {string} tag name with trailing whitespace removed, or "" when
 *   nothing was read before '>'
 * @throws {ParseError} UNEXPECTED_END when the source runs out before '>'
 */
export function readClosingTagName(source) {
  source.markTokenStart(1);

  // Capture the start offset only after the mark has been placed.
  const nameStart = source.startIndex;
  let nameLength = 0;

  for (; source.canRead(); nameLength++) {
    if (source.readCh() !== ">") continue;

    const rawName = source.readStr(nameLength, nameStart);
    return rawName ? rawName.trimEnd() : "";
  }

  // Ran out of input before '>': report what was collected so far.
  const partialName = source.readStr(nameLength, nameStart);
  source.updateBufferBoundary(nameLength);
  throw new ParseError(`Unexpected end of source reading closing tag '</${partialName}'`, ErrorCode.UNEXPECTED_END);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Read an XML opening tag expression and return a tag descriptor.
 *
 * Handles normal tags — not comments, CDATA, or DOCTYPE.
 * Example input (from source, after '<'): `tag attr='some"' attr2=">" bool>`
 *
 * The scan looks ahead with readChAt() without consuming anything; the
 * source is advanced past the closing '>' only once the whole tag is found.
 * A '>' inside single or double quotes is attribute text and does not end
 * the tag. Consequently an unterminated quote can never be observed together
 * with a terminating '>': it makes the scan run to the end of the buffered
 * input and is reported as UNEXPECTED_END, exactly like a chunk boundary in
 * the middle of a tag.
 *
 * Uses a level-1 (inner) mark, so the level-0 outer mark set by the caller's
 * loop is preserved for rewinding.
 *
 * @param {object} parser - Xml2JsParser instance (reads `parser.source`)
 * @returns {{ tagName, selfClosing, rawAttributes, _attrsExp }}
 * @throws {ParseError} UNEXPECTED_END when no unquoted '>' is available
 */
export function readTagExp(parser) {
  const source = parser.source;
  source.markTokenStart(1);

  let inSingleQuotes = false;
  let inDoubleQuotes = false;
  let foundEnd = false;
  let i = 0;

  for (; source.canRead(i); i++) {
    const char = source.readChAt(i);

    if (char === "'" && !inDoubleQuotes) {
      inSingleQuotes = !inSingleQuotes;
    } else if (char === '"' && !inSingleQuotes) {
      inDoubleQuotes = !inDoubleQuotes;
    } else if (char === '>' && !inSingleQuotes && !inDoubleQuotes) {
      foundEnd = true;
      break;
    }
  }

  if (!foundEnd) {
    // Buffer exhausted before '>' — chunk boundary mid-tag (or an unclosed
    // quote swallowing the rest of the input). Throw UNEXPECTED_END so
    // feed()/parseStream() rewinds to the level-0 outer mark and retries.
    throw new ParseError("Unexpected closing of source waiting for '>'", ErrorCode.UNEXPECTED_END);
  }

  // i is the offset of '>' itself: read everything before it, then skip it.
  const exp = source.readStr(i);
  source.updateBufferBoundary(i + 1);
  return buildTagExpObj(exp, parser);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read a processing-instruction tag expression (<?name attrs?>).
 *
 * The scan looks ahead with readChAt() without consuming anything and stops
 * at the first '?>' that is not inside single or double quotes. As with
 * regular tags, an unterminated quote hides every later '?>' and therefore
 * surfaces as UNEXPECTED_END once the buffered input is exhausted.
 *
 * Uses a level-1 (inner) mark, so the level-0 outer mark set by the caller's
 * loop is preserved for rewinding.
 *
 * @param {object} parser - Xml2JsParser instance (reads `parser.source`)
 * @returns {{ tagName, selfClosing, rawAttributes, _attrsExp }}
 * @throws {ParseError} UNEXPECTED_END when no unquoted '?>' is available
 */
export function readPiExp(parser) {
  const source = parser.source;
  source.markTokenStart(1);

  let inSingleQuotes = false;
  let inDoubleQuotes = false;
  let foundEnd = false;
  let i = 0;

  for (; source.canRead(i); i++) {
    const currentChar = source.readChAt(i);

    if (currentChar === "'" && !inDoubleQuotes) {
      inSingleQuotes = !inSingleQuotes;
    } else if (currentChar === '"' && !inSingleQuotes) {
      inDoubleQuotes = !inDoubleQuotes;
    } else if (
      currentChar === '?' &&
      !inSingleQuotes &&
      !inDoubleQuotes &&
      // Look ahead only when a terminator is actually possible.
      source.readChAt(i + 1) === '>'
    ) {
      foundEnd = true;
      break;
    }
  }

  if (!foundEnd) {
    // Buffer exhausted before '?>' — chunk boundary mid-PI-tag.
    throw new ParseError("Unexpected closing of source waiting for '?>'", ErrorCode.UNEXPECTED_END);
  }

  // TODO: use regex to verify attributes if not set to ignore

  // i is the offset of '?': read everything before it, then skip '?>'.
  const exp = source.readStr(i);
  source.updateBufferBoundary(i + 2);
  return buildTagExpObj(exp, parser);
}
|
|
131
|
+
|
|
132
|
+
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
/**
 * Parse a raw tag expression string into a structured tag descriptor.
 *
 * Steps:
 *  1. A trailing '/' marks the tag as self-closing and is dropped.
 *  2. The expression is split at the first XML whitespace character
 *     (space, tab, CR or LF): the part before it is the tag name, the part
 *     after it is the raw attribute expression. Splitting on any XML
 *     whitespace — not just a literal space — lets tags whose name is
 *     followed by a tab or newline (e.g. `<root\n\tid="1">`) parse correctly.
 *  3. The tag name is validated with isName().
 *  4. Unless attributes are skipped, raw attribute values are collected
 *     (pass 1). Pass 2 (flushAttributes) is run later by the caller.
 *
 * Also used for processing-instruction expressions (see readPiExp).
 *
 * @param {string} exp - everything between '<' and '>' (exclusive)
 * @param {object} parser - reads `parser.options.skip.attributes`
 * @returns {{ tagName, selfClosing, rawAttributes, _attrsExp }}
 * @throws {ParseError} INVALID_TAG_NAME when the extracted name is not a valid name
 */
function buildTagExpObj(exp, parser) {
  const tagExp = {
    tagName: "",
    selfClosing: false,
    rawAttributes: Object.create(null),
    _attrsExp: "", // stored for two-pass attribute flushing in readOpeningTag
  };

  let body = exp;
  if (body.endsWith("/")) {
    tagExp.selfClosing = true;
    body = body.slice(0, -1); // Remove the trailing slash
  }

  // Separate tag name from attribute expression at the first whitespace.
  let attrsExp = "";
  const separatorIndex = body.search(/[ \t\r\n]/);
  if (separatorIndex === -1) {
    // only tag
    tagExp.tagName = body;
  } else {
    tagExp.tagName = body.substring(0, separatorIndex);
    attrsExp = body.substring(separatorIndex + 1);
  }
  tagExp.tagName = tagExp.tagName.trimEnd();
  tagExp._attrsExp = attrsExp;

  if (!isName(tagExp.tagName)) {
    throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);
  }

  // Pass 1: collect raw attribute values for matcher.updateCurrent().
  // Pass 2 (flushAttributes) runs later in readOpeningTag, after updateCurrent().
  if (!parser.options.skip.attributes && attrsExp.length > 0) {
    collectRawAttributes(attrsExp, parser, tagExp);
  }

  return tagExp;
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { readPiExp, flushAttributes } from './XmlPartReader.js';
|
|
2
|
+
import { ParseError, ErrorCode } from './ParseError.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Read a CDATA section and hand its content to the output builder.
 *
 * Expects '<![' to have been consumed already. Verifies the "CDATA["
 * preamble, then reads everything up to the terminating "]]>" and passes it
 * to `parser.outputBuilder.addLiteral()`.
 *
 * @param {object} parser - reads `parser.source`, writes to `parser.outputBuilder`
 * @throws {ParseError} UNEXPECTED_END when the preamble is not fully available
 * @throws {ParseError} INVALID_TAG when the preamble is not "CDATA["
 */
export function readCdata(parser) {
  const src = parser.source;
  const PREAMBLE = "CDATA[";

  // Level-1 inner mark: only a safe trim boundary for flush(). The level-0
  // outer mark — set by parseXml()'s loop before '<![' was consumed and used
  // by rewindToMark() — stays as it is.
  src.markTokenStart(1);

  const preambleAvailable = src.canRead(PREAMBLE.length - 1);
  if (!preambleAvailable) {
    // Chunk boundary inside the preamble: UNEXPECTED_END makes feed() rewind
    // to the level-0 outer mark and retry '<![CDATA[' with the next chunk.
    throw new ParseError(
      `Unexpected end of source reading CDATA preamble`,
      ErrorCode.UNEXPECTED_END,
      { line: src.line, col: src.cols, index: src.startIndex }
    );
  }

  const preamble = src.readStr(PREAMBLE.length);
  src.updateBufferBoundary(PREAMBLE.length);

  if (preamble !== PREAMBLE) {
    throw new ParseError(
      `Invalid CDATA expression at ${src.line}:${src.cols}`,
      ErrorCode.INVALID_TAG,
      { line: src.line, col: src.cols, index: src.startIndex }
    );
  }

  parser.outputBuilder.addLiteral(src.readUpto("]]>"));
}
|
|
33
|
+
|
|
34
|
+
/**
 * Read a processing instruction (or the XML declaration) and emit it.
 *
 * Expects '<?' to have been consumed already. The tag expression is read via
 * readPiExp(); its attributes are flushed into the output builder first, and
 * then either addDeclaration("?xml") or addInstruction("?<name>") is called,
 * subject to the `skip.declaration` / `skip.pi` options.
 *
 * @param {object} parser - reads `parser.options.skip` and `parser.source`,
 *   writes to `parser.outputBuilder`
 * @throws {ParseError} INVALID_TAG when no tag expression is returned
 */
export function readPiTag(parser) {
  const skipOptions = parser.options.skip;
  parser.source.markTokenStart(1);

  // <? already consumed. readPiExp() takes only the parser; it locates the
  // '?>' terminator itself.
  const tagExp = readPiExp(parser);
  if (!tagExp) {
    throw new ParseError(
      "Invalid Pi Tag expression.",
      ErrorCode.INVALID_TAG,
      { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
    );
  }

  // Flush attributes into the output builder's this.attributes accumulator
  // so addDeclaration() / addInstruction() pick them up, mirroring what readOpeningTag
  // does for regular tags. PI tags are not pushed onto the matcher, so no
  // updateCurrent() call is needed here.
  if (!skipOptions.attributes) {
    flushAttributes(tagExp._attrsExp, parser);
  }

  if (tagExp.tagName === "xml") {
    // TODO: verify it is very first tag else error
    if (!skipOptions.declaration) {
      parser.outputBuilder.addDeclaration("?xml");
    }
  } else if (!skipOptions.pi) {
    parser.outputBuilder.addInstruction("?" + tagExp.tagName);
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Read an XML comment and hand its text to the output builder.
 *
 * Expects '<!-' to have been consumed already. Checks that the next
 * character is the second '-', then reads everything up to "-->" and passes
 * it to `parser.outputBuilder.addComment()`.
 *
 * @param {object} parser - reads `parser.source`, writes to `parser.outputBuilder`
 * @throws {ParseError} UNEXPECTED_END when no character is available
 * @throws {ParseError} INVALID_TAG when the next character is not '-'
 */
export function readComment(parser) {
  const src = parser.source;
  src.markTokenStart(1);

  // '<!-' is behind us; the second dash must follow.
  const hasInput = src.canRead();
  if (!hasInput) {
    throw new ParseError(
      `Unexpected end of source reading comment`,
      ErrorCode.UNEXPECTED_END,
      { line: src.line, col: src.cols, index: src.startIndex }
    );
  }

  const secondDash = src.readCh();
  if (secondDash !== "-") {
    throw new ParseError(
      `Invalid comment expression at ${src.line}:${src.cols}`,
      ErrorCode.INVALID_TAG,
      { line: src.line, col: src.cols, index: src.startIndex }
    );
  }

  parser.outputBuilder.addComment(src.readUpto("-->"));
}
|