@nodable/flexible-xml-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ 'use strict';
2
+ import { ParseError, ErrorCode } from './ParseError.js';
3
+ import { collectRawAttributes } from './AttributeProcessor.js';
4
+ import { isName } from "./util.js"
5
+ // Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can
6
+ // continue to import it from here without changing their import lines.
7
+ export { flushAttributes } from './AttributeProcessor.js';
8
+
9
/**
 * Consume a closing tag up to and including its '>' and return the tag name.
 *
 * The reader places a level-1 (inner) mark before consuming anything. Per the
 * notes in this file, that mark only tells flush() where trimming is safe and
 * leaves the level-0 outer mark (the one rewindToMark() restores) untouched.
 *
 * @param {Source} source
 * @returns {string} tag name with trailing whitespace removed, or "" when
 *   readStr() yields nothing
 * @throws {ParseError} UNEXPECTED_END when the source runs out before '>'
 */
export function readClosingTagName(source) {
  source.markTokenStart(1);
  const nameStart = source.startIndex;
  let nameLength = 0;

  // Count characters consumed before '>'; the '>' itself is not counted.
  for (; source.canRead(); nameLength++) {
    if (source.readCh() !== ">") {
      continue;
    }
    const rawName = source.readStr(nameLength, nameStart);
    return rawName ? rawName.trimEnd() : "";
  }

  // Ran out of input: report what was read so far as part of the message.
  const partialName = source.readStr(nameLength, nameStart);
  source.updateBufferBoundary(nameLength);
  throw new ParseError(
    `Unexpected end of source reading closing tag '</${partialName}'`,
    ErrorCode.UNEXPECTED_END
  );
}
36
+
37
/**
 * Read an XML opening tag expression and return a tag descriptor.
 *
 * Handles normal tags — not comments, CDATA, or DOCTYPE.
 * Example input (from source, after '<'): `tag attr='some"' attr2=">" bool>`
 *
 * The buffer is scanned by look-ahead (readChAt) for the first '>' that is
 * not inside a single- or double-quoted value; nothing is consumed until that
 * terminator has been found. A level-1 (inner) mark is placed first, leaving
 * the level-0 outer mark alone.
 *
 * Note: a '>' is only recognised while no quote is open, so an unterminated
 * quote can never reach the terminator. It surfaces as UNEXPECTED_END (the
 * same as a chunk boundary in the middle of a tag), not as a separate
 * "unclosed quote" error.
 *
 * @param {object} parser - Xml2JsParser instance
 * @returns {{ tagName: string, selfClosing: boolean, rawAttributes: object, _attrsExp: string }}
 *   the descriptor produced by buildTagExpObj()
 * @throws {ParseError} UNEXPECTED_END when the buffer ends before an unquoted '>'
 */
export function readTagExp(parser) {
  const { source } = parser;
  source.markTokenStart(1);

  let openQuote = ""; // the quote character currently open, or "" when none
  let length = 0; // number of characters before the terminating '>'
  let foundEnd = false;

  for (; source.canRead(length); length++) {
    const ch = source.readChAt(length);

    if (ch === "'" || ch === '"') {
      // A quote only opens/closes a value when it is not nested in the other kind.
      if (openQuote === "") {
        openQuote = ch;
      } else if (openQuote === ch) {
        openQuote = "";
      }
    } else if (ch === ">" && openQuote === "") {
      foundEnd = true;
      break;
    }
  }

  if (!foundEnd) {
    // Buffer exhausted before '>' — chunk boundary mid-tag (or an unterminated
    // quote). Throw UNEXPECTED_END so feed()/parseStream() rewinds to the
    // level-0 outer mark and retries.
    throw new ParseError("Unexpected closing of source waiting for '>'", ErrorCode.UNEXPECTED_END);
  }

  const exp = source.readStr(length);
  source.updateBufferBoundary(length + 1); // also step over the '>'
  return buildTagExpObj(exp, parser);
}
81
+
82
/**
 * Read a processing-instruction tag expression (<?name attrs?>).
 *
 * The buffer is scanned by look-ahead for the first "?>" that is not inside a
 * single- or double-quoted value; nothing is consumed until it is found. A
 * level-1 (inner) mark is placed first, leaving the level-0 outer mark alone.
 *
 * Note: "?>" is only recognised while no quote is open, so an unterminated
 * quote surfaces as UNEXPECTED_END (the same as a chunk boundary in the middle
 * of the PI), not as a separate "unclosed quote" error.
 *
 * @param {object} parser
 * @returns {{ tagName: string, selfClosing: boolean, rawAttributes: object, _attrsExp: string }}
 *   the descriptor produced by buildTagExpObj()
 * @throws {ParseError} UNEXPECTED_END when the buffer ends before an unquoted "?>"
 */
export function readPiExp(parser) {
  const { source } = parser;
  source.markTokenStart(1);

  let openQuote = ""; // the quote character currently open, or "" when none
  let length = 0; // number of characters before the terminating "?>"
  let foundEnd = false;

  for (; source.canRead(length); length++) {
    const ch = source.readChAt(length);

    if (ch === "'" || ch === '"') {
      // A quote only opens/closes a value when it is not nested in the other kind.
      if (openQuote === "") {
        openQuote = ch;
      } else if (openQuote === ch) {
        openQuote = "";
      }
      continue;
    }

    // Only peek at the next character once a '?' is seen, and only when that
    // position is readable, so the scan never reads past the buffered data.
    if (
      ch === "?" &&
      openQuote === "" &&
      source.canRead(length + 1) &&
      source.readChAt(length + 1) === ">"
    ) {
      foundEnd = true;
      break;
    }
  }

  if (!foundEnd) {
    // Buffer exhausted before '?>' — chunk boundary mid-PI-tag (or an
    // unterminated quote).
    throw new ParseError("Unexpected closing of source waiting for '?>'", ErrorCode.UNEXPECTED_END);
  }

  // TODO: use regex to verify attributes if not set to ignore

  const exp = source.readStr(length);
  source.updateBufferBoundary(length + 2); // also step over the "?>"
  return buildTagExpObj(exp, parser);
}
131
+
132
+ // ─── Internal helpers ─────────────────────────────────────────────────────────
133
+
134
+ /**
135
+ * Parse a raw tag expression string into a structured tag descriptor.
136
+ *
137
+ * @param {string} exp - everything between '<' and '>' (exclusive)
138
+ * @param {object} parser
139
+ * @returns {{ tagName, selfClosing, rawAttributes, _attrsExp }}
140
+ */
141
+ function buildTagExpObj(exp, parser) {
142
+ const tagExp = {
143
+ tagName: "",
144
+ selfClosing: false,
145
+ rawAttributes: Object.create(null),
146
+ _attrsExp: "", // stored for two-pass attribute flushing in readOpeningTag
147
+ };
148
+
149
+ const expLen = exp.length;
150
+
151
+ if (exp[expLen - 1] === "/") {
152
+ tagExp.selfClosing = true;
153
+ exp = exp.slice(0, -1); // Remove the trailing slash
154
+ }
155
+
156
+ // Separate tag name from attribute expression
157
+ let attrsExp = "";
158
+ let i = 0;
159
+
160
+ for (; i < expLen; i++) {
161
+ if (exp[i] === " ") {
162
+ tagExp.tagName = exp.substring(0, i);
163
+ attrsExp = exp.substring(i + 1);
164
+ break;
165
+ }
166
+ }
167
+ //only tag
168
+ if (tagExp.tagName.length === 0 && i === expLen) tagExp.tagName = exp;
169
+ tagExp.tagName = tagExp.tagName.trimEnd();
170
+ tagExp._attrsExp = attrsExp;
171
+
172
+ if (!isName(tagExp.tagName)) {
173
+ throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);
174
+ }
175
+
176
+ // Pass 1: collect raw attribute values for matcher.updateCurrent().
177
+ // Pass 2 (flushAttributes) runs later in readOpeningTag, after updateCurrent().
178
+ if (!parser.options.skip.attributes && attrsExp.length > 0) {
179
+ collectRawAttributes(attrsExp, parser, tagExp);
180
+ }
181
+
182
+ return tagExp;
183
+ }
@@ -0,0 +1,82 @@
1
+ import { readPiExp, flushAttributes } from './XmlPartReader.js';
2
+ import { ParseError, ErrorCode } from './ParseError.js';
3
+
4
/**
 * Read a CDATA section and hand its text to the output builder.
 *
 * Expects '<![' to have been consumed already: verifies the "CDATA[" preamble,
 * then passes everything up to "]]>" to outputBuilder.addLiteral().
 *
 * @param {object} parser - provides `source` and `outputBuilder`
 * @throws {ParseError} UNEXPECTED_END when the preamble is not fully buffered;
 *   INVALID_TAG when the preamble is not "CDATA["
 */
export function readCdata(parser) {
  const { source } = parser;
  const preamble = "CDATA[";
  const currentPosition = () => ({ line: source.line, col: source.cols, index: source.startIndex });

  // Level-1 inner mark: per the notes in this file it is only a safe trim
  // boundary for flush(); the level-0 outer mark set before '<![' was consumed
  // is the one rewindToMark() restores to, and it is left untouched here.
  source.markTokenStart(1);

  // The whole preamble must be buffered; otherwise this is a chunk boundary
  // inside "CDATA[" and the caller is expected to retry with more data.
  const lastPreambleOffset = preamble.length - 1;
  if (!source.canRead(lastPreambleOffset)) {
    throw new ParseError(
      "Unexpected end of source reading CDATA preamble",
      ErrorCode.UNEXPECTED_END,
      currentPosition()
    );
  }

  const actual = source.readStr(preamble.length);
  source.updateBufferBoundary(preamble.length);

  if (actual !== preamble) {
    throw new ParseError(
      `Invalid CDATA expression at ${source.line}:${source.cols}`,
      ErrorCode.INVALID_TAG,
      currentPosition()
    );
  }

  const literal = source.readUpto("]]>");
  parser.outputBuilder.addLiteral(literal);
}
33
+
34
/**
 * Read a processing instruction (or the XML declaration) and report it to the
 * output builder.
 *
 * Expects '<?' to have been consumed already. The tag expression is read with
 * readPiExp(); a tag named "xml" is reported through addDeclaration("?xml"),
 * any other name through addInstruction("?" + name). Each report can be
 * suppressed via parser.options.skip (`declaration`, `pi`).
 *
 * @param {object} parser - provides `source`, `options.skip` and `outputBuilder`
 * @throws {ParseError} INVALID_TAG when readPiExp() returns nothing; errors
 *   thrown by readPiExp() propagate unchanged
 */
export function readPiTag(parser) {
  const { skip } = parser.options;
  parser.source.markTokenStart(1);

  //<? already consumed
  const tagExp = readPiExp(parser);
  if (!tagExp) {
    throw new ParseError(
      "Invalid Pi Tag expression.",
      ErrorCode.INVALID_TAG,
      { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
    );
  }

  // Flush attributes into the output builder's this.attributes accumulator
  // so addDeclaration() / addInstruction() pick them up, mirroring what readOpeningTag
  // does for regular tags. PI tags are not pushed onto the matcher, so no
  // updateCurrent() call is needed here.
  // NOTE(review): attributes are flushed even when the declaration/PI itself is
  // skipped below — confirm the accumulator is reset before the next tag.
  if (!skip.attributes) {
    flushAttributes(tagExp._attrsExp, parser);
  }

  if (tagExp.tagName === "xml") {
    //TODO: verify it is very first tag else error
    if (!skip.declaration) {
      parser.outputBuilder.addDeclaration("?xml");
    }
    return;
  }

  if (!skip.pi) {
    parser.outputBuilder.addInstruction(`?${tagExp.tagName}`);
  }
}
62
+
63
/**
 * Read a comment and hand its text to the output builder.
 *
 * Expects '<!-' to have been consumed already: the next character must be the
 * second '-', after which everything up to "-->" is passed to
 * outputBuilder.addComment().
 *
 * @param {object} parser - provides `source` and `outputBuilder`
 * @throws {ParseError} UNEXPECTED_END when nothing is left to read;
 *   INVALID_TAG when the next character is not '-'
 */
export function readComment(parser) {
  const { source } = parser;
  const currentPosition = () => ({ line: source.line, col: source.cols, index: source.startIndex });

  source.markTokenStart(1);

  //<!- already consumed
  if (!source.canRead()) {
    throw new ParseError(
      "Unexpected end of source reading comment",
      ErrorCode.UNEXPECTED_END,
      currentPosition()
    );
  }

  const secondDash = source.readCh();
  if (secondDash !== "-") {
    throw new ParseError(
      `Invalid comment expression at ${source.line}:${source.cols}`,
      ErrorCode.INVALID_TAG,
      currentPosition()
    );
  }

  const commentText = source.readUpto("-->");
  parser.outputBuilder.addComment(commentText);
}