@nodable/flexible-xml-parser 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/lib/fxp.cjs +1 -1
- package/package.json +4 -2
- package/src/AttributeProcessor.js +13 -2
- package/src/InputSource/BufferSource.js +42 -2
- package/src/InputSource/FeedableSource.js +55 -14
- package/src/InputSource/StringSource.js +41 -2
- package/src/StopNodeProcessor.js +19 -10
- package/src/Xml2JsParser.js +90 -25
- package/src/XmlPartReader.js +19 -7
- package/src/XmlSpecialTagsReader.js +5 -7
|
@@ -95,7 +95,14 @@ export default class BufferSource {
|
|
|
95
95
|
// ─── Core read interface ──────────────────────────────────────────────────
|
|
96
96
|
|
|
97
97
|
readCh() {
|
|
98
|
-
|
|
98
|
+
const code = this.buffer[this.startIndex++];
|
|
99
|
+
if (code === 10) { // '\n'
|
|
100
|
+
this.line++;
|
|
101
|
+
this.cols = 0;
|
|
102
|
+
} else {
|
|
103
|
+
this.cols++;
|
|
104
|
+
}
|
|
105
|
+
return String.fromCharCode(code);
|
|
99
106
|
}
|
|
100
107
|
|
|
101
108
|
readChAt(index) {
|
|
@@ -107,6 +114,34 @@ export default class BufferSource {
|
|
|
107
114
|
return this.buffer.slice(from, from + n).toString();
|
|
108
115
|
}
|
|
109
116
|
|
|
117
|
+
/**
|
|
118
|
+
* Scan buffer[this.startIndex, end) for byte code 10 ('\n') and advance
|
|
119
|
+
* line/cols to match, mirroring readCh()'s per-byte logic. Does NOT touch
|
|
120
|
+
* startIndex — callers set that themselves afterwards (their "end" is not
|
|
121
|
+
* always startIndex + n; readUptoCloseTag's consumed span includes the
|
|
122
|
+
* matched stop string).
|
|
123
|
+
*
|
|
124
|
+
* Shared by updateBufferBoundary() and the readUpto*() family so every path
|
|
125
|
+
* that advances the cursor in bulk keeps line/col accurate, not just the
|
|
126
|
+
* single-byte readCh() path.
|
|
127
|
+
*
|
|
128
|
+
* @param {number} end — exclusive end of the span being skipped
|
|
129
|
+
*/
|
|
130
|
+
_advanceLineCol(end) {
|
|
131
|
+
let lastNewlineIdx = -1;
|
|
132
|
+
for (let i = this.startIndex; i < end; i++) {
|
|
133
|
+
if (this.buffer[i] === 10) {
|
|
134
|
+
this.line++;
|
|
135
|
+
lastNewlineIdx = i;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
if (lastNewlineIdx >= 0) {
|
|
139
|
+
this.cols = end - lastNewlineIdx - 1;
|
|
140
|
+
} else {
|
|
141
|
+
this.cols += end - this.startIndex;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
110
145
|
readUpto(stopStr) {
|
|
111
146
|
const inputLength = this.buffer.length;
|
|
112
147
|
const stopLength = stopStr.length;
|
|
@@ -119,6 +154,7 @@ export default class BufferSource {
|
|
|
119
154
|
}
|
|
120
155
|
if (match) {
|
|
121
156
|
const result = this.buffer.slice(this.startIndex, i).toString();
|
|
157
|
+
this._advanceLineCol(i + stopLength);
|
|
122
158
|
this.startIndex = i + stopLength;
|
|
123
159
|
return result;
|
|
124
160
|
}
|
|
@@ -142,6 +178,7 @@ export default class BufferSource {
|
|
|
142
178
|
for (let i = this.startIndex; i < len; i++) {
|
|
143
179
|
if (buf[i] === stopCode) {
|
|
144
180
|
const result = buf.slice(this.startIndex, i).toString();
|
|
181
|
+
this._advanceLineCol(i + 1);
|
|
145
182
|
this.startIndex = i + 1;
|
|
146
183
|
return result;
|
|
147
184
|
}
|
|
@@ -177,6 +214,7 @@ export default class BufferSource {
|
|
|
177
214
|
}
|
|
178
215
|
if (state === 2) {
|
|
179
216
|
const result = this.buffer.slice(this.startIndex, tagMatchStart).toString();
|
|
217
|
+
this._advanceLineCol(i + 1);
|
|
180
218
|
this.startIndex = i + 1;
|
|
181
219
|
return result;
|
|
182
220
|
}
|
|
@@ -215,7 +253,9 @@ export default class BufferSource {
|
|
|
215
253
|
* @param {number} [n=1]
|
|
216
254
|
*/
|
|
217
255
|
updateBufferBoundary(n = 1) {
|
|
218
|
-
this.startIndex
|
|
256
|
+
const end = this.startIndex + n;
|
|
257
|
+
this._advanceLineCol(end);
|
|
258
|
+
this.startIndex = end;
|
|
219
259
|
if (this.autoFlush && this.startIndex >= this.flushThreshold && this._tokenStart < 0) {
|
|
220
260
|
this.flush();
|
|
221
261
|
}
|
|
@@ -69,9 +69,15 @@ export default class FeedableSource {
|
|
|
69
69
|
* _marks[1] — inner mark: set by individual reader functions.
|
|
70
70
|
* Used only by flush() as the safe trim boundary.
|
|
71
71
|
*
|
|
72
|
-
*
|
|
72
|
+
* `null` means "not set" for that level.
|
|
73
|
+
*
|
|
74
|
+
* Each entry is { startIndex, line, cols } (or null when unset) — line/cols
|
|
75
|
+
* are captured alongside startIndex so a rewind can undo readCh()'s
|
|
76
|
+
* line/col advancement too, not just the buffer offset. Without this,
|
|
77
|
+
* any token that fails mid-read (UNEXPECTED_END) and gets replayed on
|
|
78
|
+
* the next feed() double-counts every '\n' it consumed before failing.
|
|
73
79
|
*/
|
|
74
|
-
this._marks = [
|
|
80
|
+
this._marks = [null, null];
|
|
75
81
|
}
|
|
76
82
|
|
|
77
83
|
/**
|
|
@@ -132,7 +138,7 @@ export default class FeedableSource {
|
|
|
132
138
|
* @param {0|1} [level=0]
|
|
133
139
|
*/
|
|
134
140
|
markTokenStart(level = 0) {
|
|
135
|
-
this._marks[level] = this.startIndex;
|
|
141
|
+
this._marks[level] = { startIndex: this.startIndex, line: this.line, cols: this.cols };
|
|
136
142
|
}
|
|
137
143
|
|
|
138
144
|
/**
|
|
@@ -145,11 +151,13 @@ export default class FeedableSource {
|
|
|
145
151
|
* Called by XMLParser.feed() when a reader throws UNEXPECTED_END.
|
|
146
152
|
*/
|
|
147
153
|
rewindToMark() {
|
|
148
|
-
if (this._marks[0]
|
|
149
|
-
this.startIndex = this._marks[0];
|
|
154
|
+
if (this._marks[0] !== null) {
|
|
155
|
+
this.startIndex = this._marks[0].startIndex;
|
|
156
|
+
this.line = this._marks[0].line;
|
|
157
|
+
this.cols = this._marks[0].cols;
|
|
150
158
|
}
|
|
151
|
-
this._marks[0] =
|
|
152
|
-
this._marks[1] =
|
|
159
|
+
this._marks[0] = null;
|
|
160
|
+
this._marks[1] = null;
|
|
153
161
|
}
|
|
154
162
|
|
|
155
163
|
/**
|
|
@@ -165,8 +173,8 @@ export default class FeedableSource {
|
|
|
165
173
|
* stale mark only delays flushing — it does not cause correctness issues.
|
|
166
174
|
*/
|
|
167
175
|
clearMark() {
|
|
168
|
-
this._marks[0] =
|
|
169
|
-
this._marks[1] =
|
|
176
|
+
this._marks[0] = null;
|
|
177
|
+
this._marks[1] = null;
|
|
170
178
|
}
|
|
171
179
|
|
|
172
180
|
/**
|
|
@@ -212,6 +220,33 @@ export default class FeedableSource {
|
|
|
212
220
|
* @returns {string} content before the stop string (stop string is consumed)
|
|
213
221
|
* @throws {ParseError} UNEXPECTED_END when stop string is not found
|
|
214
222
|
*/
|
|
223
|
+
/**
|
|
224
|
+
* Scan buffer[this.startIndex, end) for '\n' and advance line/cols to match,
|
|
225
|
+
* mirroring readCh()'s per-char logic. Does NOT touch startIndex — callers
|
|
226
|
+
* set that themselves afterwards (their "end" is not always startIndex + n;
|
|
227
|
+
* readUptoCloseTag's consumed span includes the matched stop string).
|
|
228
|
+
*
|
|
229
|
+
* Shared by updateBufferBoundary() and the readUpto*() family so every path
|
|
230
|
+
* that advances the cursor in bulk keeps line/col accurate, not just the
|
|
231
|
+
* single-character readCh() path.
|
|
232
|
+
*
|
|
233
|
+
* @param {number} end — exclusive end of the span being skipped
|
|
234
|
+
*/
|
|
235
|
+
_advanceLineCol(end) {
|
|
236
|
+
let lastNewlineIdx = -1;
|
|
237
|
+
for (let i = this.startIndex; i < end; i++) {
|
|
238
|
+
if (this.buffer[i] === '\n') {
|
|
239
|
+
this.line++;
|
|
240
|
+
lastNewlineIdx = i;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
if (lastNewlineIdx >= 0) {
|
|
244
|
+
this.cols = end - lastNewlineIdx - 1;
|
|
245
|
+
} else {
|
|
246
|
+
this.cols += end - this.startIndex;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
215
250
|
readUpto(stopStr) {
|
|
216
251
|
const inputLength = this.buffer.length;
|
|
217
252
|
const stopLength = stopStr.length;
|
|
@@ -223,6 +258,7 @@ export default class FeedableSource {
|
|
|
223
258
|
}
|
|
224
259
|
if (match) {
|
|
225
260
|
const result = this.buffer.substring(this.startIndex, i);
|
|
261
|
+
this._advanceLineCol(i + stopLength);
|
|
226
262
|
this.startIndex = i + stopLength;
|
|
227
263
|
return result;
|
|
228
264
|
}
|
|
@@ -245,6 +281,7 @@ export default class FeedableSource {
|
|
|
245
281
|
throw new ParseError(`Unexpected end of source reading '${stopChar}'`, ErrorCode.UNEXPECTED_END);
|
|
246
282
|
}
|
|
247
283
|
const result = this.buffer.substring(this.startIndex, i);
|
|
284
|
+
this._advanceLineCol(i + 1);
|
|
248
285
|
this.startIndex = i + 1;
|
|
249
286
|
return result;
|
|
250
287
|
}
|
|
@@ -281,6 +318,7 @@ export default class FeedableSource {
|
|
|
281
318
|
}
|
|
282
319
|
if (state === 2) {
|
|
283
320
|
const result = this.buffer.substring(this.startIndex, tagMatchStart);
|
|
321
|
+
this._advanceLineCol(i + 1);
|
|
284
322
|
this.startIndex = i + 1;
|
|
285
323
|
return result;
|
|
286
324
|
}
|
|
@@ -300,8 +338,10 @@ export default class FeedableSource {
|
|
|
300
338
|
* @param {number} [n=1]
|
|
301
339
|
*/
|
|
302
340
|
updateBufferBoundary(n = 1) {
|
|
303
|
-
this.startIndex
|
|
304
|
-
|
|
341
|
+
const end = this.startIndex + n;
|
|
342
|
+
this._advanceLineCol(end);
|
|
343
|
+
this.startIndex = end;
|
|
344
|
+
const anyMarkActive = this._marks[0] !== null || this._marks[1] !== null;
|
|
305
345
|
if (this.autoFlush && this.startIndex >= this.flushThreshold && !anyMarkActive) {
|
|
306
346
|
this.flush();
|
|
307
347
|
}
|
|
@@ -322,16 +362,17 @@ export default class FeedableSource {
|
|
|
322
362
|
// Determine the earliest position that must be kept.
|
|
323
363
|
let origin = this.startIndex;
|
|
324
364
|
for (const m of this._marks) {
|
|
325
|
-
if (m
|
|
365
|
+
if (m !== null && m.startIndex < origin) origin = m.startIndex;
|
|
326
366
|
}
|
|
327
367
|
|
|
328
368
|
if (origin > 0) {
|
|
329
369
|
this.buffer = this.buffer.substring(origin);
|
|
330
370
|
|
|
331
|
-
// Adjust all mark
|
|
371
|
+
// Adjust all mark buffer-offsets by the amount trimmed.
|
|
372
|
+
// line/cols are not buffer-relative — they stay untouched.
|
|
332
373
|
const marksLen = this._marks.length;
|
|
333
374
|
for (let i = 0; i < marksLen; i++) {
|
|
334
|
-
if (this._marks[i]
|
|
375
|
+
if (this._marks[i] !== null) this._marks[i].startIndex -= origin;
|
|
335
376
|
}
|
|
336
377
|
|
|
337
378
|
this.startIndex -= origin;
|
|
@@ -108,7 +108,14 @@ export default class StringSource {
|
|
|
108
108
|
// ─── Core read interface ──────────────────────────────────────────────────
|
|
109
109
|
|
|
110
110
|
readCh() {
|
|
111
|
-
|
|
111
|
+
const ch = this.buffer[this.startIndex++];
|
|
112
|
+
if (ch === '\n') {
|
|
113
|
+
this.line++;
|
|
114
|
+
this.cols = 0;
|
|
115
|
+
} else {
|
|
116
|
+
this.cols++;
|
|
117
|
+
}
|
|
118
|
+
return ch;
|
|
112
119
|
}
|
|
113
120
|
|
|
114
121
|
readChAt(index) {
|
|
@@ -120,6 +127,33 @@ export default class StringSource {
|
|
|
120
127
|
return this.buffer.substring(from, from + n);
|
|
121
128
|
}
|
|
122
129
|
|
|
130
|
+
/**
|
|
131
|
+
* Scan buffer[this.startIndex, end) for '\n' and advance line/cols to match,
|
|
132
|
+
* mirroring readCh()'s per-char logic. Does NOT touch startIndex — callers
|
|
133
|
+
* set that themselves afterwards (their "end" is not always startIndex + n;
|
|
134
|
+
* readUptoCloseTag's consumed span includes the matched stop string).
|
|
135
|
+
*
|
|
136
|
+
* Shared by updateBufferBoundary() and the readUpto*() family so every path
|
|
137
|
+
* that advances the cursor in bulk keeps line/col accurate, not just the
|
|
138
|
+
* single-character readCh() path.
|
|
139
|
+
*
|
|
140
|
+
* @param {number} end — exclusive end of the span being skipped
|
|
141
|
+
*/
|
|
142
|
+
_advanceLineCol(end) {
|
|
143
|
+
let lastNewlineIdx = -1;
|
|
144
|
+
for (let i = this.startIndex; i < end; i++) {
|
|
145
|
+
if (this.buffer[i] === '\n') {
|
|
146
|
+
this.line++;
|
|
147
|
+
lastNewlineIdx = i;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
if (lastNewlineIdx >= 0) {
|
|
151
|
+
this.cols = end - lastNewlineIdx - 1;
|
|
152
|
+
} else {
|
|
153
|
+
this.cols += end - this.startIndex;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
123
157
|
readUpto(stopStr) {
|
|
124
158
|
const inputLength = this.buffer.length;
|
|
125
159
|
const stopLength = stopStr.length;
|
|
@@ -131,6 +165,7 @@ export default class StringSource {
|
|
|
131
165
|
}
|
|
132
166
|
if (match) {
|
|
133
167
|
const result = this.buffer.substring(this.startIndex, i);
|
|
168
|
+
this._advanceLineCol(i + stopLength);
|
|
134
169
|
this.startIndex = i + stopLength;
|
|
135
170
|
return result;
|
|
136
171
|
}
|
|
@@ -153,6 +188,7 @@ export default class StringSource {
|
|
|
153
188
|
throw new ParseError(`Unexpected end of source reading '${stopChar}'`, ErrorCode.UNEXPECTED_END);
|
|
154
189
|
}
|
|
155
190
|
const result = this.buffer.substring(this.startIndex, i);
|
|
191
|
+
this._advanceLineCol(i + 1);
|
|
156
192
|
this.startIndex = i + 1;
|
|
157
193
|
return result;
|
|
158
194
|
}
|
|
@@ -184,6 +220,7 @@ export default class StringSource {
|
|
|
184
220
|
}
|
|
185
221
|
if (state === 2) {
|
|
186
222
|
const result = this.buffer.substring(this.startIndex, tagMatchStart);
|
|
223
|
+
this._advanceLineCol(i + 1);
|
|
187
224
|
this.startIndex = i + 1;
|
|
188
225
|
return result;
|
|
189
226
|
}
|
|
@@ -211,7 +248,9 @@ export default class StringSource {
|
|
|
211
248
|
* @param {number} [n=1]
|
|
212
249
|
*/
|
|
213
250
|
updateBufferBoundary(n = 1) {
|
|
214
|
-
this.startIndex
|
|
251
|
+
const end = this.startIndex + n;
|
|
252
|
+
this._advanceLineCol(end);
|
|
253
|
+
this.startIndex = end;
|
|
215
254
|
const anyMarkActive = this._marks[0] >= 0 || this._marks[1] >= 0;
|
|
216
255
|
if (this.autoFlush && this.startIndex >= this.flushThreshold && !anyMarkActive) {
|
|
217
256
|
this.flush();
|
package/src/StopNodeProcessor.js
CHANGED
|
@@ -143,7 +143,12 @@ export class StopNodeProcessor {
|
|
|
143
143
|
* chunk-boundary `UNEXPECTED_END` can be retried seamlessly.
|
|
144
144
|
*
|
|
145
145
|
* @param {object} source Any source object with the standard read interface.
|
|
146
|
-
* @returns {string
|
|
146
|
+
* @returns {{content: string, end: {index: number, line: number, col: number}}}
|
|
147
|
+
* `content` is the raw text between the opening and closing tags.
|
|
148
|
+
* `end` is the position immediately after the matched closing tag's '>' —
|
|
149
|
+
* mirrors `TagDetail.openEnd` / closeMeta.closeEnd for the opening-tag side,
|
|
150
|
+
* letting a caller recover the exact span of `<tag>...</tag>` including
|
|
151
|
+
* both delimiters, not just the inner content.
|
|
147
152
|
*/
|
|
148
153
|
collect(source) {
|
|
149
154
|
source.markTokenStart(1);
|
|
@@ -198,7 +203,7 @@ export class StopNodeProcessor {
|
|
|
198
203
|
const c = source.readCh();
|
|
199
204
|
if (c === '>') break;
|
|
200
205
|
}
|
|
201
|
-
return this._finish();
|
|
206
|
+
return this._finish(source);
|
|
202
207
|
}
|
|
203
208
|
}
|
|
204
209
|
|
|
@@ -253,7 +258,7 @@ export class StopNodeProcessor {
|
|
|
253
258
|
|
|
254
259
|
if (closeName === this._tagName) {
|
|
255
260
|
this._depth--;
|
|
256
|
-
if (this._depth === 0) return this._finish();
|
|
261
|
+
if (this._depth === 0) return this._finish(source);
|
|
257
262
|
}
|
|
258
263
|
this._content += '</' + closeName + closeSuffix;
|
|
259
264
|
continue;
|
|
@@ -322,7 +327,7 @@ export class StopNodeProcessor {
|
|
|
322
327
|
const c = source.readCh();
|
|
323
328
|
if (c === '>') break;
|
|
324
329
|
}
|
|
325
|
-
return this._finish();
|
|
330
|
+
return this._finish(source);
|
|
326
331
|
}
|
|
327
332
|
}
|
|
328
333
|
|
|
@@ -389,7 +394,7 @@ export class StopNodeProcessor {
|
|
|
389
394
|
|
|
390
395
|
if (closeName === this._tagName) {
|
|
391
396
|
this._depth--;
|
|
392
|
-
if (this._depth === 0) return this._finish();
|
|
397
|
+
if (this._depth === 0) return this._finish(source);
|
|
393
398
|
}
|
|
394
399
|
this._content += '</' + closeName + closeSuffix;
|
|
395
400
|
continue;
|
|
@@ -417,16 +422,20 @@ export class StopNodeProcessor {
|
|
|
417
422
|
// ── Shared finish helper ───────────────────────────────────────────────────
|
|
418
423
|
|
|
419
424
|
/**
|
|
420
|
-
* Reset runtime state and return the accumulated content
|
|
421
|
-
*
|
|
422
|
-
*
|
|
425
|
+
* Reset runtime state and return the accumulated content plus the end
|
|
426
|
+
* position (immediately after the matched closing tag's '>').
|
|
427
|
+
* Called by every strategy when the closing tag is confirmed — always
|
|
428
|
+
* right after that '>' has just been consumed from `source`.
|
|
429
|
+
* @param {object} source
|
|
430
|
+
* @returns {{content: string, end: {index: number, line: number, col: number}}}
|
|
423
431
|
*/
|
|
424
|
-
_finish() {
|
|
432
|
+
_finish(source) {
|
|
425
433
|
const result = this._content;
|
|
434
|
+
const end = { index: source.startIndex, line: source.line, col: source.cols };
|
|
426
435
|
this._active = false;
|
|
427
436
|
this._content = '';
|
|
428
437
|
this._depth = 1;
|
|
429
|
-
return result;
|
|
438
|
+
return { content: result, end };
|
|
430
439
|
}
|
|
431
440
|
|
|
432
441
|
// ── Private helpers ────────────────────────────────────────────────────────
|
package/src/Xml2JsParser.js
CHANGED
|
@@ -13,15 +13,20 @@ import { name as isName, qName as isQName } from 'xml-naming';
|
|
|
13
13
|
class TagDetail {
|
|
14
14
|
/**
|
|
15
15
|
* @param {string} name - Tag name
|
|
16
|
-
* @param {number} line - 1-based line number where the opening tag began
|
|
17
|
-
* @param {number} col - 1-based column where the opening tag began
|
|
18
|
-
* @param {number} index - Character offset from document start
|
|
16
|
+
* @param {number} line - 1-based line number where the opening tag's '<' began
|
|
17
|
+
* @param {number} col - 1-based column where the opening tag's '<' began
|
|
18
|
+
* @param {number} index - Character offset of '<' from document start
|
|
19
|
+
* @param {number} [openEnd] - Character offset immediately after the opening
|
|
20
|
+
* tag's closing '>' (i.e. end of `<tag attr="x">`). Undefined until the
|
|
21
|
+
* opening tag expression has been fully read; set in readOpeningTag().
|
|
22
|
+
* For self-closing tags this is the offset after '/>'.
|
|
19
23
|
*/
|
|
20
|
-
constructor(name, line = 0, col = 0, index = 0) {
|
|
24
|
+
constructor(name, line = 0, col = 0, index = 0, openEnd = undefined) {
|
|
21
25
|
this.name = name;
|
|
22
26
|
this.line = line;
|
|
23
27
|
this.col = col;
|
|
24
28
|
this.index = index;
|
|
29
|
+
this.openEnd = openEnd;
|
|
25
30
|
}
|
|
26
31
|
}
|
|
27
32
|
|
|
@@ -61,7 +66,12 @@ export default class Xml2JsParser {
|
|
|
61
66
|
this.tagsStack = [];
|
|
62
67
|
this._stopNodeProcessor = null;
|
|
63
68
|
this._exitIfTriggered = false;
|
|
64
|
-
this.
|
|
69
|
+
this.xmlDec = {
|
|
70
|
+
version: 1.0,
|
|
71
|
+
lang: null,
|
|
72
|
+
encoding: null,
|
|
73
|
+
standalone: "yes"
|
|
74
|
+
}
|
|
65
75
|
|
|
66
76
|
if (!this.matcher) {
|
|
67
77
|
this.matcher = new Matcher();
|
|
@@ -125,10 +135,20 @@ export default class Xml2JsParser {
|
|
|
125
135
|
// which never overwrite this position.
|
|
126
136
|
this.source.markTokenStart(0);
|
|
127
137
|
|
|
138
|
+
// Position of the next character, captured before it's read. When that
|
|
139
|
+
// character turns out to be '<', this is exactly the position of '<'
|
|
140
|
+
// itself — used below as the authoritative tag-start position for both
|
|
141
|
+
// TagDetail (open tags) and closeMeta (close tags), instead of deriving
|
|
142
|
+
// it after the fact from source.startIndex once the tag name/attrs have
|
|
143
|
+
// already been consumed (which points past the tag, not at its start).
|
|
144
|
+
const preReadPos = { line: this.source.line, col: this.source.cols, index: this.source.startIndex };
|
|
145
|
+
|
|
128
146
|
const ch = this.source.readCh();
|
|
129
147
|
if (ch === undefined || ch === '') break;
|
|
130
148
|
|
|
131
149
|
if (ch === '<') {
|
|
150
|
+
const tagStart = preReadPos;
|
|
151
|
+
|
|
132
152
|
const nextChar = this.source.readChAt(0);
|
|
133
153
|
if (nextChar === '') throw new ParseError(
|
|
134
154
|
"Unexpected end of source after '<'",
|
|
@@ -142,9 +162,9 @@ export default class Xml2JsParser {
|
|
|
142
162
|
this.readSpecialTag(nextChar);
|
|
143
163
|
} else if (nextChar === '/') {
|
|
144
164
|
this.source.updateBufferBoundary();
|
|
145
|
-
this.readClosingTag();
|
|
165
|
+
this.readClosingTag(tagStart);
|
|
146
166
|
} else {
|
|
147
|
-
this.readOpeningTag();
|
|
167
|
+
this.readOpeningTag(tagStart);
|
|
148
168
|
}
|
|
149
169
|
} else {
|
|
150
170
|
// ch is already consumed. Peek ahead for more non-'<' chars and grab
|
|
@@ -223,8 +243,18 @@ export default class Xml2JsParser {
|
|
|
223
243
|
this.finalizeXml();
|
|
224
244
|
}
|
|
225
245
|
|
|
226
|
-
readClosingTag() {
|
|
246
|
+
readClosingTag(tagStart) {
|
|
227
247
|
const tagName = this.processTagName(readClosingTagName(this.source));
|
|
248
|
+
// closeMeta: position of this closing tag's '</' (tagStart, passed in from
|
|
249
|
+
// parseXml's dispatch) plus the offset right after its '>' (closeEnd) —
|
|
250
|
+
// mirrors tagDetail.index / tagDetail.openEnd for the opening-tag side.
|
|
251
|
+
const closeMeta = {
|
|
252
|
+
name: tagName,
|
|
253
|
+
line: tagStart.line,
|
|
254
|
+
col: tagStart.col,
|
|
255
|
+
index: tagStart.index,
|
|
256
|
+
closeEnd: this.source.startIndex,
|
|
257
|
+
};
|
|
228
258
|
|
|
229
259
|
if (this.isUnpaired(tagName) || this.isStopNode()) {
|
|
230
260
|
throw new ParseError(`Unexpected closing tag '${tagName}'`, ErrorCode.UNEXPECTED_CLOSE_TAG, { line: this.source.line, col: this.source.cols, index: this.source.startIndex });
|
|
@@ -246,10 +276,10 @@ export default class Xml2JsParser {
|
|
|
246
276
|
}
|
|
247
277
|
|
|
248
278
|
if (!this.currentTagDetail.root) this.addTextNode();
|
|
249
|
-
this.popTag();
|
|
279
|
+
this.popTag(closeMeta);
|
|
250
280
|
}
|
|
251
281
|
|
|
252
|
-
readOpeningTag() {
|
|
282
|
+
readOpeningTag(tagStart) {
|
|
253
283
|
const options = this.options;
|
|
254
284
|
this.addTextNode();
|
|
255
285
|
|
|
@@ -263,12 +293,15 @@ export default class Xml2JsParser {
|
|
|
263
293
|
const { tagDetail, isSkip } = this._stopNodeProcessorMeta;
|
|
264
294
|
this._stopNodeProcessor.resumeAfterOpenTag();
|
|
265
295
|
readTagExp(this); // re-consume the opening tag from the rewound source
|
|
266
|
-
|
|
296
|
+
// openEnd reflects the offset right after this opening tag's '>' — stable
|
|
297
|
+
// across retries since the opening tag is fully re-read every time.
|
|
298
|
+
tagDetail.openEnd = this.source.startIndex;
|
|
299
|
+
const { content, end: stopEnd } = this._stopNodeProcessor.collect(this.source);
|
|
267
300
|
if (!isSkip) {
|
|
268
301
|
this.outputBuilder.addElement(tagDetail, this.readonlyMatcher);
|
|
269
|
-
this.outputBuilder.onStopNode?.(tagDetail, content, this.readonlyMatcher);
|
|
302
|
+
this.outputBuilder.onStopNode?.(tagDetail, content, this.readonlyMatcher, stopEnd);
|
|
270
303
|
this.outputBuilder.addValue(content, this.readonlyMatcher);
|
|
271
|
-
this.outputBuilder.closeElement(this.readonlyMatcher);
|
|
304
|
+
this.outputBuilder.closeElement(this.readonlyMatcher, { name: tagDetail.name, closeEnd: stopEnd.index });
|
|
272
305
|
}
|
|
273
306
|
this.matcher.pop();
|
|
274
307
|
this._stopNodeProcessor = null;
|
|
@@ -280,9 +313,10 @@ export default class Xml2JsParser {
|
|
|
280
313
|
const processedTagName = this.processTagName(tagExp.tagName);
|
|
281
314
|
const tagDetail = new TagDetail(
|
|
282
315
|
processedTagName,
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
316
|
+
tagStart.line,
|
|
317
|
+
tagStart.col,
|
|
318
|
+
tagStart.index,
|
|
319
|
+
this.source.startIndex, // openEnd: offset right after this opening tag's '>'
|
|
286
320
|
);
|
|
287
321
|
|
|
288
322
|
// Extract namespace prefix and local name from raw tag name (e.g. "ns:tag" → "ns", "tag").
|
|
@@ -331,7 +365,7 @@ export default class Xml2JsParser {
|
|
|
331
365
|
const skipTagConfig = stopNodeConfig ? null : this.isSkipTag();
|
|
332
366
|
|
|
333
367
|
if (!options.skip.attributes && !skipTagConfig) {
|
|
334
|
-
flushAttributes(tagExp._attrsExp, this);
|
|
368
|
+
flushAttributes(tagExp._attrsExp, this, tagExp._attrsExpStart);
|
|
335
369
|
}
|
|
336
370
|
|
|
337
371
|
// Stop-node and skip-tag checks AFTER attributes are set so attribute conditions work.
|
|
@@ -341,12 +375,15 @@ export default class Xml2JsParser {
|
|
|
341
375
|
|
|
342
376
|
if (this.isUnpaired(processedTagName)) {
|
|
343
377
|
this.outputBuilder.addElement(tagDetail, this.readonlyMatcher);
|
|
344
|
-
|
|
378
|
+
// Unpaired tags (e.g. <br>, <img>) have no separate closing tag — the
|
|
379
|
+
// close position is the same as the open tag's end.
|
|
380
|
+
this.outputBuilder.closeElement(this.readonlyMatcher, this._closeMetaFor(tagDetail));
|
|
345
381
|
this.matcher.pop();
|
|
346
382
|
} else if (tagExp.selfClosing) {
|
|
347
383
|
if (!skipTagConfig) {
|
|
348
384
|
this.outputBuilder.addElement(tagDetail, this.readonlyMatcher);
|
|
349
|
-
|
|
385
|
+
// Self-closing tags (<tag/>) likewise have no distinct closing tag.
|
|
386
|
+
this.outputBuilder.closeElement(this.readonlyMatcher, this._closeMetaFor(tagDetail));
|
|
350
387
|
}
|
|
351
388
|
this.matcher.pop();
|
|
352
389
|
} else if (stopNodeConfig) {
|
|
@@ -360,11 +397,16 @@ export default class Xml2JsParser {
|
|
|
360
397
|
});
|
|
361
398
|
this._stopNodeProcessorMeta = { tagDetail, isSkip: false };
|
|
362
399
|
this._stopNodeProcessor.activate();
|
|
363
|
-
const content = this._stopNodeProcessor.collect(this.source);
|
|
400
|
+
const { content, end: stopEnd } = this._stopNodeProcessor.collect(this.source);
|
|
364
401
|
this.outputBuilder.addElement(tagDetail, this.readonlyMatcher);
|
|
365
|
-
this.outputBuilder.onStopNode?.(tagDetail, content, this.readonlyMatcher);
|
|
402
|
+
this.outputBuilder.onStopNode?.(tagDetail, content, this.readonlyMatcher, stopEnd);
|
|
366
403
|
this.outputBuilder.addValue(content, this.readonlyMatcher);
|
|
367
|
-
|
|
404
|
+
// closeMeta for a stop node carries only `closeEnd` (offset right after
|
|
405
|
+
// the matched </tagname> was consumed) — StopNodeProcessor scans the
|
|
406
|
+
// closing tag opaquely and doesn't track where '</tagname' itself starts,
|
|
407
|
+
// so unlike the normal close path we don't have a real index/line/col
|
|
408
|
+
// for the close tag's own start, only its end.
|
|
409
|
+
this.outputBuilder.closeElement(this.readonlyMatcher, { name: tagDetail.name, closeEnd: stopEnd.index });
|
|
368
410
|
this.matcher.pop();
|
|
369
411
|
this._stopNodeProcessor = null;
|
|
370
412
|
this._stopNodeProcessorMeta = null;
|
|
@@ -435,13 +477,36 @@ export default class Xml2JsParser {
|
|
|
435
477
|
* Pop the current tag from the parser stack and notify the output builder.
|
|
436
478
|
* This is the single point of exit for closing a tag — both stacks are
|
|
437
479
|
* updated together.
|
|
480
|
+
*
|
|
481
|
+
* @param {object} [closeMeta] - Position info for the closing tag:
|
|
482
|
+
* { name, line, col, index, closeEnd }. Omitted when there is no real
|
|
483
|
+
* closing tag to report a position for — e.g. AutoCloseHandler synthesizing
|
|
484
|
+
* a close at EOF, or exitIf closing already-open ancestors. In that case a
|
|
485
|
+
* minimal `{ name }` is passed to the builder instead of nothing, so
|
|
486
|
+
* closeElement() never has to special-case "no second argument at all".
|
|
438
487
|
*/
|
|
439
|
-
popTag() {
|
|
440
|
-
this.outputBuilder.closeElement(this.readonlyMatcher);
|
|
488
|
+
popTag(closeMeta) {
|
|
489
|
+
this.outputBuilder.closeElement(this.readonlyMatcher, closeMeta ?? { name: this.currentTagDetail?.name });
|
|
441
490
|
this.matcher.pop();
|
|
442
491
|
this.currentTagDetail = this.tagsStack.pop();
|
|
443
492
|
}
|
|
444
493
|
|
|
494
|
+
/**
|
|
495
|
+
* Build a closeMeta object for tags with no distinct closing token
|
|
496
|
+
* (unpaired tags like <br>, and self-closing tags like <tag/>) — the close
|
|
497
|
+
* position is just the opening tag's own end.
|
|
498
|
+
* @param {TagDetail} tagDetail
|
|
499
|
+
*/
|
|
500
|
+
_closeMetaFor(tagDetail) {
|
|
501
|
+
return {
|
|
502
|
+
name: tagDetail.name,
|
|
503
|
+
line: tagDetail.line,
|
|
504
|
+
col: tagDetail.col,
|
|
505
|
+
index: tagDetail.index,
|
|
506
|
+
closeEnd: tagDetail.openEnd,
|
|
507
|
+
};
|
|
508
|
+
}
|
|
509
|
+
|
|
445
510
|
readSpecialTag(startCh) {
|
|
446
511
|
if (startCh === "!") {
|
|
447
512
|
let nextChar = this.source.readCh();
|
|
@@ -481,7 +546,7 @@ export default class Xml2JsParser {
|
|
|
481
546
|
processAttrName(attrName) {
|
|
482
547
|
const options = this.options;
|
|
483
548
|
attrName = resolveNsPrefix(attrName, options.skip.nsPrefix);
|
|
484
|
-
if (!isQName(attrName, this.
|
|
549
|
+
if (!isQName(attrName, this.xmlDec.version)) { //TODO: make it optional
|
|
485
550
|
throw new ParseError(`Invalid attribute name: ${attrName}`, ErrorCode.INVALID_ATTRIBUTE_NAME);
|
|
486
551
|
}
|
|
487
552
|
attrName = sanitizeName(attrName, options.onDangerousProperty);
|