@pipobscure/xml 0.1.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chars.d.ts +1 -1
- package/dist/chars.d.ts.map +1 -1
- package/dist/chars.js +70 -38
- package/dist/chars.js.map +1 -1
- package/dist/index.d.ts +5 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -4
- package/dist/index.js.map +1 -1
- package/dist/parser.d.ts +8 -8
- package/dist/parser.d.ts.map +1 -1
- package/dist/parser.js +659 -608
- package/dist/parser.js.map +1 -1
- package/dist/query.d.ts +1 -1
- package/dist/query.d.ts.map +1 -1
- package/dist/query.js +63 -56
- package/dist/query.js.map +1 -1
- package/dist/serialize.d.ts +34 -0
- package/dist/serialize.d.ts.map +1 -0
- package/dist/serialize.js +178 -0
- package/dist/serialize.js.map +1 -0
- package/dist/types.d.ts +60 -60
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +9 -9
- package/dist/types.js.map +1 -1
- package/package.json +2 -2
package/dist/parser.js
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* • DOCTYPE internal subsets are captured verbatim, not validated.
|
|
22
22
|
* • The BOM (U+FEFF) at the start of the stream is silently skipped.
|
|
23
23
|
*/
|
|
24
|
-
import { isXmlWhitespace, isNameStartChar, isNameChar, isHexDigit, isDecimalDigit } from
|
|
24
|
+
import { isXmlWhitespace, isNameStartChar, isNameChar, isHexDigit, isDecimalDigit } from "./chars.js";
|
|
25
25
|
// ---------------------------------------------------------------------------
|
|
26
26
|
// Constants
|
|
27
27
|
// ---------------------------------------------------------------------------
|
|
@@ -29,11 +29,11 @@ const XML_NS = 'http://www.w3.org/XML/1998/namespace';
|
|
|
29
29
|
const XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
|
|
30
30
|
/** The five predefined XML entities. Unknown entities are left verbatim. */
|
|
31
31
|
// Null-prototype and frozen: entity names come straight from the document, so
// a lookup such as PREDEFINED_ENTITIES['constructor'] or ['__proto__'] must
// yield undefined rather than something inherited from Object.prototype.
const PREDEFINED_ENTITIES = Object.freeze(Object.assign(Object.create(null), {
    amp: '&',
    lt: '<',
    gt: '>',
    apos: "'",
    quot: '"',
}));
|
|
38
38
|
// ---------------------------------------------------------------------------
|
|
39
39
|
// Public error type
|
|
@@ -44,611 +44,662 @@ const PREDEFINED_ENTITIES = {
|
|
|
44
44
|
* truly unrecoverable situations (e.g. no root element found) reach here.
|
|
45
45
|
*/
|
|
46
46
|
export class ParseError extends Error {
    /** Offset in the source string at which the problem was detected. */
    position;
    /** Line of the problem, counted from 1. */
    line;
    /** Column of the problem, counted from 1. */
    column;
    /**
     * @param message  Description of the problem; the location is appended
     *                 to it as " (line L, col C)".
     * @param position Offset in the source string.
     * @param line     1-based line number.
     * @param column   1-based column number.
     */
    constructor(message, position, line, column) {
        const located = `${message} (line ${line}, col ${column})`;
        super(located);
        // The reported name intentionally differs from the class name.
        Object.assign(this, { name: 'XmlParseError', position, line, column });
    }
}
|
|
61
61
|
// ---------------------------------------------------------------------------
|
|
62
62
|
// Parser
|
|
63
63
|
// ---------------------------------------------------------------------------
|
|
64
64
|
class XmlParser {
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
65
|
+
src;
|
|
66
|
+
pos = 0;
|
|
67
|
+
/**
|
|
68
|
+
* Namespace scope stack.
|
|
69
|
+
* Each layer maps prefix → URI; `''` (empty string) is the default NS.
|
|
70
|
+
* The bottom layer holds the two permanently-bound prefixes.
|
|
71
|
+
*/
|
|
72
|
+
nsStack = [
|
|
73
|
+
new Map([
|
|
74
|
+
['xml', XML_NS],
|
|
75
|
+
['xmlns', XMLNS_NS],
|
|
76
|
+
]),
|
|
77
|
+
];
|
|
78
|
+
constructor(src) {
|
|
79
|
+
this.src = src;
|
|
80
|
+
}
|
|
81
|
+
// -------------------------------------------------------------------------
|
|
82
|
+
// Public entry point
|
|
83
|
+
// -------------------------------------------------------------------------
|
|
84
|
+
parse() {
|
|
85
|
+
// Strip BOM
|
|
86
|
+
if (this.src.charCodeAt(0) === 0xfeff)
|
|
87
|
+
this.pos = 1;
|
|
88
|
+
const children = [];
|
|
89
|
+
// Optional XML declaration
|
|
90
|
+
if (this.startsWith('<?xml') && this.isXmlDeclStart()) {
|
|
91
|
+
children.push(this.parseXmlDeclaration());
|
|
92
|
+
}
|
|
93
|
+
// Misc* (comments, PIs, whitespace) then optional DOCTYPE then Misc*
|
|
94
|
+
this.parseMisc(children);
|
|
95
|
+
if (this.startsWith('<!DOCTYPE') || this.startsWith('<!doctype')) {
|
|
96
|
+
try {
|
|
97
|
+
children.push(this.parseDoctype());
|
|
98
|
+
}
|
|
99
|
+
catch {
|
|
100
|
+
// If DOCTYPE is deeply malformed just skip to the next '<'
|
|
101
|
+
this.skipToNext('<');
|
|
102
|
+
}
|
|
103
|
+
this.parseMisc(children);
|
|
104
|
+
}
|
|
105
|
+
// Root element
|
|
106
|
+
if (this.pos < this.src.length && this.src[this.pos] === '<') {
|
|
107
|
+
children.push(this.parseElement());
|
|
108
|
+
}
|
|
109
|
+
else if (this.pos < this.src.length) {
|
|
110
|
+
throw this.error('No root element found');
|
|
111
|
+
}
|
|
112
|
+
// Trailing misc
|
|
113
|
+
this.parseMisc(children);
|
|
114
|
+
return { type: 'document', children };
|
|
115
|
+
}
|
|
116
|
+
// -------------------------------------------------------------------------
|
|
117
|
+
// Prolog / misc
|
|
118
|
+
// -------------------------------------------------------------------------
|
|
119
|
+
/**
|
|
120
|
+
* Determines whether the `<?xml` we see is really the XML declaration
|
|
121
|
+
* (followed by whitespace or `?>`) and not a PI named `xmlfoo`.
|
|
122
|
+
*/
|
|
123
|
+
isXmlDeclStart() {
|
|
124
|
+
const c = this.src.charCodeAt(this.pos + 5);
|
|
125
|
+
return isXmlWhitespace(c) || c === 0x3f; // ? for '?>'
|
|
126
|
+
}
|
|
127
|
+
parseMisc(into) {
|
|
128
|
+
while (this.pos < this.src.length) {
|
|
129
|
+
this.skipWhitespace();
|
|
130
|
+
if (this.startsWith('<!--')) {
|
|
131
|
+
into.push(this.parseComment());
|
|
132
|
+
}
|
|
133
|
+
else if (this.startsWith('<?')) {
|
|
134
|
+
into.push(this.parseProcessingInstruction());
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
parseXmlDeclaration() {
|
|
142
|
+
this.expect('<?xml');
|
|
143
|
+
// Tolerate missing whitespace
|
|
144
|
+
this.skipWhitespace();
|
|
145
|
+
let version = '1.0';
|
|
146
|
+
let encoding = null;
|
|
147
|
+
let standalone = null;
|
|
148
|
+
if (this.startsWith('version')) {
|
|
149
|
+
this.advanceBy(7);
|
|
150
|
+
this.skipWhitespace();
|
|
151
|
+
if (this.current() === '=') {
|
|
152
|
+
this.advance();
|
|
153
|
+
}
|
|
154
|
+
this.skipWhitespace();
|
|
155
|
+
version = this.parseQuotedValue();
|
|
156
|
+
this.skipWhitespace();
|
|
157
|
+
}
|
|
158
|
+
if (this.startsWith('encoding')) {
|
|
159
|
+
this.advanceBy(8);
|
|
160
|
+
this.skipWhitespace();
|
|
161
|
+
if (this.current() === '=') {
|
|
162
|
+
this.advance();
|
|
163
|
+
}
|
|
164
|
+
this.skipWhitespace();
|
|
165
|
+
encoding = this.parseQuotedValue();
|
|
166
|
+
this.skipWhitespace();
|
|
167
|
+
}
|
|
168
|
+
if (this.startsWith('standalone')) {
|
|
169
|
+
this.advanceBy(10);
|
|
170
|
+
this.skipWhitespace();
|
|
171
|
+
if (this.current() === '=') {
|
|
172
|
+
this.advance();
|
|
173
|
+
}
|
|
174
|
+
this.skipWhitespace();
|
|
175
|
+
const val = this.parseQuotedValue();
|
|
176
|
+
standalone = val === 'yes' ? true : val === 'no' ? false : null;
|
|
177
|
+
this.skipWhitespace();
|
|
178
|
+
}
|
|
179
|
+
// Consume ?> — tolerate just > if ?> is missing
|
|
180
|
+
if (this.startsWith('?>')) {
|
|
181
|
+
this.advanceBy(2);
|
|
182
|
+
}
|
|
183
|
+
else if (this.current() === '>') {
|
|
184
|
+
this.advance();
|
|
185
|
+
}
|
|
186
|
+
return { type: 'xml-declaration', version, encoding, standalone };
|
|
187
|
+
}
|
|
188
|
+
parseDoctype() {
|
|
189
|
+
// Case-insensitive match already confirmed by caller
|
|
190
|
+
this.advanceBy('<!DOCTYPE'.length);
|
|
191
|
+
this.skipWhitespace();
|
|
192
|
+
const name = this.tryParseName() ?? 'unknown';
|
|
193
|
+
this.skipWhitespace();
|
|
194
|
+
let publicId = null;
|
|
195
|
+
let systemId = null;
|
|
196
|
+
let internalSubset = null;
|
|
197
|
+
const kw = this.peekKeyword();
|
|
198
|
+
if (kw === 'PUBLIC') {
|
|
199
|
+
this.advanceBy(6);
|
|
200
|
+
this.skipWhitespace();
|
|
201
|
+
publicId = this.parseQuotedValue();
|
|
202
|
+
this.skipWhitespace();
|
|
203
|
+
if (this.current() === '"' || this.current() === "'") {
|
|
204
|
+
systemId = this.parseQuotedValue();
|
|
205
|
+
this.skipWhitespace();
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
else if (kw === 'SYSTEM') {
|
|
209
|
+
this.advanceBy(6);
|
|
210
|
+
this.skipWhitespace();
|
|
211
|
+
systemId = this.parseQuotedValue();
|
|
212
|
+
this.skipWhitespace();
|
|
213
|
+
}
|
|
214
|
+
// Internal subset
|
|
215
|
+
if (this.current() === '[') {
|
|
216
|
+
this.advance();
|
|
217
|
+
const start = this.pos;
|
|
218
|
+
// Scan for the matching ']', respecting quoted strings
|
|
219
|
+
while (this.pos < this.src.length && this.current() !== ']') {
|
|
220
|
+
if (this.current() === '"' || this.current() === "'") {
|
|
221
|
+
const q = this.current();
|
|
222
|
+
this.advance();
|
|
223
|
+
while (this.pos < this.src.length && this.current() !== q)
|
|
224
|
+
this.advance();
|
|
225
|
+
if (this.pos < this.src.length)
|
|
226
|
+
this.advance();
|
|
227
|
+
}
|
|
228
|
+
else {
|
|
229
|
+
this.advance();
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
internalSubset = this.src.slice(start, this.pos);
|
|
233
|
+
if (this.current() === ']')
|
|
234
|
+
this.advance();
|
|
235
|
+
this.skipWhitespace();
|
|
236
|
+
}
|
|
237
|
+
// Consume closing >
|
|
238
|
+
if (this.current() === '>')
|
|
239
|
+
this.advance();
|
|
240
|
+
return { type: 'doctype', name, publicId, systemId, internalSubset };
|
|
241
|
+
}
|
|
242
|
+
// -------------------------------------------------------------------------
|
|
243
|
+
// Element
|
|
244
|
+
// -------------------------------------------------------------------------
|
|
245
|
+
parseElement() {
|
|
246
|
+
this.expect('<');
|
|
247
|
+
const qname = this.parseQName();
|
|
248
|
+
this.skipWhitespace();
|
|
249
|
+
const rawAttrs = [];
|
|
250
|
+
const nsDecls = new Map(); // prefix → URI, '' = default
|
|
251
|
+
while (this.pos < this.src.length && this.current() !== '>' && !this.startsWith('/>')) {
|
|
252
|
+
const ch = this.src.charCodeAt(this.pos);
|
|
253
|
+
if (!isNameStartChar(ch)) {
|
|
254
|
+
// Garbage character inside element tag — skip it tolerantly
|
|
255
|
+
this.advance();
|
|
256
|
+
continue;
|
|
257
|
+
}
|
|
258
|
+
const attrQName = this.parseQName();
|
|
259
|
+
this.skipWhitespace();
|
|
260
|
+
// Tolerate missing = sign
|
|
261
|
+
if (this.current() === '=') {
|
|
262
|
+
this.advance();
|
|
263
|
+
}
|
|
264
|
+
this.skipWhitespace();
|
|
265
|
+
// Tolerate missing quotes — if no quote, read until whitespace/>//>
|
|
266
|
+
const value = this.parseQuotedValueOrBare();
|
|
267
|
+
this.skipWhitespace();
|
|
268
|
+
// Detect namespace declarations
|
|
269
|
+
if (attrQName.prefix === null && attrQName.local === 'xmlns') {
|
|
270
|
+
nsDecls.set('', value);
|
|
271
|
+
}
|
|
272
|
+
else if (attrQName.prefix === 'xmlns') {
|
|
273
|
+
nsDecls.set(attrQName.local, value);
|
|
274
|
+
}
|
|
275
|
+
rawAttrs.push({ prefix: attrQName.prefix, local: attrQName.local, value });
|
|
276
|
+
}
|
|
277
|
+
// ── Push namespace scope ───────────────────────────────────────────────
|
|
278
|
+
this.nsStack.push(nsDecls);
|
|
279
|
+
// ── Resolve element namespace ──────────────────────────────────────────
|
|
280
|
+
const elemNS = this.resolveNS(qname.prefix, true);
|
|
281
|
+
// ── Resolve attribute namespaces ───────────────────────────────────────
|
|
282
|
+
const attributes = rawAttrs.map((raw) => {
|
|
283
|
+
let ns;
|
|
284
|
+
if (raw.prefix === null && raw.local === 'xmlns') {
|
|
285
|
+
ns = XMLNS_NS;
|
|
286
|
+
}
|
|
287
|
+
else if (raw.prefix === 'xmlns') {
|
|
288
|
+
ns = XMLNS_NS;
|
|
289
|
+
}
|
|
290
|
+
else if (raw.prefix !== null) {
|
|
291
|
+
ns = this.resolveNS(raw.prefix, false);
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
ns = null; // unprefixed attributes have no namespace
|
|
295
|
+
}
|
|
296
|
+
return { name: raw.local, prefix: raw.prefix, namespace: ns, value: raw.value };
|
|
297
|
+
});
|
|
298
|
+
// ── Handle self-closing vs content ─────────────────────────────────────
|
|
299
|
+
let selfClosing = false;
|
|
300
|
+
if (this.startsWith('/>')) {
|
|
301
|
+
this.advanceBy(2);
|
|
302
|
+
selfClosing = true;
|
|
303
|
+
}
|
|
304
|
+
else if (this.current() === '>') {
|
|
305
|
+
this.advance();
|
|
306
|
+
}
|
|
307
|
+
else {
|
|
308
|
+
// Malformed — treat as self-closing and try to recover
|
|
309
|
+
selfClosing = true;
|
|
310
|
+
}
|
|
311
|
+
const children = [];
|
|
312
|
+
if (!selfClosing) {
|
|
313
|
+
this.parseChildren(children, qname);
|
|
314
|
+
}
|
|
315
|
+
// ── Pop namespace scope ────────────────────────────────────────────────
|
|
316
|
+
this.nsStack.pop();
|
|
317
|
+
return {
|
|
318
|
+
type: 'element',
|
|
319
|
+
name: qname.local,
|
|
320
|
+
prefix: qname.prefix,
|
|
321
|
+
namespace: elemNS,
|
|
322
|
+
attributes,
|
|
323
|
+
children,
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
parseChildren(into, parent) {
|
|
327
|
+
while (this.pos < this.src.length) {
|
|
328
|
+
if (this.startsWith('</')) {
|
|
329
|
+
// Closing tag
|
|
330
|
+
this.advanceBy(2);
|
|
331
|
+
const closeQName = this.tryParseQName();
|
|
332
|
+
this.skipWhitespace();
|
|
333
|
+
if (this.current() === '>')
|
|
334
|
+
this.advance();
|
|
335
|
+
// Tolerant: accept mismatched closing tags (just stop parsing children)
|
|
336
|
+
if (closeQName === null || closeQName.local !== parent.local || closeQName.prefix !== parent.prefix) {
|
|
337
|
+
// Rewind if the tag was for a parent — we handle mismatches by
|
|
338
|
+
// simply returning so the parent's loop can consume the tag.
|
|
339
|
+
// Because we already consumed it, we just return.
|
|
340
|
+
}
|
|
341
|
+
return;
|
|
342
|
+
}
|
|
343
|
+
if (this.startsWith('<![CDATA[')) {
|
|
344
|
+
into.push(this.parseCData());
|
|
345
|
+
}
|
|
346
|
+
else if (this.startsWith('<!--')) {
|
|
347
|
+
into.push(this.parseComment());
|
|
348
|
+
}
|
|
349
|
+
else if (this.startsWith('<?')) {
|
|
350
|
+
into.push(this.parseProcessingInstruction());
|
|
351
|
+
}
|
|
352
|
+
else if (this.current() === '<') {
|
|
353
|
+
// Peek ahead — could be a malformed '<' in text
|
|
354
|
+
const nextCode = this.src.charCodeAt(this.pos + 1);
|
|
355
|
+
if (isNameStartChar(nextCode) || nextCode === 0x3a /* : */ || nextCode === 0x5f /* _ */) {
|
|
356
|
+
into.push(this.parseElement());
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
// Treat the stray '<' as text
|
|
360
|
+
into.push(this.parseText());
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
else {
|
|
364
|
+
const text = this.parseText();
|
|
365
|
+
if (text.value.length > 0)
|
|
366
|
+
into.push(text);
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
// End of input without closing tag — tolerated
|
|
370
|
+
}
|
|
371
|
+
// -------------------------------------------------------------------------
|
|
372
|
+
// Leaf nodes
|
|
373
|
+
// -------------------------------------------------------------------------
|
|
374
|
+
parseComment() {
|
|
375
|
+
this.expect('<!--');
|
|
376
|
+
const start = this.pos;
|
|
377
|
+
const end = this.src.indexOf('-->', this.pos);
|
|
378
|
+
if (end === -1) {
|
|
379
|
+
// Unterminated comment — consume the rest
|
|
380
|
+
const value = this.src.slice(start);
|
|
381
|
+
this.pos = this.src.length;
|
|
382
|
+
return { type: 'comment', value };
|
|
383
|
+
}
|
|
384
|
+
const value = this.src.slice(start, end);
|
|
385
|
+
this.pos = end + 3;
|
|
386
|
+
return { type: 'comment', value };
|
|
387
|
+
}
|
|
388
|
+
parseCData() {
|
|
389
|
+
this.expect('<![CDATA[');
|
|
390
|
+
const start = this.pos;
|
|
391
|
+
const end = this.src.indexOf(']]>', this.pos);
|
|
392
|
+
if (end === -1) {
|
|
393
|
+
const value = this.src.slice(start);
|
|
394
|
+
this.pos = this.src.length;
|
|
395
|
+
return { type: 'cdata', value };
|
|
396
|
+
}
|
|
397
|
+
const value = this.src.slice(start, end);
|
|
398
|
+
this.pos = end + 3;
|
|
399
|
+
return { type: 'cdata', value };
|
|
400
|
+
}
|
|
401
|
+
parseProcessingInstruction() {
|
|
402
|
+
this.expect('<?');
|
|
403
|
+
const target = this.tryParseName() ?? '_pi';
|
|
404
|
+
let data = '';
|
|
405
|
+
if (this.pos < this.src.length && isXmlWhitespace(this.src.charCodeAt(this.pos))) {
|
|
406
|
+
this.skipWhitespace();
|
|
407
|
+
const end = this.src.indexOf('?>', this.pos);
|
|
408
|
+
if (end === -1) {
|
|
409
|
+
data = this.src.slice(this.pos);
|
|
410
|
+
this.pos = this.src.length;
|
|
411
|
+
}
|
|
412
|
+
else {
|
|
413
|
+
data = this.src.slice(this.pos, end).trimEnd();
|
|
414
|
+
this.pos = end + 2;
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
else {
|
|
418
|
+
// No data, just consume '?>'
|
|
419
|
+
if (this.startsWith('?>'))
|
|
420
|
+
this.advanceBy(2);
|
|
421
|
+
}
|
|
422
|
+
return { type: 'processing-instruction', target, data };
|
|
423
|
+
}
|
|
424
|
+
parseText() {
|
|
425
|
+
const parts = [];
|
|
426
|
+
while (this.pos < this.src.length && this.current() !== '<') {
|
|
427
|
+
if (this.current() === '&') {
|
|
428
|
+
parts.push(this.parseEntityRef());
|
|
429
|
+
}
|
|
430
|
+
else {
|
|
431
|
+
// Fast-path: find the next special character
|
|
432
|
+
const next = this.nextSpecialInText();
|
|
433
|
+
if (next === -1) {
|
|
434
|
+
parts.push(this.src.slice(this.pos));
|
|
435
|
+
this.pos = this.src.length;
|
|
436
|
+
}
|
|
437
|
+
else {
|
|
438
|
+
parts.push(this.src.slice(this.pos, next));
|
|
439
|
+
this.pos = next;
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
return { type: 'text', value: parts.join('') };
|
|
444
|
+
}
|
|
445
|
+
/** Returns the position of the next `<` or `&` at or after `this.pos`. */
|
|
446
|
+
nextSpecialInText() {
|
|
447
|
+
const lt = this.src.indexOf('<', this.pos);
|
|
448
|
+
const amp = this.src.indexOf('&', this.pos);
|
|
449
|
+
if (lt === -1 && amp === -1)
|
|
450
|
+
return -1;
|
|
451
|
+
if (lt === -1)
|
|
452
|
+
return amp;
|
|
453
|
+
if (amp === -1)
|
|
454
|
+
return lt;
|
|
455
|
+
return lt < amp ? lt : amp;
|
|
456
|
+
}
|
|
457
|
+
// -------------------------------------------------------------------------
|
|
458
|
+
// Entity references
|
|
459
|
+
// -------------------------------------------------------------------------
|
|
460
|
+
parseEntityRef() {
|
|
461
|
+
this.advance(); // skip &
|
|
462
|
+
if (this.current() === '#') {
|
|
463
|
+
this.advance(); // skip #
|
|
464
|
+
return this.parseCharRef();
|
|
465
|
+
}
|
|
466
|
+
const start = this.pos;
|
|
467
|
+
while (this.pos < this.src.length && isNameChar(this.src.charCodeAt(this.pos))) {
|
|
468
|
+
this.pos++;
|
|
469
|
+
}
|
|
470
|
+
const name = this.src.slice(start, this.pos);
|
|
471
|
+
if (this.current() === ';') {
|
|
472
|
+
this.advance();
|
|
473
|
+
}
|
|
474
|
+
// Tolerate missing semicolon
|
|
475
|
+
const resolved = PREDEFINED_ENTITIES[name];
|
|
476
|
+
if (resolved !== undefined)
|
|
477
|
+
return resolved;
|
|
478
|
+
// Bare & with no recognisable name (e.g. "& " in malformed content) — preserve literally
|
|
479
|
+
if (name.length === 0)
|
|
480
|
+
return '&';
|
|
481
|
+
// Unknown named entity — return verbatim with & and ;
|
|
482
|
+
return `&${name};`;
|
|
483
|
+
}
|
|
484
|
+
parseCharRef() {
|
|
485
|
+
let codePoint;
|
|
486
|
+
if (this.current() === 'x' || this.current() === 'X') {
|
|
487
|
+
this.advance();
|
|
488
|
+
let hex = '';
|
|
489
|
+
while (this.pos < this.src.length && isHexDigit(this.src.charCodeAt(this.pos))) {
|
|
490
|
+
hex += this.src[this.pos++];
|
|
491
|
+
}
|
|
492
|
+
codePoint = hex.length > 0 ? parseInt(hex, 16) : 0xfffd;
|
|
493
|
+
}
|
|
494
|
+
else {
|
|
495
|
+
let dec = '';
|
|
496
|
+
while (this.pos < this.src.length && isDecimalDigit(this.src.charCodeAt(this.pos))) {
|
|
497
|
+
dec += this.src[this.pos++];
|
|
498
|
+
}
|
|
499
|
+
codePoint = dec.length > 0 ? parseInt(dec, 10) : 0xfffd;
|
|
500
|
+
}
|
|
501
|
+
if (this.current() === ';')
|
|
502
|
+
this.advance();
|
|
503
|
+
// Guard against surrogates and invalid code points
|
|
504
|
+
if (codePoint > 0x10ffff || (codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint === 0) {
|
|
505
|
+
return '\ufffd';
|
|
506
|
+
}
|
|
507
|
+
return String.fromCodePoint(codePoint);
|
|
508
|
+
}
|
|
509
|
+
// -------------------------------------------------------------------------
|
|
510
|
+
// Attribute value parsing
|
|
511
|
+
// -------------------------------------------------------------------------
|
|
512
|
+
parseQuotedValue() {
|
|
513
|
+
const ch = this.current();
|
|
514
|
+
if (ch !== '"' && ch !== "'") {
|
|
515
|
+
// No quote — tolerate and return empty string
|
|
516
|
+
return '';
|
|
517
|
+
}
|
|
518
|
+
this.advance(); // opening quote
|
|
519
|
+
const parts = [];
|
|
520
|
+
while (this.pos < this.src.length && this.current() !== ch) {
|
|
521
|
+
if (this.current() === '&') {
|
|
522
|
+
parts.push(this.parseEntityRef());
|
|
523
|
+
}
|
|
524
|
+
else {
|
|
525
|
+
const next = this.src.indexOf(ch, this.pos);
|
|
526
|
+
const amp = this.src.indexOf('&', this.pos);
|
|
527
|
+
let end;
|
|
528
|
+
if (next === -1) {
|
|
529
|
+
end = this.src.length;
|
|
530
|
+
}
|
|
531
|
+
else if (amp !== -1 && amp < next) {
|
|
532
|
+
end = amp;
|
|
533
|
+
}
|
|
534
|
+
else {
|
|
535
|
+
end = next;
|
|
536
|
+
}
|
|
537
|
+
parts.push(this.src.slice(this.pos, end));
|
|
538
|
+
this.pos = end;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
if (this.pos < this.src.length)
|
|
542
|
+
this.advance(); // closing quote
|
|
543
|
+
return parts.join('');
|
|
544
|
+
}
|
|
545
|
+
/**
|
|
546
|
+
* Like `parseQuotedValue` but also handles unquoted attribute values
|
|
547
|
+
* (e.g. `attr=value` — common in broken HTML-as-XML).
|
|
548
|
+
*/
|
|
549
|
+
parseQuotedValueOrBare() {
|
|
550
|
+
const ch = this.current();
|
|
551
|
+
if (ch === '"' || ch === "'")
|
|
552
|
+
return this.parseQuotedValue();
|
|
553
|
+
// Bare value — read until whitespace, >, or />
|
|
554
|
+
const start = this.pos;
|
|
555
|
+
while (this.pos < this.src.length && !isXmlWhitespace(this.src.charCodeAt(this.pos)) && this.current() !== '>' && !this.startsWith('/>')) {
|
|
556
|
+
this.pos++;
|
|
557
|
+
}
|
|
558
|
+
return this.src.slice(start, this.pos);
|
|
559
|
+
}
|
|
560
|
+
// -------------------------------------------------------------------------
|
|
561
|
+
// Name / QName parsing
|
|
562
|
+
// -------------------------------------------------------------------------
|
|
563
|
+
/**
|
|
564
|
+
* Parses an XML Name (may include `:` for QName tokenisation).
|
|
565
|
+
* Throws on invalid input.
|
|
566
|
+
*/
|
|
567
|
+
parseName() {
|
|
568
|
+
const start = this.pos;
|
|
569
|
+
if (!isNameStartChar(this.src.charCodeAt(this.pos))) {
|
|
570
|
+
throw this.error(`Expected XML name character, got ${JSON.stringify(this.current())}`);
|
|
571
|
+
}
|
|
572
|
+
while (this.pos < this.src.length && isNameChar(this.src.charCodeAt(this.pos))) {
|
|
573
|
+
this.pos++;
|
|
574
|
+
}
|
|
575
|
+
return this.src.slice(start, this.pos);
|
|
576
|
+
}
|
|
577
|
+
/** Like `parseName` but returns `null` instead of throwing. */
|
|
578
|
+
tryParseName() {
|
|
579
|
+
if (!isNameStartChar(this.src.charCodeAt(this.pos)))
|
|
580
|
+
return null;
|
|
581
|
+
const start = this.pos;
|
|
582
|
+
while (this.pos < this.src.length && isNameChar(this.src.charCodeAt(this.pos))) {
|
|
583
|
+
this.pos++;
|
|
584
|
+
}
|
|
585
|
+
return this.src.slice(start, this.pos);
|
|
586
|
+
}
|
|
587
|
+
/** Parses a qualified name and splits it on the first `:`. */
|
|
588
|
+
parseQName() {
|
|
589
|
+
const name = this.parseName();
|
|
590
|
+
const colon = name.indexOf(':');
|
|
591
|
+
if (colon !== -1) {
|
|
592
|
+
return { prefix: name.slice(0, colon), local: name.slice(colon + 1) };
|
|
593
|
+
}
|
|
594
|
+
return { prefix: null, local: name };
|
|
595
|
+
}
|
|
596
|
+
/** Like `parseQName` but returns `null` instead of throwing. */
|
|
597
|
+
tryParseQName() {
|
|
598
|
+
const name = this.tryParseName();
|
|
599
|
+
if (name === null)
|
|
600
|
+
return null;
|
|
601
|
+
const colon = name.indexOf(':');
|
|
602
|
+
if (colon !== -1) {
|
|
603
|
+
return { prefix: name.slice(0, colon), local: name.slice(colon + 1) };
|
|
604
|
+
}
|
|
605
|
+
return { prefix: null, local: name };
|
|
606
|
+
}
|
|
607
|
+
// -------------------------------------------------------------------------
|
|
608
|
+
// Namespace resolution
|
|
609
|
+
// -------------------------------------------------------------------------
|
|
610
|
+
/**
|
|
611
|
+
* Resolves `prefix` against the current namespace scope stack.
|
|
612
|
+
*
|
|
613
|
+
* - `prefix === 'xml'` → always `XML_NS`
|
|
614
|
+
* - `prefix === 'xmlns'` → always `XMLNS_NS`
|
|
615
|
+
* - `prefix === null` and `isElement` → default namespace (may be null)
|
|
616
|
+
* - `prefix === null` and `!isElement` → `null` (attrs have no default NS)
|
|
617
|
+
* - Unknown prefix → `null` (tolerant; spec says this is an error)
|
|
618
|
+
*/
|
|
619
|
+
resolveNS(prefix, isElement) {
|
|
620
|
+
if (prefix === 'xml')
|
|
621
|
+
return XML_NS;
|
|
622
|
+
if (prefix === 'xmlns')
|
|
623
|
+
return XMLNS_NS;
|
|
624
|
+
const key = prefix ?? (isElement ? '' : null);
|
|
625
|
+
if (key === null)
|
|
626
|
+
return null;
|
|
627
|
+
for (let i = this.nsStack.length - 1; i >= 0; i--) {
|
|
628
|
+
const scope = this.nsStack[i];
|
|
629
|
+
if (scope?.has(key)) {
|
|
630
|
+
const uri = scope.get(key);
|
|
631
|
+
return uri === '' ? null : uri; // empty URI = un-declare
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
// Unknown prefix — tolerate by returning null
|
|
635
|
+
return null;
|
|
636
|
+
}
|
|
637
|
+
// -------------------------------------------------------------------------
|
|
638
|
+
// Low-level cursor helpers
|
|
639
|
+
// -------------------------------------------------------------------------
|
|
640
|
+
current() {
|
|
641
|
+
return this.src[this.pos] ?? '';
|
|
642
|
+
}
|
|
643
|
+
advance() {
|
|
644
|
+
this.pos++;
|
|
645
|
+
}
|
|
646
|
+
advanceBy(n) {
|
|
647
|
+
this.pos += n;
|
|
648
|
+
}
|
|
649
|
+
startsWith(str) {
|
|
650
|
+
return this.src.startsWith(str, this.pos);
|
|
651
|
+
}
|
|
652
|
+
expect(str) {
|
|
653
|
+
if (!this.src.startsWith(str, this.pos)) {
|
|
654
|
+
throw this.error(`Expected ${JSON.stringify(str)}, got ${JSON.stringify(this.src.slice(this.pos, this.pos + str.length))}`);
|
|
655
|
+
}
|
|
656
|
+
this.pos += str.length;
|
|
657
|
+
}
|
|
658
|
+
skipWhitespace() {
|
|
659
|
+
while (this.pos < this.src.length && isXmlWhitespace(this.src.charCodeAt(this.pos))) {
|
|
660
|
+
this.pos++;
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
/** Scans forward until the given character is found (useful for recovery). */
|
|
664
|
+
skipToNext(ch) {
|
|
665
|
+
const idx = this.src.indexOf(ch, this.pos);
|
|
666
|
+
this.pos = idx === -1 ? this.src.length : idx;
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* Reads up to 8 ASCII letters (either case, returned upper-cased) to detect DOCTYPE keywords
|
|
670
|
+
* (PUBLIC / SYSTEM) without consuming them.
|
|
671
|
+
*/
|
|
672
|
+
peekKeyword() {
|
|
673
|
+
let s = '';
|
|
674
|
+
for (let i = this.pos; i < this.src.length && i < this.pos + 8; i++) {
|
|
675
|
+
const c = this.src.charCodeAt(i);
|
|
676
|
+
if (c >= 0x41 && c <= 0x5a)
|
|
677
|
+
s += String.fromCharCode(c);
|
|
678
|
+
else if (c >= 0x61 && c <= 0x7a)
|
|
679
|
+
s += String.fromCharCode(c - 32);
|
|
680
|
+
else
|
|
681
|
+
break;
|
|
682
|
+
}
|
|
683
|
+
return s;
|
|
684
|
+
}
|
|
685
|
+
// -------------------------------------------------------------------------
|
|
686
|
+
// Error helper
|
|
687
|
+
// -------------------------------------------------------------------------
|
|
688
|
+
error(message) {
    // Line/column are derived only when an error is actually built.
    // Only the text before the cursor counts, clamped to the input bounds.
    const end = Math.max(0, Math.min(this.pos, this.src.length));
    let line = 1;
    let lineStart = 0;
    // Hop from newline to newline instead of visiting every character;
    // each '\n' before the cursor starts a new line.
    for (let nl = this.src.indexOf('\n'); nl !== -1 && nl < end; nl = this.src.indexOf('\n', nl + 1)) {
        line += 1;
        lineStart = nl + 1;
    }
    // Columns are 1-based and counted in UTF-16 code units.
    const col = end - lineStart + 1;
    return new ParseError(message, this.pos, line, col);
}
|
|
652
703
|
}
|
|
653
704
|
// ---------------------------------------------------------------------------
|
|
654
705
|
// Public API
|
|
@@ -666,6 +717,6 @@ class XmlParser {
|
|
|
666
717
|
* completely absent root element.
|
|
667
718
|
*/
|
|
668
719
|
export function parse(xml) {
    // A fresh parser instance per call: no state is shared between parses.
    const parser = new XmlParser(xml);
    return parser.parse();
}
|
|
671
|
-
//# sourceMappingURL=parser.js.map
|
|
722
|
+
//# sourceMappingURL=parser.js.map
|