@loaders.gl/xml 4.2.0-alpha.4 → 4.2.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1292 +1,1438 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
1
4
  const DEFAULT_SAX_EVENTS = {
2
- ontext: () => {},
3
- onprocessinginstruction: () => {},
4
- onsgmldeclaration: () => {},
5
- ondoctype: () => {},
6
- oncomment: () => {},
7
- onopentagstart: () => {},
8
- onattribute: () => {},
9
- onopentag: () => {},
10
- onclosetag: () => {},
11
- onopencdata: () => {},
12
- oncdata: () => {},
13
- onclosecdata: () => {},
14
- onerror: () => {},
15
- onend: () => {},
16
- onready: () => {},
17
- onscript: () => {},
18
- onopennamespace: () => {},
19
- onclosenamespace: () => {}
5
+ ontext: () => { },
6
+ onprocessinginstruction: () => { },
7
+ onsgmldeclaration: () => { },
8
+ ondoctype: () => { },
9
+ oncomment: () => { },
10
+ onopentagstart: () => { },
11
+ onattribute: () => { },
12
+ onopentag: () => { },
13
+ onclosetag: () => { },
14
+ onopencdata: () => { },
15
+ oncdata: () => { },
16
+ onclosecdata: () => { },
17
+ onerror: () => { },
18
+ onend: () => { },
19
+ onready: () => { },
20
+ onscript: () => { },
21
+ onopennamespace: () => { },
22
+ onclosenamespace: () => { }
20
23
  };
21
24
  const DEFAULT_SAX_PARSER_OPTIONS = {
22
- ...DEFAULT_SAX_EVENTS,
23
- strict: false,
24
- MAX_BUFFER_LENGTH: 64 * 1024,
25
- lowercase: false,
26
- lowercasetags: false,
27
- noscript: false,
28
- strictEntities: false,
29
- xmlns: undefined,
30
- position: undefined,
31
- trim: undefined,
32
- normalize: undefined
25
+ ...DEFAULT_SAX_EVENTS,
26
+ strict: false,
27
+ MAX_BUFFER_LENGTH: 64 * 1024,
28
+ lowercase: false,
29
+ lowercasetags: false,
30
+ noscript: false,
31
+ strictEntities: false,
32
+ xmlns: undefined,
33
+ position: undefined,
34
+ trim: undefined,
35
+ normalize: undefined
33
36
  };
34
- const EVENTS = ['text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment', 'opentagstart', 'attribute', 'opentag', 'closetag', 'opencdata', 'cdata', 'closecdata', 'error', 'end', 'ready', 'script', 'opennamespace', 'closenamespace'];
35
- const BUFFERS = ['comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype', 'procInstName', 'procInstBody', 'entity', 'attribName', 'attribValue', 'cdata', 'script'];
37
+ const EVENTS = [
38
+ 'text',
39
+ 'processinginstruction',
40
+ 'sgmldeclaration',
41
+ 'doctype',
42
+ 'comment',
43
+ 'opentagstart',
44
+ 'attribute',
45
+ 'opentag',
46
+ 'closetag',
47
+ 'opencdata',
48
+ 'cdata',
49
+ 'closecdata',
50
+ 'error',
51
+ 'end',
52
+ 'ready',
53
+ 'script',
54
+ 'opennamespace',
55
+ 'closenamespace'
56
+ ];
57
+ const BUFFERS = [
58
+ 'comment',
59
+ 'sgmlDecl',
60
+ 'textNode',
61
+ 'tagName',
62
+ 'doctype',
63
+ 'procInstName',
64
+ 'procInstBody',
65
+ 'entity',
66
+ 'attribName',
67
+ 'attribValue',
68
+ 'cdata',
69
+ 'script'
70
+ ];
36
71
  const nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
37
72
  const nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
38
73
  const entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
39
74
  const entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
40
75
  export const ENTITIES = {
41
- amp: '&',
42
- gt: '>',
43
- lt: '<',
44
- quot: '"',
45
- apos: "'",
46
- AElig: 198,
47
- Aacute: 193,
48
- Acirc: 194,
49
- Agrave: 192,
50
- Aring: 197,
51
- Atilde: 195,
52
- Auml: 196,
53
- Ccedil: 199,
54
- ETH: 208,
55
- Eacute: 201,
56
- Ecirc: 202,
57
- Egrave: 200,
58
- Euml: 203,
59
- Iacute: 205,
60
- Icirc: 206,
61
- Igrave: 204,
62
- Iuml: 207,
63
- Ntilde: 209,
64
- Oacute: 211,
65
- Ocirc: 212,
66
- Ograve: 210,
67
- Oslash: 216,
68
- Otilde: 213,
69
- Ouml: 214,
70
- THORN: 222,
71
- Uacute: 218,
72
- Ucirc: 219,
73
- Ugrave: 217,
74
- Uuml: 220,
75
- Yacute: 221,
76
- aacute: 225,
77
- acirc: 226,
78
- aelig: 230,
79
- agrave: 224,
80
- aring: 229,
81
- atilde: 227,
82
- auml: 228,
83
- ccedil: 231,
84
- eacute: 233,
85
- ecirc: 234,
86
- egrave: 232,
87
- eth: 240,
88
- euml: 235,
89
- iacute: 237,
90
- icirc: 238,
91
- igrave: 236,
92
- iuml: 239,
93
- ntilde: 241,
94
- oacute: 243,
95
- ocirc: 244,
96
- ograve: 242,
97
- oslash: 248,
98
- otilde: 245,
99
- ouml: 246,
100
- szlig: 223,
101
- thorn: 254,
102
- uacute: 250,
103
- ucirc: 251,
104
- ugrave: 249,
105
- uuml: 252,
106
- yacute: 253,
107
- yuml: 255,
108
- copy: 169,
109
- reg: 174,
110
- nbsp: 160,
111
- iexcl: 161,
112
- cent: 162,
113
- pound: 163,
114
- curren: 164,
115
- yen: 165,
116
- brvbar: 166,
117
- sect: 167,
118
- uml: 168,
119
- ordf: 170,
120
- laquo: 171,
121
- not: 172,
122
- shy: 173,
123
- macr: 175,
124
- deg: 176,
125
- plusmn: 177,
126
- sup1: 185,
127
- sup2: 178,
128
- sup3: 179,
129
- acute: 180,
130
- micro: 181,
131
- para: 182,
132
- middot: 183,
133
- cedil: 184,
134
- ordm: 186,
135
- raquo: 187,
136
- frac14: 188,
137
- frac12: 189,
138
- frac34: 190,
139
- iquest: 191,
140
- times: 215,
141
- divide: 247,
142
- OElig: 338,
143
- oelig: 339,
144
- Scaron: 352,
145
- scaron: 353,
146
- Yuml: 376,
147
- fnof: 402,
148
- circ: 710,
149
- tilde: 732,
150
- Alpha: 913,
151
- Beta: 914,
152
- Gamma: 915,
153
- Delta: 916,
154
- Epsilon: 917,
155
- Zeta: 918,
156
- Eta: 919,
157
- Theta: 920,
158
- Iota: 921,
159
- Kappa: 922,
160
- Lambda: 923,
161
- Mu: 924,
162
- Nu: 925,
163
- Xi: 926,
164
- Omicron: 927,
165
- Pi: 928,
166
- Rho: 929,
167
- Sigma: 931,
168
- Tau: 932,
169
- Upsilon: 933,
170
- Phi: 934,
171
- Chi: 935,
172
- Psi: 936,
173
- Omega: 937,
174
- alpha: 945,
175
- beta: 946,
176
- gamma: 947,
177
- delta: 948,
178
- epsilon: 949,
179
- zeta: 950,
180
- eta: 951,
181
- theta: 952,
182
- iota: 953,
183
- kappa: 954,
184
- lambda: 955,
185
- mu: 956,
186
- nu: 957,
187
- xi: 958,
188
- omicron: 959,
189
- pi: 960,
190
- rho: 961,
191
- sigmaf: 962,
192
- sigma: 963,
193
- tau: 964,
194
- upsilon: 965,
195
- phi: 966,
196
- chi: 967,
197
- psi: 968,
198
- omega: 969,
199
- thetasym: 977,
200
- upsih: 978,
201
- piv: 982,
202
- ensp: 8194,
203
- emsp: 8195,
204
- thinsp: 8201,
205
- zwnj: 8204,
206
- zwj: 8205,
207
- lrm: 8206,
208
- rlm: 8207,
209
- ndash: 8211,
210
- mdash: 8212,
211
- lsquo: 8216,
212
- rsquo: 8217,
213
- sbquo: 8218,
214
- ldquo: 8220,
215
- rdquo: 8221,
216
- bdquo: 8222,
217
- dagger: 8224,
218
- Dagger: 8225,
219
- bull: 8226,
220
- hellip: 8230,
221
- permil: 8240,
222
- prime: 8242,
223
- Prime: 8243,
224
- lsaquo: 8249,
225
- rsaquo: 8250,
226
- oline: 8254,
227
- frasl: 8260,
228
- euro: 8364,
229
- image: 8465,
230
- weierp: 8472,
231
- real: 8476,
232
- trade: 8482,
233
- alefsym: 8501,
234
- larr: 8592,
235
- uarr: 8593,
236
- rarr: 8594,
237
- darr: 8595,
238
- harr: 8596,
239
- crarr: 8629,
240
- lArr: 8656,
241
- uArr: 8657,
242
- rArr: 8658,
243
- dArr: 8659,
244
- hArr: 8660,
245
- forall: 8704,
246
- part: 8706,
247
- exist: 8707,
248
- empty: 8709,
249
- nabla: 8711,
250
- isin: 8712,
251
- notin: 8713,
252
- ni: 8715,
253
- prod: 8719,
254
- sum: 8721,
255
- minus: 8722,
256
- lowast: 8727,
257
- radic: 8730,
258
- prop: 8733,
259
- infin: 8734,
260
- ang: 8736,
261
- and: 8743,
262
- or: 8744,
263
- cap: 8745,
264
- cup: 8746,
265
- int: 8747,
266
- there4: 8756,
267
- sim: 8764,
268
- cong: 8773,
269
- asymp: 8776,
270
- ne: 8800,
271
- equiv: 8801,
272
- le: 8804,
273
- ge: 8805,
274
- sub: 8834,
275
- sup: 8835,
276
- nsub: 8836,
277
- sube: 8838,
278
- supe: 8839,
279
- oplus: 8853,
280
- otimes: 8855,
281
- perp: 8869,
282
- sdot: 8901,
283
- lceil: 8968,
284
- rceil: 8969,
285
- lfloor: 8970,
286
- rfloor: 8971,
287
- lang: 9001,
288
- rang: 9002,
289
- loz: 9674,
290
- spades: 9824,
291
- clubs: 9827,
292
- hearts: 9829,
293
- diams: 9830
76
+ amp: '&',
77
+ gt: '>',
78
+ lt: '<',
79
+ quot: '"',
80
+ apos: "'",
81
+ AElig: 198,
82
+ Aacute: 193,
83
+ Acirc: 194,
84
+ Agrave: 192,
85
+ Aring: 197,
86
+ Atilde: 195,
87
+ Auml: 196,
88
+ Ccedil: 199,
89
+ ETH: 208,
90
+ Eacute: 201,
91
+ Ecirc: 202,
92
+ Egrave: 200,
93
+ Euml: 203,
94
+ Iacute: 205,
95
+ Icirc: 206,
96
+ Igrave: 204,
97
+ Iuml: 207,
98
+ Ntilde: 209,
99
+ Oacute: 211,
100
+ Ocirc: 212,
101
+ Ograve: 210,
102
+ Oslash: 216,
103
+ Otilde: 213,
104
+ Ouml: 214,
105
+ THORN: 222,
106
+ Uacute: 218,
107
+ Ucirc: 219,
108
+ Ugrave: 217,
109
+ Uuml: 220,
110
+ Yacute: 221,
111
+ aacute: 225,
112
+ acirc: 226,
113
+ aelig: 230,
114
+ agrave: 224,
115
+ aring: 229,
116
+ atilde: 227,
117
+ auml: 228,
118
+ ccedil: 231,
119
+ eacute: 233,
120
+ ecirc: 234,
121
+ egrave: 232,
122
+ eth: 240,
123
+ euml: 235,
124
+ iacute: 237,
125
+ icirc: 238,
126
+ igrave: 236,
127
+ iuml: 239,
128
+ ntilde: 241,
129
+ oacute: 243,
130
+ ocirc: 244,
131
+ ograve: 242,
132
+ oslash: 248,
133
+ otilde: 245,
134
+ ouml: 246,
135
+ szlig: 223,
136
+ thorn: 254,
137
+ uacute: 250,
138
+ ucirc: 251,
139
+ ugrave: 249,
140
+ uuml: 252,
141
+ yacute: 253,
142
+ yuml: 255,
143
+ copy: 169,
144
+ reg: 174,
145
+ nbsp: 160,
146
+ iexcl: 161,
147
+ cent: 162,
148
+ pound: 163,
149
+ curren: 164,
150
+ yen: 165,
151
+ brvbar: 166,
152
+ sect: 167,
153
+ uml: 168,
154
+ ordf: 170,
155
+ laquo: 171,
156
+ not: 172,
157
+ shy: 173,
158
+ macr: 175,
159
+ deg: 176,
160
+ plusmn: 177,
161
+ sup1: 185,
162
+ sup2: 178,
163
+ sup3: 179,
164
+ acute: 180,
165
+ micro: 181,
166
+ para: 182,
167
+ middot: 183,
168
+ cedil: 184,
169
+ ordm: 186,
170
+ raquo: 187,
171
+ frac14: 188,
172
+ frac12: 189,
173
+ frac34: 190,
174
+ iquest: 191,
175
+ times: 215,
176
+ divide: 247,
177
+ OElig: 338,
178
+ oelig: 339,
179
+ Scaron: 352,
180
+ scaron: 353,
181
+ Yuml: 376,
182
+ fnof: 402,
183
+ circ: 710,
184
+ tilde: 732,
185
+ Alpha: 913,
186
+ Beta: 914,
187
+ Gamma: 915,
188
+ Delta: 916,
189
+ Epsilon: 917,
190
+ Zeta: 918,
191
+ Eta: 919,
192
+ Theta: 920,
193
+ Iota: 921,
194
+ Kappa: 922,
195
+ Lambda: 923,
196
+ Mu: 924,
197
+ Nu: 925,
198
+ Xi: 926,
199
+ Omicron: 927,
200
+ Pi: 928,
201
+ Rho: 929,
202
+ Sigma: 931,
203
+ Tau: 932,
204
+ Upsilon: 933,
205
+ Phi: 934,
206
+ Chi: 935,
207
+ Psi: 936,
208
+ Omega: 937,
209
+ alpha: 945,
210
+ beta: 946,
211
+ gamma: 947,
212
+ delta: 948,
213
+ epsilon: 949,
214
+ zeta: 950,
215
+ eta: 951,
216
+ theta: 952,
217
+ iota: 953,
218
+ kappa: 954,
219
+ lambda: 955,
220
+ mu: 956,
221
+ nu: 957,
222
+ xi: 958,
223
+ omicron: 959,
224
+ pi: 960,
225
+ rho: 961,
226
+ sigmaf: 962,
227
+ sigma: 963,
228
+ tau: 964,
229
+ upsilon: 965,
230
+ phi: 966,
231
+ chi: 967,
232
+ psi: 968,
233
+ omega: 969,
234
+ thetasym: 977,
235
+ upsih: 978,
236
+ piv: 982,
237
+ ensp: 8194,
238
+ emsp: 8195,
239
+ thinsp: 8201,
240
+ zwnj: 8204,
241
+ zwj: 8205,
242
+ lrm: 8206,
243
+ rlm: 8207,
244
+ ndash: 8211,
245
+ mdash: 8212,
246
+ lsquo: 8216,
247
+ rsquo: 8217,
248
+ sbquo: 8218,
249
+ ldquo: 8220,
250
+ rdquo: 8221,
251
+ bdquo: 8222,
252
+ dagger: 8224,
253
+ Dagger: 8225,
254
+ bull: 8226,
255
+ hellip: 8230,
256
+ permil: 8240,
257
+ prime: 8242,
258
+ Prime: 8243,
259
+ lsaquo: 8249,
260
+ rsaquo: 8250,
261
+ oline: 8254,
262
+ frasl: 8260,
263
+ euro: 8364,
264
+ image: 8465,
265
+ weierp: 8472,
266
+ real: 8476,
267
+ trade: 8482,
268
+ alefsym: 8501,
269
+ larr: 8592,
270
+ uarr: 8593,
271
+ rarr: 8594,
272
+ darr: 8595,
273
+ harr: 8596,
274
+ crarr: 8629,
275
+ lArr: 8656,
276
+ uArr: 8657,
277
+ rArr: 8658,
278
+ dArr: 8659,
279
+ hArr: 8660,
280
+ forall: 8704,
281
+ part: 8706,
282
+ exist: 8707,
283
+ empty: 8709,
284
+ nabla: 8711,
285
+ isin: 8712,
286
+ notin: 8713,
287
+ ni: 8715,
288
+ prod: 8719,
289
+ sum: 8721,
290
+ minus: 8722,
291
+ lowast: 8727,
292
+ radic: 8730,
293
+ prop: 8733,
294
+ infin: 8734,
295
+ ang: 8736,
296
+ and: 8743,
297
+ or: 8744,
298
+ cap: 8745,
299
+ cup: 8746,
300
+ int: 8747,
301
+ there4: 8756,
302
+ sim: 8764,
303
+ cong: 8773,
304
+ asymp: 8776,
305
+ ne: 8800,
306
+ equiv: 8801,
307
+ le: 8804,
308
+ ge: 8805,
309
+ sub: 8834,
310
+ sup: 8835,
311
+ nsub: 8836,
312
+ sube: 8838,
313
+ supe: 8839,
314
+ oplus: 8853,
315
+ otimes: 8855,
316
+ perp: 8869,
317
+ sdot: 8901,
318
+ lceil: 8968,
319
+ rceil: 8969,
320
+ lfloor: 8970,
321
+ rfloor: 8971,
322
+ lang: 9001,
323
+ rang: 9002,
324
+ loz: 9674,
325
+ spades: 9824,
326
+ clubs: 9827,
327
+ hearts: 9829,
328
+ diams: 9830
294
329
  };
295
- Object.keys(ENTITIES).forEach(key => {
296
- const e = ENTITIES[key];
297
- ENTITIES[key] = typeof e === 'number' ? String.fromCharCode(e) : e;
330
+ Object.keys(ENTITIES).forEach((key) => {
331
+ const e = ENTITIES[key];
332
+ ENTITIES[key] = typeof e === 'number' ? String.fromCharCode(e) : e;
298
333
  });
334
+ /**
335
+ * Internal helper class
336
+ */
299
337
  class SAX {
300
- constructor() {
301
- this.EVENTS = EVENTS;
302
- this.ENTITIES = {
303
- ...ENTITIES
304
- };
305
- this.events = void 0;
306
- this.XML_ENTITIES = {
307
- amp: '&',
308
- gt: '>',
309
- lt: '<',
310
- quot: '"',
311
- apos: "'"
312
- };
313
- this.S = 0;
314
- this.opt = void 0;
315
- this.trackPosition = false;
316
- this.column = 0;
317
- this.line = 0;
318
- this.c = '';
319
- this.error = void 0;
320
- this.q = '';
321
- this.bufferCheckPosition = void 0;
322
- this.closed = false;
323
- this.tags = [];
324
- this.looseCase = '';
325
- this.closedRoot = false;
326
- this.sawRoot = false;
327
- this.strict = false;
328
- this.tag = void 0;
329
- this.strictEntities = void 0;
330
- this.state = void 0;
331
- this.noscript = false;
332
- this.attribList = [];
333
- this.ns = void 0;
334
- this.position = 0;
335
- this.STATE = {
336
- BEGIN: this.S++,
337
- BEGIN_WHITESPACE: this.S++,
338
- TEXT: this.S++,
339
- TEXT_ENTITY: this.S++,
340
- OPEN_WAKA: this.S++,
341
- SGML_DECL: this.S++,
342
- SGML_DECL_QUOTED: this.S++,
343
- DOCTYPE: this.S++,
344
- DOCTYPE_QUOTED: this.S++,
345
- DOCTYPE_DTD: this.S++,
346
- DOCTYPE_DTD_QUOTED: this.S++,
347
- COMMENT_STARTING: this.S++,
348
- COMMENT: this.S++,
349
- COMMENT_ENDING: this.S++,
350
- COMMENT_ENDED: this.S++,
351
- CDATA: this.S++,
352
- CDATA_ENDING: this.S++,
353
- CDATA_ENDING_2: this.S++,
354
- PROC_INST: this.S++,
355
- PROC_INST_BODY: this.S++,
356
- PROC_INST_ENDING: this.S++,
357
- OPEN_TAG: this.S++,
358
- OPEN_TAG_SLASH: this.S++,
359
- ATTRIB: this.S++,
360
- ATTRIB_NAME: this.S++,
361
- ATTRIB_NAME_SAW_WHITE: this.S++,
362
- ATTRIB_VALUE: this.S++,
363
- ATTRIB_VALUE_QUOTED: this.S++,
364
- ATTRIB_VALUE_CLOSED: this.S++,
365
- ATTRIB_VALUE_UNQUOTED: this.S++,
366
- ATTRIB_VALUE_ENTITY_Q: this.S++,
367
- ATTRIB_VALUE_ENTITY_U: this.S++,
368
- CLOSE_TAG: this.S++,
369
- CLOSE_TAG_SAW_WHITE: this.S++,
370
- SCRIPT: this.S++,
371
- SCRIPT_ENDING: this.S++
372
- };
373
- this.BUFFERS = BUFFERS;
374
- this.CDATA = '[CDATA[';
375
- this.DOCTYPE = 'DOCTYPE';
376
- this.XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
377
- this.XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
378
- this.rootNS = {
379
- xml: this.XML_NAMESPACE,
380
- xmlns: this.XMLNS_NAMESPACE
381
- };
382
- this.comment = void 0;
383
- this.sgmlDecl = void 0;
384
- this.textNode = '';
385
- this.tagName = void 0;
386
- this.doctype = void 0;
387
- this.procInstName = void 0;
388
- this.procInstBody = void 0;
389
- this.entity = '';
390
- this.attribName = void 0;
391
- this.attribValue = void 0;
392
- this.cdata = '';
393
- this.script = '';
394
- this.startTagPosition = 0;
395
- this.S = 0;
396
- for (const s in this.STATE) {
397
- if (this.STATE.hasOwnProperty(s)) {
398
- this.STATE[this.STATE[s]] = s;
399
- }
338
+ constructor() {
339
+ this.EVENTS = EVENTS;
340
+ this.ENTITIES = {
341
+ // TODO: make it readonly, needed for entity-mega test
342
+ // amp, gt, lt, quot and apos are resolved to strings instead of numerical
343
+ // codes, IDK why
344
+ ...ENTITIES
345
+ };
346
+ this.XML_ENTITIES = {
347
+ amp: '&',
348
+ gt: '>',
349
+ lt: '<',
350
+ quot: '"',
351
+ apos: "'"
352
+ };
353
+ this.S = 0;
354
+ this.trackPosition = false;
355
+ this.column = 0;
356
+ this.line = 0;
357
+ this.c = '';
358
+ this.q = '';
359
+ this.closed = false;
360
+ this.tags = [];
361
+ this.looseCase = '';
362
+ this.closedRoot = false;
363
+ this.sawRoot = false;
364
+ this.strict = false;
365
+ this.noscript = false;
366
+ this.attribList = [];
367
+ this.position = 0;
368
+ this.STATE = {
369
+ BEGIN: this.S++, // leading byte order mark or whitespace
370
+ BEGIN_WHITESPACE: this.S++, // leading whitespace
371
+ TEXT: this.S++, // general stuff
372
+ TEXT_ENTITY: this.S++, // &amp and such.
373
+ OPEN_WAKA: this.S++, // <
374
+ SGML_DECL: this.S++, // <!BLARG
375
+ SGML_DECL_QUOTED: this.S++, // <!BLARG foo "bar
376
+ DOCTYPE: this.S++, // <!DOCTYPE
377
+ DOCTYPE_QUOTED: this.S++, // <!DOCTYPE "//blah
378
+ DOCTYPE_DTD: this.S++, // <!DOCTYPE "//blah" [ ...
379
+ DOCTYPE_DTD_QUOTED: this.S++, // <!DOCTYPE "//blah" [ "foo
380
+ COMMENT_STARTING: this.S++, // <!-
381
+ COMMENT: this.S++, // <!--
382
+ COMMENT_ENDING: this.S++, // <!-- blah -
383
+ COMMENT_ENDED: this.S++, // <!-- blah --
384
+ CDATA: this.S++, // <![CDATA[ something
385
+ CDATA_ENDING: this.S++, // ]
386
+ CDATA_ENDING_2: this.S++, // ]]
387
+ PROC_INST: this.S++, // <?hi
388
+ PROC_INST_BODY: this.S++, // <?hi there
389
+ PROC_INST_ENDING: this.S++, // <?hi "there" ?
390
+ OPEN_TAG: this.S++, // <strong
391
+ OPEN_TAG_SLASH: this.S++, // <strong /
392
+ ATTRIB: this.S++, // <a
393
+ ATTRIB_NAME: this.S++, // <a foo
394
+ ATTRIB_NAME_SAW_WHITE: this.S++, // <a foo _
395
+ ATTRIB_VALUE: this.S++, // <a foo=
396
+ ATTRIB_VALUE_QUOTED: this.S++, // <a foo="bar
397
+ ATTRIB_VALUE_CLOSED: this.S++, // <a foo="bar"
398
+ ATTRIB_VALUE_UNQUOTED: this.S++, // <a foo=bar
399
+ ATTRIB_VALUE_ENTITY_Q: this.S++, // <foo bar="&quot;"
400
+ ATTRIB_VALUE_ENTITY_U: this.S++, // <foo bar=&quot
401
+ CLOSE_TAG: this.S++, // </a
402
+ CLOSE_TAG_SAW_WHITE: this.S++, // </a >
403
+ SCRIPT: this.S++, // <script> ...
404
+ SCRIPT_ENDING: this.S++ // <script> ... <
405
+ };
406
+ this.BUFFERS = BUFFERS;
407
+ // private parser: (strict: boolean, opt: any) => SAXParser;
408
+ this.CDATA = '[CDATA[';
409
+ this.DOCTYPE = 'DOCTYPE';
410
+ this.XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
411
+ this.XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
412
+ this.rootNS = {
413
+ xml: this.XML_NAMESPACE,
414
+ xmlns: this.XMLNS_NAMESPACE
415
+ };
416
+ this.textNode = '';
417
+ this.entity = '';
418
+ this.cdata = '';
419
+ this.script = '';
420
+ this.startTagPosition = 0;
421
+ this.S = 0;
422
+ for (const s in this.STATE) {
423
+ if (this.STATE.hasOwnProperty(s)) {
424
+ this.STATE[this.STATE[s]] = s;
425
+ }
426
+ }
427
+ // shorthand
428
+ this.S = this.STATE;
429
+ }
430
+ static charAt(chunk, i) {
431
+ let result = '';
432
+ if (i < chunk.length) {
433
+ result = chunk.charAt(i);
434
+ }
435
+ return result;
400
436
  }
401
- this.S = this.STATE;
402
- }
403
- static charAt(chunk, i) {
404
- let result = '';
405
- if (i < chunk.length) {
406
- result = chunk.charAt(i);
437
+ static isWhitespace(c) {
438
+ return c === ' ' || c === '\n' || c === '\r' || c === '\t';
407
439
  }
408
- return result;
409
- }
410
- static isWhitespace(c) {
411
- return c === ' ' || c === '\n' || c === '\r' || c === '\t';
412
- }
413
- static isQuote(c) {
414
- return c === '"' || c === "'";
415
- }
416
- static isAttribEnd(c) {
417
- return c === '>' || SAX.isWhitespace(c);
418
- }
419
- static isMatch(regex, c) {
420
- return regex.test(c);
421
- }
422
- static notMatch(regex, c) {
423
- return !SAX.isMatch(regex, c);
424
- }
425
- static qname(name, attribute) {
426
- const i = name.indexOf(':');
427
- const qualName = i < 0 ? ['', name] : name.split(':');
428
- let prefix = qualName[0];
429
- let local = qualName[1];
430
- if (attribute && name === 'xmlns') {
431
- prefix = 'xmlns';
432
- local = '';
440
+ static isQuote(c) {
441
+ return c === '"' || c === "'";
433
442
  }
434
- return {
435
- prefix,
436
- local
437
- };
438
- }
439
- write(chunk) {
440
- if (this.error) {
441
- throw this.error;
443
+ static isAttribEnd(c) {
444
+ return c === '>' || SAX.isWhitespace(c);
442
445
  }
443
- if (this.closed) {
444
- return this.errorFunction('Cannot write after close. Assign an onready handler.');
446
+ static isMatch(regex, c) {
447
+ return regex.test(c);
445
448
  }
446
- if (chunk === null) {
447
- return this.end();
449
+ static notMatch(regex, c) {
450
+ return !SAX.isMatch(regex, c);
448
451
  }
449
- if (typeof chunk === 'object') {
450
- chunk = chunk.toString();
452
+ static qname(name, attribute) {
453
+ const i = name.indexOf(':');
454
+ const qualName = i < 0 ? ['', name] : name.split(':');
455
+ let prefix = qualName[0];
456
+ let local = qualName[1];
457
+ // <x "xmlns"="http://foo">
458
+ if (attribute && name === 'xmlns') {
459
+ prefix = 'xmlns';
460
+ local = '';
461
+ }
462
+ return { prefix, local };
451
463
  }
452
- let i = 0;
453
- let c;
454
- while (true) {
455
- c = SAX.charAt(chunk, i++);
456
- this.c = c;
457
- if (!c) {
458
- break;
459
- }
460
- if (this.trackPosition) {
461
- this.position++;
462
- if (c === '\n') {
463
- this.line++;
464
- this.column = 0;
465
- } else {
466
- this.column++;
464
+ write(chunk) {
465
+ if (this.error) {
466
+ throw this.error;
467
+ }
468
+ if (this.closed) {
469
+ return this.errorFunction('Cannot write after close. Assign an onready handler.');
470
+ }
471
+ if (chunk === null) {
472
+ return this.end();
473
+ }
474
+ if (typeof chunk === 'object') {
475
+ chunk = chunk.toString();
467
476
  }
468
- }
469
- switch (this.state) {
470
- case this.S.BEGIN:
471
- this.state = this.S.BEGIN_WHITESPACE;
472
- if (c === '\uFEFF') {
473
- continue;
474
- }
475
- this.beginWhiteSpace(c);
476
- continue;
477
- case this.S.BEGIN_WHITESPACE:
478
- this.beginWhiteSpace(c);
479
- continue;
480
- case this.S.TEXT:
481
- if (this.sawRoot && !this.closedRoot) {
482
- const starti = i - 1;
483
- while (c && c !== '<' && c !== '&') {
484
- c = SAX.charAt(chunk, i++);
485
- if (c && this.trackPosition) {
477
+ let i = 0;
478
+ let c;
479
+ while (true) {
480
+ c = SAX.charAt(chunk, i++);
481
+ this.c = c;
482
+ if (!c) {
483
+ break;
484
+ }
485
+ if (this.trackPosition) {
486
486
  this.position++;
487
487
  if (c === '\n') {
488
- this.line++;
489
- this.column = 0;
490
- } else {
491
- this.column++;
488
+ this.line++;
489
+ this.column = 0;
490
+ }
491
+ else {
492
+ this.column++;
492
493
  }
493
- }
494
- }
495
- this.textNode += chunk.substring(starti, i - 1);
496
- }
497
- if (c === '<' && !(this.sawRoot && this.closedRoot && !this.strict)) {
498
- this.state = this.S.OPEN_WAKA;
499
- this.startTagPosition = this.position;
500
- } else {
501
- if (!SAX.isWhitespace(c) && (!this.sawRoot || this.closedRoot)) {
502
- this.strictFail('Text data outside of root node.');
503
- }
504
- if (c === '&') {
505
- this.state = this.S.TEXT_ENTITY;
506
- } else {
507
- this.textNode += c;
508
- }
509
- }
510
- continue;
511
- case this.S.SCRIPT:
512
- if (c === '<') {
513
- this.state = this.S.SCRIPT_ENDING;
514
- } else {
515
- this.script += c;
516
- }
517
- continue;
518
- case this.S.SCRIPT_ENDING:
519
- if (c === '/') {
520
- this.state = this.S.CLOSE_TAG;
521
- } else {
522
- this.script += `<${c}`;
523
- this.state = this.S.SCRIPT;
524
- }
525
- continue;
526
- case this.S.OPEN_WAKA:
527
- if (c === '!') {
528
- this.state = this.S.SGML_DECL;
529
- this.sgmlDecl = '';
530
- } else if (SAX.isWhitespace(c)) {} else if (SAX.isMatch(nameStart, c)) {
531
- this.state = this.S.OPEN_TAG;
532
- this.tagName = c;
533
- } else if (c === '/') {
534
- this.state = this.S.CLOSE_TAG;
535
- this.tagName = '';
536
- } else if (c === '?') {
537
- this.state = this.S.PROC_INST;
538
- this.procInstName = this.procInstBody = '';
539
- } else {
540
- this.strictFail('Unencoded <');
541
- if (this.startTagPosition + 1 < this.position) {
542
- const pad = this.position - this.startTagPosition;
543
- c = new Array(pad).join(' ') + c;
544
- }
545
- this.textNode += `<${c}`;
546
- this.state = this.S.TEXT;
547
- }
548
- continue;
549
- case this.S.SGML_DECL:
550
- if ((this.sgmlDecl + c).toUpperCase() === this.CDATA) {
551
- this.emitNode('onopencdata');
552
- this.state = this.S.CDATA;
553
- this.sgmlDecl = '';
554
- this.cdata = '';
555
- } else if (this.sgmlDecl + c === '--') {
556
- this.state = this.S.COMMENT;
557
- this.comment = '';
558
- this.sgmlDecl = '';
559
- } else if ((this.sgmlDecl + c).toUpperCase() === this.DOCTYPE) {
560
- this.state = this.S.DOCTYPE;
561
- if (this.doctype || this.sawRoot) {
562
- this.strictFail('Inappropriately located doctype declaration');
563
- }
564
- this.doctype = '';
565
- this.sgmlDecl = '';
566
- } else if (c === '>') {
567
- this.emitNode('onsgmldeclaration', this.sgmlDecl);
568
- this.sgmlDecl = '';
569
- this.state = this.S.TEXT;
570
- } else if (SAX.isQuote(c)) {
571
- this.state = this.S.SGML_DECL_QUOTED;
572
- this.sgmlDecl += c;
573
- } else {
574
- this.sgmlDecl += c;
575
- }
576
- continue;
577
- case this.S.SGML_DECL_QUOTED:
578
- if (c === this.q) {
579
- this.state = this.S.SGML_DECL;
580
- this.q = '';
581
- }
582
- this.sgmlDecl += c;
583
- continue;
584
- case this.S.DOCTYPE:
585
- if (c === '>') {
586
- this.state = this.S.TEXT;
587
- this.emitNode('ondoctype', this.doctype);
588
- this.doctype = true;
589
- } else {
590
- this.doctype += c;
591
- if (c === '[') {
592
- this.state = this.S.DOCTYPE_DTD;
593
- } else if (SAX.isQuote(c)) {
594
- this.state = this.S.DOCTYPE_QUOTED;
595
- this.q = c;
596
- }
597
- }
598
- continue;
599
- case this.S.DOCTYPE_QUOTED:
600
- this.doctype += c;
601
- if (c === this.q) {
602
- this.q = '';
603
- this.state = this.S.DOCTYPE;
604
- }
605
- continue;
606
- case this.S.DOCTYPE_DTD:
607
- this.doctype += c;
608
- if (c === ']') {
609
- this.state = this.S.DOCTYPE;
610
- } else if (SAX.isQuote(c)) {
611
- this.state = this.S.DOCTYPE_DTD_QUOTED;
612
- this.q = c;
613
- }
614
- continue;
615
- case this.S.DOCTYPE_DTD_QUOTED:
616
- this.doctype += c;
617
- if (c === this.q) {
618
- this.state = this.S.DOCTYPE_DTD;
619
- this.q = '';
620
- }
621
- continue;
622
- case this.S.COMMENT:
623
- if (c === '-') {
624
- this.state = this.S.COMMENT_ENDING;
625
- } else {
626
- this.comment += c;
627
- }
628
- continue;
629
- case this.S.COMMENT_ENDING:
630
- if (c === '-') {
631
- this.state = this.S.COMMENT_ENDED;
632
- this.comment = this.textApplyOptions(this.comment);
633
- if (this.comment) {
634
- this.emitNode('oncomment', this.comment);
635
- }
636
- this.comment = '';
637
- } else {
638
- this.comment += `-${c}`;
639
- this.state = this.S.COMMENT;
640
- }
641
- continue;
642
- case this.S.COMMENT_ENDED:
643
- if (c !== '>') {
644
- this.strictFail('Malformed comment');
645
- this.comment += `--${c}`;
646
- this.state = this.S.COMMENT;
647
- } else {
648
- this.state = this.S.TEXT;
649
- }
650
- continue;
651
- case this.S.CDATA:
652
- if (c === ']') {
653
- this.state = this.S.CDATA_ENDING;
654
- } else {
655
- this.cdata += c;
656
- }
657
- continue;
658
- case this.S.CDATA_ENDING:
659
- if (c === ']') {
660
- this.state = this.S.CDATA_ENDING_2;
661
- } else {
662
- this.cdata += `]${c}`;
663
- this.state = this.S.CDATA;
664
- }
665
- continue;
666
- case this.S.CDATA_ENDING_2:
667
- if (c === '>') {
668
- if (this.cdata) {
669
- this.emitNode('oncdata', this.cdata);
670
- }
671
- this.emitNode('onclosecdata');
672
- this.cdata = '';
673
- this.state = this.S.TEXT;
674
- } else if (c === ']') {
675
- this.cdata += ']';
676
- } else {
677
- this.cdata += `]]${c}`;
678
- this.state = this.S.CDATA;
679
- }
680
- continue;
681
- case this.S.PROC_INST:
682
- if (c === '?') {
683
- this.state = this.S.PROC_INST_ENDING;
684
- } else if (SAX.isWhitespace(c)) {
685
- this.state = this.S.PROC_INST_BODY;
686
- } else {
687
- this.procInstName += c;
688
- }
689
- continue;
690
- case this.S.PROC_INST_BODY:
691
- if (!this.procInstBody && SAX.isWhitespace(c)) {
692
- continue;
693
- } else if (c === '?') {
694
- this.state = this.S.PROC_INST_ENDING;
695
- } else {
696
- this.procInstBody += c;
697
- }
698
- continue;
699
- case this.S.PROC_INST_ENDING:
700
- if (c === '>') {
701
- this.emitNode('onprocessinginstruction', {
702
- name: this.procInstName,
703
- body: this.procInstBody
704
- });
705
- this.procInstName = this.procInstBody = '';
706
- this.state = this.S.TEXT;
707
- } else {
708
- this.procInstBody += `?${c}`;
709
- this.state = this.S.PROC_INST_BODY;
710
- }
711
- continue;
712
- case this.S.OPEN_TAG:
713
- if (SAX.isMatch(nameBody, c)) {
714
- this.tagName += c;
715
- } else {
716
- this.newTag();
717
- if (c === '>') {
718
- this.openTag();
719
- } else if (c === '/') {
720
- this.state = this.S.OPEN_TAG_SLASH;
721
- } else {
722
- if (!SAX.isWhitespace(c)) {
723
- this.strictFail('Invalid character in tag name');
724
- }
725
- this.state = this.S.ATTRIB;
726
- }
727
- }
728
- continue;
729
- case this.S.OPEN_TAG_SLASH:
730
- if (c === '>') {
731
- this.openTag(true);
732
- this.closeTag();
733
- } else {
734
- this.strictFail('Forward-slash in opening tag not followed by >');
735
- this.state = this.S.ATTRIB;
736
- }
737
- continue;
738
- case this.S.ATTRIB:
739
- if (SAX.isWhitespace(c)) {
740
- continue;
741
- } else if (c === '>') {
742
- this.openTag();
743
- } else if (c === '/') {
744
- this.state = this.S.OPEN_TAG_SLASH;
745
- } else if (SAX.isMatch(nameStart, c)) {
746
- this.attribName = c;
747
- this.attribValue = '';
748
- this.state = this.S.ATTRIB_NAME;
749
- } else {
750
- this.strictFail('Invalid attribute name');
751
- }
752
- continue;
753
- case this.S.ATTRIB_NAME:
754
- if (c === '=') {
755
- this.state = this.S.ATTRIB_VALUE;
756
- } else if (c === '>') {
757
- this.strictFail('Attribute without value');
758
- this.attribValue = this.attribName;
759
- this.attrib();
760
- this.openTag();
761
- } else if (SAX.isWhitespace(c)) {
762
- this.state = this.S.ATTRIB_NAME_SAW_WHITE;
763
- } else if (SAX.isMatch(nameBody, c)) {
764
- this.attribName += c;
765
- } else {
766
- this.strictFail('Invalid attribute name');
767
- }
768
- continue;
769
- case this.S.ATTRIB_NAME_SAW_WHITE:
770
- if (c === '=') {
771
- this.state = this.S.ATTRIB_VALUE;
772
- } else if (SAX.isWhitespace(c)) {
773
- continue;
774
- } else {
775
- this.strictFail('Attribute without value');
776
- this.tag.attributes[this.attribName] = '';
777
- this.attribValue = '';
778
- this.emitNode('onattribute', {
779
- name: this.attribName,
780
- value: ''
781
- });
782
- this.attribName = '';
783
- if (c === '>') {
784
- this.openTag();
785
- } else if (SAX.isMatch(nameStart, c)) {
786
- this.attribName = c;
787
- this.state = this.S.ATTRIB_NAME;
788
- } else {
789
- this.strictFail('Invalid attribute name');
790
- this.state = this.S.ATTRIB;
791
- }
792
- }
793
- continue;
794
- case this.S.ATTRIB_VALUE:
795
- if (SAX.isWhitespace(c)) {
796
- continue;
797
- } else if (SAX.isQuote(c)) {
798
- this.q = c;
799
- this.state = this.S.ATTRIB_VALUE_QUOTED;
800
- } else {
801
- this.strictFail('Unquoted attribute value');
802
- this.state = this.S.ATTRIB_VALUE_UNQUOTED;
803
- this.attribValue = c;
804
- }
805
- continue;
806
- case this.S.ATTRIB_VALUE_QUOTED:
807
- if (c !== this.q) {
808
- if (c === '&') {
809
- this.state = this.S.ATTRIB_VALUE_ENTITY_Q;
810
- } else {
811
- this.attribValue += c;
812
- }
813
- continue;
814
- }
815
- this.attrib();
816
- this.q = '';
817
- this.state = this.S.ATTRIB_VALUE_CLOSED;
818
- continue;
819
- case this.S.ATTRIB_VALUE_CLOSED:
820
- if (SAX.isWhitespace(c)) {
821
- this.state = this.S.ATTRIB;
822
- } else if (c === '>') {
823
- this.openTag();
824
- } else if (c === '/') {
825
- this.state = this.S.OPEN_TAG_SLASH;
826
- } else if (SAX.isMatch(nameStart, c)) {
827
- this.strictFail('No whitespace between attributes');
828
- this.attribName = c;
829
- this.attribValue = '';
830
- this.state = this.S.ATTRIB_NAME;
831
- } else {
832
- this.strictFail('Invalid attribute name');
833
- }
834
- continue;
835
- case this.S.ATTRIB_VALUE_UNQUOTED:
836
- if (!SAX.isAttribEnd(c)) {
837
- if (c === '&') {
838
- this.state = this.S.ATTRIB_VALUE_ENTITY_U;
839
- } else {
840
- this.attribValue += c;
841
- }
842
- continue;
843
- }
844
- this.attrib();
845
- if (c === '>') {
846
- this.openTag();
847
- } else {
848
- this.state = this.S.ATTRIB;
849
- }
850
- continue;
851
- case this.S.CLOSE_TAG:
852
- if (!this.tagName) {
853
- if (SAX.isWhitespace(c)) {
854
- continue;
855
- } else if (SAX.notMatch(nameStart, c)) {
856
- if (this.script) {
857
- this.script += `</${c}`;
858
- this.state = this.S.SCRIPT;
859
- } else {
860
- this.strictFail('Invalid tagname in closing tag.');
861
- }
862
- } else {
863
- this.tagName = c;
864
494
  }
865
- } else if (c === '>') {
866
- this.closeTag();
867
- } else if (SAX.isMatch(nameBody, c)) {
868
- this.tagName += c;
869
- } else if (this.script) {
870
- this.script += `</${this.tagName}`;
871
- this.tagName = '';
872
- this.state = this.S.SCRIPT;
873
- } else {
874
- if (!SAX.isWhitespace(c)) {
875
- this.strictFail('Invalid tagname in closing tag');
495
+ switch (this.state) {
496
+ case this.S.BEGIN:
497
+ this.state = this.S.BEGIN_WHITESPACE;
498
+ if (c === '\uFEFF') {
499
+ continue;
500
+ }
501
+ this.beginWhiteSpace(c);
502
+ continue;
503
+ case this.S.BEGIN_WHITESPACE:
504
+ this.beginWhiteSpace(c);
505
+ continue;
506
+ case this.S.TEXT:
507
+ if (this.sawRoot && !this.closedRoot) {
508
+ const starti = i - 1;
509
+ while (c && c !== '<' && c !== '&') {
510
+ c = SAX.charAt(chunk, i++);
511
+ if (c && this.trackPosition) {
512
+ this.position++;
513
+ if (c === '\n') {
514
+ this.line++;
515
+ this.column = 0;
516
+ }
517
+ else {
518
+ this.column++;
519
+ }
520
+ }
521
+ }
522
+ this.textNode += chunk.substring(starti, i - 1);
523
+ }
524
+ if (c === '<' && !(this.sawRoot && this.closedRoot && !this.strict)) {
525
+ this.state = this.S.OPEN_WAKA;
526
+ this.startTagPosition = this.position;
527
+ }
528
+ else {
529
+ if (!SAX.isWhitespace(c) && (!this.sawRoot || this.closedRoot)) {
530
+ this.strictFail('Text data outside of root node.');
531
+ }
532
+ if (c === '&') {
533
+ this.state = this.S.TEXT_ENTITY;
534
+ }
535
+ else {
536
+ this.textNode += c;
537
+ }
538
+ }
539
+ continue;
540
+ case this.S.SCRIPT:
541
+ // only non-strict
542
+ if (c === '<') {
543
+ this.state = this.S.SCRIPT_ENDING;
544
+ }
545
+ else {
546
+ this.script += c;
547
+ }
548
+ continue;
549
+ case this.S.SCRIPT_ENDING:
550
+ if (c === '/') {
551
+ this.state = this.S.CLOSE_TAG;
552
+ }
553
+ else {
554
+ this.script += `<${c}`;
555
+ this.state = this.S.SCRIPT;
556
+ }
557
+ continue;
558
+ case this.S.OPEN_WAKA:
559
+ // either a /, ?, !, or text is coming next.
560
+ if (c === '!') {
561
+ this.state = this.S.SGML_DECL;
562
+ this.sgmlDecl = '';
563
+ }
564
+ else if (SAX.isWhitespace(c)) {
565
+ // wait for it...
566
+ }
567
+ else if (SAX.isMatch(nameStart, c)) {
568
+ this.state = this.S.OPEN_TAG;
569
+ this.tagName = c;
570
+ }
571
+ else if (c === '/') {
572
+ this.state = this.S.CLOSE_TAG;
573
+ this.tagName = '';
574
+ }
575
+ else if (c === '?') {
576
+ this.state = this.S.PROC_INST;
577
+ this.procInstName = this.procInstBody = '';
578
+ }
579
+ else {
580
+ this.strictFail('Unencoded <');
581
+ // if there was some whitespace, then add that in.
582
+ if (this.startTagPosition + 1 < this.position) {
583
+ const pad = this.position - this.startTagPosition;
584
+ c = new Array(pad).join(' ') + c;
585
+ }
586
+ this.textNode += `<${c}`;
587
+ this.state = this.S.TEXT;
588
+ }
589
+ continue;
590
+ case this.S.SGML_DECL:
591
+ if ((this.sgmlDecl + c).toUpperCase() === this.CDATA) {
592
+ this.emitNode('onopencdata');
593
+ this.state = this.S.CDATA;
594
+ this.sgmlDecl = '';
595
+ this.cdata = '';
596
+ }
597
+ else if (this.sgmlDecl + c === '--') {
598
+ this.state = this.S.COMMENT;
599
+ this.comment = '';
600
+ this.sgmlDecl = '';
601
+ }
602
+ else if ((this.sgmlDecl + c).toUpperCase() === this.DOCTYPE) {
603
+ this.state = this.S.DOCTYPE;
604
+ if (this.doctype || this.sawRoot) {
605
+ this.strictFail('Inappropriately located doctype declaration');
606
+ }
607
+ this.doctype = '';
608
+ this.sgmlDecl = '';
609
+ }
610
+ else if (c === '>') {
611
+ this.emitNode('onsgmldeclaration', this.sgmlDecl);
612
+ this.sgmlDecl = '';
613
+ this.state = this.S.TEXT;
614
+ }
615
+ else if (SAX.isQuote(c)) {
616
+ this.state = this.S.SGML_DECL_QUOTED;
617
+ this.sgmlDecl += c;
618
+ }
619
+ else {
620
+ this.sgmlDecl += c;
621
+ }
622
+ continue;
623
+ case this.S.SGML_DECL_QUOTED:
624
+ if (c === this.q) {
625
+ this.state = this.S.SGML_DECL;
626
+ this.q = '';
627
+ }
628
+ this.sgmlDecl += c;
629
+ continue;
630
+ case this.S.DOCTYPE:
631
+ if (c === '>') {
632
+ this.state = this.S.TEXT;
633
+ this.emitNode('ondoctype', this.doctype);
634
+ this.doctype = true; // just remember that we saw it.
635
+ }
636
+ else {
637
+ this.doctype += c;
638
+ if (c === '[') {
639
+ this.state = this.S.DOCTYPE_DTD;
640
+ }
641
+ else if (SAX.isQuote(c)) {
642
+ this.state = this.S.DOCTYPE_QUOTED;
643
+ this.q = c;
644
+ }
645
+ }
646
+ continue;
647
+ case this.S.DOCTYPE_QUOTED:
648
+ this.doctype += c;
649
+ if (c === this.q) {
650
+ this.q = '';
651
+ this.state = this.S.DOCTYPE;
652
+ }
653
+ continue;
654
+ case this.S.DOCTYPE_DTD:
655
+ this.doctype += c;
656
+ if (c === ']') {
657
+ this.state = this.S.DOCTYPE;
658
+ }
659
+ else if (SAX.isQuote(c)) {
660
+ this.state = this.S.DOCTYPE_DTD_QUOTED;
661
+ this.q = c;
662
+ }
663
+ continue;
664
+ case this.S.DOCTYPE_DTD_QUOTED:
665
+ this.doctype += c;
666
+ if (c === this.q) {
667
+ this.state = this.S.DOCTYPE_DTD;
668
+ this.q = '';
669
+ }
670
+ continue;
671
+ case this.S.COMMENT:
672
+ if (c === '-') {
673
+ this.state = this.S.COMMENT_ENDING;
674
+ }
675
+ else {
676
+ this.comment += c;
677
+ }
678
+ continue;
679
+ case this.S.COMMENT_ENDING:
680
+ if (c === '-') {
681
+ this.state = this.S.COMMENT_ENDED;
682
+ this.comment = this.textApplyOptions(this.comment);
683
+ if (this.comment) {
684
+ this.emitNode('oncomment', this.comment);
685
+ }
686
+ this.comment = '';
687
+ }
688
+ else {
689
+ this.comment += `-${c}`;
690
+ this.state = this.S.COMMENT;
691
+ }
692
+ continue;
693
+ case this.S.COMMENT_ENDED:
694
+ if (c !== '>') {
695
+ this.strictFail('Malformed comment');
696
+ // allow <!-- blah -- bloo --> in non-strict mode,
697
+ // which is a comment of " blah -- bloo "
698
+ this.comment += `--${c}`;
699
+ this.state = this.S.COMMENT;
700
+ }
701
+ else {
702
+ this.state = this.S.TEXT;
703
+ }
704
+ continue;
705
+ case this.S.CDATA:
706
+ if (c === ']') {
707
+ this.state = this.S.CDATA_ENDING;
708
+ }
709
+ else {
710
+ this.cdata += c;
711
+ }
712
+ continue;
713
+ case this.S.CDATA_ENDING:
714
+ if (c === ']') {
715
+ this.state = this.S.CDATA_ENDING_2;
716
+ }
717
+ else {
718
+ this.cdata += `]${c}`;
719
+ this.state = this.S.CDATA;
720
+ }
721
+ continue;
722
+ case this.S.CDATA_ENDING_2:
723
+ if (c === '>') {
724
+ if (this.cdata) {
725
+ this.emitNode('oncdata', this.cdata);
726
+ }
727
+ this.emitNode('onclosecdata');
728
+ this.cdata = '';
729
+ this.state = this.S.TEXT;
730
+ }
731
+ else if (c === ']') {
732
+ this.cdata += ']';
733
+ }
734
+ else {
735
+ this.cdata += `]]${c}`;
736
+ this.state = this.S.CDATA;
737
+ }
738
+ continue;
739
+ case this.S.PROC_INST:
740
+ if (c === '?') {
741
+ this.state = this.S.PROC_INST_ENDING;
742
+ }
743
+ else if (SAX.isWhitespace(c)) {
744
+ this.state = this.S.PROC_INST_BODY;
745
+ }
746
+ else {
747
+ this.procInstName += c;
748
+ }
749
+ continue;
750
+ case this.S.PROC_INST_BODY:
751
+ if (!this.procInstBody && SAX.isWhitespace(c)) {
752
+ continue;
753
+ }
754
+ else if (c === '?') {
755
+ this.state = this.S.PROC_INST_ENDING;
756
+ }
757
+ else {
758
+ this.procInstBody += c;
759
+ }
760
+ continue;
761
+ case this.S.PROC_INST_ENDING:
762
+ if (c === '>') {
763
+ this.emitNode('onprocessinginstruction', {
764
+ name: this.procInstName,
765
+ body: this.procInstBody
766
+ });
767
+ this.procInstName = this.procInstBody = '';
768
+ this.state = this.S.TEXT;
769
+ }
770
+ else {
771
+ this.procInstBody += `?${c}`;
772
+ this.state = this.S.PROC_INST_BODY;
773
+ }
774
+ continue;
775
+ case this.S.OPEN_TAG:
776
+ if (SAX.isMatch(nameBody, c)) {
777
+ this.tagName += c;
778
+ }
779
+ else {
780
+ this.newTag();
781
+ if (c === '>') {
782
+ this.openTag();
783
+ }
784
+ else if (c === '/') {
785
+ this.state = this.S.OPEN_TAG_SLASH;
786
+ }
787
+ else {
788
+ if (!SAX.isWhitespace(c)) {
789
+ this.strictFail('Invalid character in tag name');
790
+ }
791
+ this.state = this.S.ATTRIB;
792
+ }
793
+ }
794
+ continue;
795
+ case this.S.OPEN_TAG_SLASH:
796
+ if (c === '>') {
797
+ this.openTag(true);
798
+ this.closeTag();
799
+ }
800
+ else {
801
+ this.strictFail('Forward-slash in opening tag not followed by >');
802
+ this.state = this.S.ATTRIB;
803
+ }
804
+ continue;
805
+ case this.S.ATTRIB:
806
+ // haven't read the attribute name yet.
807
+ if (SAX.isWhitespace(c)) {
808
+ continue;
809
+ }
810
+ else if (c === '>') {
811
+ this.openTag();
812
+ }
813
+ else if (c === '/') {
814
+ this.state = this.S.OPEN_TAG_SLASH;
815
+ }
816
+ else if (SAX.isMatch(nameStart, c)) {
817
+ this.attribName = c;
818
+ this.attribValue = '';
819
+ this.state = this.S.ATTRIB_NAME;
820
+ }
821
+ else {
822
+ this.strictFail('Invalid attribute name');
823
+ }
824
+ continue;
825
+ case this.S.ATTRIB_NAME:
826
+ if (c === '=') {
827
+ this.state = this.S.ATTRIB_VALUE;
828
+ }
829
+ else if (c === '>') {
830
+ this.strictFail('Attribute without value');
831
+ this.attribValue = this.attribName;
832
+ this.attrib();
833
+ this.openTag();
834
+ }
835
+ else if (SAX.isWhitespace(c)) {
836
+ this.state = this.S.ATTRIB_NAME_SAW_WHITE;
837
+ }
838
+ else if (SAX.isMatch(nameBody, c)) {
839
+ this.attribName += c;
840
+ }
841
+ else {
842
+ this.strictFail('Invalid attribute name');
843
+ }
844
+ continue;
845
+ case this.S.ATTRIB_NAME_SAW_WHITE:
846
+ if (c === '=') {
847
+ this.state = this.S.ATTRIB_VALUE;
848
+ }
849
+ else if (SAX.isWhitespace(c)) {
850
+ continue;
851
+ }
852
+ else {
853
+ this.strictFail('Attribute without value');
854
+ this.tag.attributes[this.attribName] = '';
855
+ this.attribValue = '';
856
+ this.emitNode('onattribute', {
857
+ name: this.attribName,
858
+ value: ''
859
+ });
860
+ this.attribName = '';
861
+ if (c === '>') {
862
+ this.openTag();
863
+ }
864
+ else if (SAX.isMatch(nameStart, c)) {
865
+ this.attribName = c;
866
+ this.state = this.S.ATTRIB_NAME;
867
+ }
868
+ else {
869
+ this.strictFail('Invalid attribute name');
870
+ this.state = this.S.ATTRIB;
871
+ }
872
+ }
873
+ continue;
874
+ case this.S.ATTRIB_VALUE:
875
+ if (SAX.isWhitespace(c)) {
876
+ continue;
877
+ }
878
+ else if (SAX.isQuote(c)) {
879
+ this.q = c;
880
+ this.state = this.S.ATTRIB_VALUE_QUOTED;
881
+ }
882
+ else {
883
+ this.strictFail('Unquoted attribute value');
884
+ this.state = this.S.ATTRIB_VALUE_UNQUOTED;
885
+ this.attribValue = c;
886
+ }
887
+ continue;
888
+ case this.S.ATTRIB_VALUE_QUOTED:
889
+ if (c !== this.q) {
890
+ if (c === '&') {
891
+ this.state = this.S.ATTRIB_VALUE_ENTITY_Q;
892
+ }
893
+ else {
894
+ this.attribValue += c;
895
+ }
896
+ continue;
897
+ }
898
+ this.attrib();
899
+ this.q = '';
900
+ this.state = this.S.ATTRIB_VALUE_CLOSED;
901
+ continue;
902
+ case this.S.ATTRIB_VALUE_CLOSED:
903
+ if (SAX.isWhitespace(c)) {
904
+ this.state = this.S.ATTRIB;
905
+ }
906
+ else if (c === '>') {
907
+ this.openTag();
908
+ }
909
+ else if (c === '/') {
910
+ this.state = this.S.OPEN_TAG_SLASH;
911
+ }
912
+ else if (SAX.isMatch(nameStart, c)) {
913
+ this.strictFail('No whitespace between attributes');
914
+ this.attribName = c;
915
+ this.attribValue = '';
916
+ this.state = this.S.ATTRIB_NAME;
917
+ }
918
+ else {
919
+ this.strictFail('Invalid attribute name');
920
+ }
921
+ continue;
922
+ case this.S.ATTRIB_VALUE_UNQUOTED:
923
+ if (!SAX.isAttribEnd(c)) {
924
+ if (c === '&') {
925
+ this.state = this.S.ATTRIB_VALUE_ENTITY_U;
926
+ }
927
+ else {
928
+ this.attribValue += c;
929
+ }
930
+ continue;
931
+ }
932
+ this.attrib();
933
+ if (c === '>') {
934
+ this.openTag();
935
+ }
936
+ else {
937
+ this.state = this.S.ATTRIB;
938
+ }
939
+ continue;
940
+ case this.S.CLOSE_TAG:
941
+ if (!this.tagName) {
942
+ if (SAX.isWhitespace(c)) {
943
+ continue;
944
+ }
945
+ else if (SAX.notMatch(nameStart, c)) {
946
+ if (this.script) {
947
+ this.script += `</${c}`;
948
+ this.state = this.S.SCRIPT;
949
+ }
950
+ else {
951
+ this.strictFail('Invalid tagname in closing tag.');
952
+ }
953
+ }
954
+ else {
955
+ this.tagName = c;
956
+ }
957
+ }
958
+ else if (c === '>') {
959
+ this.closeTag();
960
+ }
961
+ else if (SAX.isMatch(nameBody, c)) {
962
+ this.tagName += c;
963
+ }
964
+ else if (this.script) {
965
+ this.script += `</${this.tagName}`;
966
+ this.tagName = '';
967
+ this.state = this.S.SCRIPT;
968
+ }
969
+ else {
970
+ if (!SAX.isWhitespace(c)) {
971
+ this.strictFail('Invalid tagname in closing tag');
972
+ }
973
+ this.state = this.S.CLOSE_TAG_SAW_WHITE;
974
+ }
975
+ continue;
976
+ case this.S.CLOSE_TAG_SAW_WHITE:
977
+ if (SAX.isWhitespace(c)) {
978
+ continue;
979
+ }
980
+ if (c === '>') {
981
+ this.closeTag();
982
+ }
983
+ else {
984
+ this.strictFail('Invalid characters in closing tag');
985
+ }
986
+ continue;
987
+ case this.S.TEXT_ENTITY:
988
+ case this.S.ATTRIB_VALUE_ENTITY_Q:
989
+ case this.S.ATTRIB_VALUE_ENTITY_U:
990
+ let returnState;
991
+ let buffer;
992
+ switch (this.state) {
993
+ case this.S.TEXT_ENTITY:
994
+ returnState = this.S.TEXT;
995
+ buffer = 'textNode';
996
+ break;
997
+ case this.S.ATTRIB_VALUE_ENTITY_Q:
998
+ returnState = this.S.ATTRIB_VALUE_QUOTED;
999
+ buffer = 'attribValue';
1000
+ break;
1001
+ case this.S.ATTRIB_VALUE_ENTITY_U:
1002
+ returnState = this.S.ATTRIB_VALUE_UNQUOTED;
1003
+ buffer = 'attribValue';
1004
+ break;
1005
+ default:
1006
+ throw new Error(`Unknown state: ${this.state}`);
1007
+ }
1008
+ if (c === ';') {
1009
+ this[buffer] += this.parseEntity();
1010
+ this.entity = '';
1011
+ this.state = returnState;
1012
+ }
1013
+ else if (SAX.isMatch(this.entity.length ? entityBody : entityStart, c)) {
1014
+ this.entity += c;
1015
+ }
1016
+ else {
1017
+ this.strictFail('Invalid character in entity name');
1018
+ this[buffer] += `&${this.entity}${c}`;
1019
+ this.entity = '';
1020
+ this.state = returnState;
1021
+ }
1022
+ continue;
1023
+ default:
1024
+ throw new Error(`Unknown state: ${this.state}`);
876
1025
  }
877
- this.state = this.S.CLOSE_TAG_SAW_WHITE;
878
- }
879
- continue;
880
- case this.S.CLOSE_TAG_SAW_WHITE:
881
- if (SAX.isWhitespace(c)) {
882
- continue;
883
- }
884
- if (c === '>') {
885
- this.closeTag();
886
- } else {
887
- this.strictFail('Invalid characters in closing tag');
888
- }
889
- continue;
890
- case this.S.TEXT_ENTITY:
891
- case this.S.ATTRIB_VALUE_ENTITY_Q:
892
- case this.S.ATTRIB_VALUE_ENTITY_U:
893
- let returnState;
894
- let buffer;
895
- switch (this.state) {
896
- case this.S.TEXT_ENTITY:
897
- returnState = this.S.TEXT;
898
- buffer = 'textNode';
899
- break;
900
- case this.S.ATTRIB_VALUE_ENTITY_Q:
901
- returnState = this.S.ATTRIB_VALUE_QUOTED;
902
- buffer = 'attribValue';
903
- break;
904
- case this.S.ATTRIB_VALUE_ENTITY_U:
905
- returnState = this.S.ATTRIB_VALUE_UNQUOTED;
906
- buffer = 'attribValue';
907
- break;
908
- default:
909
- throw new Error(`Unknown state: ${this.state}`);
910
- }
911
- if (c === ';') {
912
- this[buffer] += this.parseEntity();
913
- this.entity = '';
914
- this.state = returnState;
915
- } else if (SAX.isMatch(this.entity.length ? entityBody : entityStart, c)) {
916
- this.entity += c;
917
- } else {
918
- this.strictFail('Invalid character in entity name');
919
- this[buffer] += `&${this.entity}${c}`;
920
- this.entity = '';
921
- this.state = returnState;
922
- }
923
- continue;
924
- default:
925
- throw new Error(`Unknown state: ${this.state}`);
926
- }
927
- }
928
- if (this.position >= this.bufferCheckPosition) {
929
- this.checkBufferLength();
930
- }
931
- return this;
932
- }
933
- emit(event, data) {
934
- if (this.events.hasOwnProperty(event)) {
935
- const eventName = event.replace(/^on/, '');
936
- this.events[event](data, eventName, this);
937
- }
938
- }
939
- clearBuffers() {
940
- for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
941
- this[this[i]] = '';
1026
+ } // while
1027
+ if (this.position >= this.bufferCheckPosition) {
1028
+ this.checkBufferLength();
1029
+ }
1030
+ return this;
942
1031
  }
943
- }
944
- flushBuffers() {
945
- this.closeText();
946
- if (this.cdata !== '') {
947
- this.emitNode('oncdata', this.cdata);
948
- this.cdata = '';
1032
+ emit(event, data) {
1033
+ if (this.events.hasOwnProperty(event)) {
1034
+ const eventName = event.replace(/^on/, '');
1035
+ this.events[event](data, eventName, this);
1036
+ }
949
1037
  }
950
- if (this.script !== '') {
951
- this.emitNode('onscript', this.script);
952
- this.script = '';
1038
+ clearBuffers() {
1039
+ for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1040
+ this[this[i]] = '';
1041
+ }
953
1042
  }
954
- }
955
- end() {
956
- if (this.sawRoot && !this.closedRoot) this.strictFail('Unclosed root tag');
957
- if (this.state !== this.S.BEGIN && this.state !== this.S.BEGIN_WHITESPACE && this.state !== this.S.TEXT) {
958
- this.errorFunction('Unexpected end');
1043
+ flushBuffers() {
1044
+ this.closeText();
1045
+ if (this.cdata !== '') {
1046
+ this.emitNode('oncdata', this.cdata);
1047
+ this.cdata = '';
1048
+ }
1049
+ if (this.script !== '') {
1050
+ this.emitNode('onscript', this.script);
1051
+ this.script = '';
1052
+ }
959
1053
  }
960
- this.closeText();
961
- this.c = '';
962
- this.closed = true;
963
- this.emit('onend');
964
- return new SAXParser(this.opt);
965
- }
966
- errorFunction(er) {
967
- this.closeText();
968
- if (this.trackPosition) {
969
- er += `\nLine: ${this.line}\nColumn: ${this.column}\nChar: ${this.c}`;
1054
+ end() {
1055
+ if (this.sawRoot && !this.closedRoot)
1056
+ this.strictFail('Unclosed root tag');
1057
+ if (this.state !== this.S.BEGIN &&
1058
+ this.state !== this.S.BEGIN_WHITESPACE &&
1059
+ this.state !== this.S.TEXT) {
1060
+ this.errorFunction('Unexpected end');
1061
+ }
1062
+ this.closeText();
1063
+ this.c = '';
1064
+ this.closed = true;
1065
+ this.emit('onend');
1066
+ return new SAXParser(this.opt);
970
1067
  }
971
- const error = new Error(er);
972
- this.error = error;
973
- this.emit('onerror', error);
974
- return this;
975
- }
976
- attrib() {
977
- if (!this.strict) {
978
- this.attribName = this.attribName[this.looseCase]();
1068
+ errorFunction(er) {
1069
+ this.closeText();
1070
+ if (this.trackPosition) {
1071
+ er += `\nLine: ${this.line}\nColumn: ${this.column}\nChar: ${this.c}`;
1072
+ }
1073
+ const error = new Error(er);
1074
+ this.error = error;
1075
+ this.emit('onerror', error);
1076
+ return this;
979
1077
  }
980
- if (this.attribList.indexOf(this.attribName) !== -1 || this.tag.attributes.hasOwnProperty(this.attribName)) {
981
- this.attribName = this.attribValue = '';
982
- return;
1078
+ attrib() {
1079
+ if (!this.strict) {
1080
+ this.attribName = this.attribName[this.looseCase]();
1081
+ }
1082
+ if (this.attribList.indexOf(this.attribName) !== -1 ||
1083
+ this.tag.attributes.hasOwnProperty(this.attribName)) {
1084
+ this.attribName = this.attribValue = '';
1085
+ return;
1086
+ }
1087
+ if (this.opt.xmlns) {
1088
+ const qn = SAX.qname(this.attribName, true);
1089
+ const prefix = qn.prefix;
1090
+ const local = qn.local;
1091
+ if (prefix === 'xmlns') {
1092
+ // namespace binding attribute. push the binding into scope
1093
+ if (local === 'xml' && this.attribValue !== this.XML_NAMESPACE) {
1094
+ this.strictFail(`xml: prefix must be bound to ${this.XML_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
1095
+ }
1096
+ else if (local === 'xmlns' && this.attribValue !== this.XMLNS_NAMESPACE) {
1097
+ this.strictFail(`xmlns: prefix must be bound to ${this.XMLNS_NAMESPACE}\n` +
1098
+ `Actual: ${this.attribValue}`);
1099
+ }
1100
+ else {
1101
+ const tag = this.tag;
1102
+ const parent = this.tags[this.tags.length - 1] || this;
1103
+ if (tag.ns === parent.ns) {
1104
+ tag.ns = Object.create(parent.ns);
1105
+ }
1106
+ tag.ns[local] = this.attribValue;
1107
+ }
1108
+ }
1109
+ // defer onattribute events until all attributes have been seen
1110
+ // so any new bindings can take effect. preserve attribute order
1111
+ // so deferred events can be emitted in document order
1112
+ this.attribList.push([this.attribName, this.attribValue]);
1113
+ }
1114
+ else {
1115
+ // in non-xmlns mode, we can emit the event right away
1116
+ this.tag.attributes[this.attribName] = this.attribValue;
1117
+ this.emitNode('onattribute', {
1118
+ name: this.attribName,
1119
+ value: this.attribValue
1120
+ });
1121
+ }
1122
+ this.attribName = this.attribValue = '';
983
1123
  }
984
- if (this.opt.xmlns) {
985
- const qn = SAX.qname(this.attribName, true);
986
- const prefix = qn.prefix;
987
- const local = qn.local;
988
- if (prefix === 'xmlns') {
989
- if (local === 'xml' && this.attribValue !== this.XML_NAMESPACE) {
990
- this.strictFail(`xml: prefix must be bound to ${this.XML_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
991
- } else if (local === 'xmlns' && this.attribValue !== this.XMLNS_NAMESPACE) {
992
- this.strictFail(`xmlns: prefix must be bound to ${this.XMLNS_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
993
- } else {
994
- const tag = this.tag;
995
- const parent = this.tags[this.tags.length - 1] || this;
996
- if (tag.ns === parent.ns) {
997
- tag.ns = Object.create(parent.ns);
998
- }
999
- tag.ns[local] = this.attribValue;
1124
+ newTag() {
1125
+ if (!this.strict)
1126
+ this.tagName = this.tagName[this.looseCase]();
1127
+ const parent = this.tags[this.tags.length - 1] || this;
1128
+ const tag = (this.tag = { name: this.tagName, attributes: {} });
1129
+ // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
1130
+ if (this.opt.xmlns) {
1131
+ tag.ns = parent.ns;
1000
1132
  }
1001
- }
1002
- this.attribList.push([this.attribName, this.attribValue]);
1003
- } else {
1004
- this.tag.attributes[this.attribName] = this.attribValue;
1005
- this.emitNode('onattribute', {
1006
- name: this.attribName,
1007
- value: this.attribValue
1008
- });
1133
+ this.attribList.length = 0;
1134
+ this.emitNode('onopentagstart', tag);
1009
1135
  }
1010
- this.attribName = this.attribValue = '';
1011
- }
1012
- newTag() {
1013
- if (!this.strict) this.tagName = this.tagName[this.looseCase]();
1014
- const parent = this.tags[this.tags.length - 1] || this;
1015
- const tag = this.tag = {
1016
- name: this.tagName,
1017
- attributes: {}
1018
- };
1019
- if (this.opt.xmlns) {
1020
- tag.ns = parent.ns;
1136
+ parseEntity() {
1137
+ let entity = this.entity;
1138
+ const entityLC = entity.toLowerCase();
1139
+ let num = NaN;
1140
+ let numStr = '';
1141
+ if (this.ENTITIES[entity]) {
1142
+ return this.ENTITIES[entity];
1143
+ }
1144
+ if (this.ENTITIES[entityLC]) {
1145
+ return this.ENTITIES[entityLC];
1146
+ }
1147
+ entity = entityLC;
1148
+ if (entity.charAt(0) === '#') {
1149
+ if (entity.charAt(1) === 'x') {
1150
+ entity = entity.slice(2);
1151
+ // TODO: remove tslint:disable
1152
+ // tslint:disable-next-line
1153
+ num = parseInt(entity, 16);
1154
+ numStr = num.toString(16);
1155
+ }
1156
+ else {
1157
+ entity = entity.slice(1);
1158
+ // TODO: remove tslint:disable
1159
+ // tslint:disable-next-line
1160
+ num = parseInt(entity, 10);
1161
+ numStr = num.toString(10);
1162
+ }
1163
+ }
1164
+ entity = entity.replace(/^0+/, '');
1165
+ if (isNaN(num) || numStr.toLowerCase() !== entity) {
1166
+ this.strictFail('Invalid character entity');
1167
+ return `&${this.entity};`;
1168
+ }
1169
+ return String.fromCodePoint(num);
1021
1170
  }
1022
- this.attribList.length = 0;
1023
- this.emitNode('onopentagstart', tag);
1024
- }
1025
- parseEntity() {
1026
- let entity = this.entity;
1027
- const entityLC = entity.toLowerCase();
1028
- let num = NaN;
1029
- let numStr = '';
1030
- if (this.ENTITIES[entity]) {
1031
- return this.ENTITIES[entity];
1171
+ beginWhiteSpace(c) {
1172
+ if (c === '<') {
1173
+ this.state = this.S.OPEN_WAKA;
1174
+ this.startTagPosition = this.position;
1175
+ }
1176
+ else if (!SAX.isWhitespace(c)) {
1177
+ // have to process this as a text node.
1178
+ // weird, but happens.
1179
+ this.strictFail('Non-whitespace before first tag.');
1180
+ this.textNode = c;
1181
+ this.state = this.S.TEXT;
1182
+ }
1183
+ else {
1184
+ }
1032
1185
  }
1033
- if (this.ENTITIES[entityLC]) {
1034
- return this.ENTITIES[entityLC];
1186
+ strictFail(message) {
1187
+ if (typeof this !== 'object' || !(this instanceof SAXParser)) {
1188
+ throw new Error('bad call to strictFail');
1189
+ }
1190
+ if (this.strict) {
1191
+ this.errorFunction(message);
1192
+ }
1035
1193
  }
1036
- entity = entityLC;
1037
- if (entity.charAt(0) === '#') {
1038
- if (entity.charAt(1) === 'x') {
1039
- entity = entity.slice(2);
1040
- num = parseInt(entity, 16);
1041
- numStr = num.toString(16);
1042
- } else {
1043
- entity = entity.slice(1);
1044
- num = parseInt(entity, 10);
1045
- numStr = num.toString(10);
1046
- }
1194
+ textApplyOptions(text) {
1195
+ if (this.opt.trim)
1196
+ text = text.trim();
1197
+ if (this.opt.normalize)
1198
+ text = text.replace(/\s+/g, ' ');
1199
+ return text;
1047
1200
  }
1048
- entity = entity.replace(/^0+/, '');
1049
- if (isNaN(num) || numStr.toLowerCase() !== entity) {
1050
- this.strictFail('Invalid character entity');
1051
- return `&${this.entity};`;
1201
+ emitNode(nodeType, data) {
1202
+ if (this.textNode)
1203
+ this.closeText();
1204
+ this.emit(nodeType, data);
1052
1205
  }
1053
- return String.fromCodePoint(num);
1054
- }
1055
- beginWhiteSpace(c) {
1056
- if (c === '<') {
1057
- this.state = this.S.OPEN_WAKA;
1058
- this.startTagPosition = this.position;
1059
- } else if (!SAX.isWhitespace(c)) {
1060
- this.strictFail('Non-whitespace before first tag.');
1061
- this.textNode = c;
1062
- this.state = this.S.TEXT;
1063
- } else {}
1064
- }
1065
- strictFail(message) {
1066
- if (typeof this !== 'object' || !(this instanceof SAXParser)) {
1067
- throw new Error('bad call to strictFail');
1206
+ closeText() {
1207
+ this.textNode = this.textApplyOptions(this.textNode);
1208
+ // TODO: figure out why this.textNode can be "" and "undefined"
1209
+ if (this.textNode !== undefined && this.textNode !== '' && this.textNode !== 'undefined') {
1210
+ this.emit('ontext', this.textNode);
1211
+ }
1212
+ this.textNode = '';
1068
1213
  }
1069
- if (this.strict) {
1070
- this.errorFunction(message);
1214
+ checkBufferLength() {
1215
+ const maxAllowed = Math.max(this.opt.MAX_BUFFER_LENGTH, 10);
1216
+ let maxActual = 0;
1217
+ for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1218
+ const len = this[this.BUFFERS[i]]?.length || 0;
1219
+ if (len > maxAllowed) {
1220
+ // Text/cdata nodes can get big, and since they're buffered,
1221
+ // we can get here under normal conditions.
1222
+ // Avoid issues by emitting the text node now,
1223
+ // so at least it won't get any bigger.
1224
+ switch (this.BUFFERS[i]) {
1225
+ case 'textNode':
1226
+ this.closeText();
1227
+ break;
1228
+ case 'cdata':
1229
+ this.emitNode('oncdata', this.cdata);
1230
+ this.cdata = '';
1231
+ break;
1232
+ case 'script':
1233
+ this.emitNode('onscript', this.script);
1234
+ this.script = '';
1235
+ break;
1236
+ default:
1237
+ this.errorFunction(`Max buffer length exceeded: ${this.BUFFERS[i]}`);
1238
+ }
1239
+ }
1240
+ maxActual = Math.max(maxActual, len);
1241
+ }
1242
+ // schedule the next check for the earliest possible buffer overrun.
1243
+ const m = this.opt.MAX_BUFFER_LENGTH - maxActual;
1244
+ this.bufferCheckPosition = m + this.position;
1071
1245
  }
1072
- }
1073
- textApplyOptions(text) {
1074
- if (this.opt.trim) text = text.trim();
1075
- if (this.opt.normalize) text = text.replace(/\s+/g, ' ');
1076
- return text;
1077
- }
1078
- emitNode(nodeType, data) {
1079
- if (this.textNode) this.closeText();
1080
- this.emit(nodeType, data);
1081
- }
1082
- closeText() {
1083
- this.textNode = this.textApplyOptions(this.textNode);
1084
- if (this.textNode !== undefined && this.textNode !== '' && this.textNode !== 'undefined') {
1085
- this.emit('ontext', this.textNode);
1246
+ openTag(selfClosing) {
1247
+ if (this.opt.xmlns) {
1248
+ // emit namespace binding events
1249
+ const tag = this.tag;
1250
+ // add namespace info to tag
1251
+ const qn = SAX.qname(this.tagName);
1252
+ tag.prefix = qn.prefix;
1253
+ tag.local = qn.local;
1254
+ tag.uri = tag.ns[qn.prefix] || '';
1255
+ if (tag.prefix && !tag.uri) {
1256
+ this.strictFail(`Unbound namespace prefix: ${JSON.stringify(this.tagName)}`);
1257
+ tag.uri = qn.prefix;
1258
+ }
1259
+ const parent = this.tags[this.tags.length - 1] || this;
1260
+ if (tag.ns && parent.ns !== tag.ns) {
1261
+ const that = this;
1262
+ Object.keys(tag.ns).forEach((p) => {
1263
+ that.emitNode('onopennamespace', {
1264
+ prefix: p,
1265
+ uri: tag.ns[p]
1266
+ });
1267
+ });
1268
+ }
1269
+ // handle deferred onattribute events
1270
+ // Note: do not apply default ns to attributes:
1271
+ // http://www.w3.org/TR/REC-xml-names/#defaulting
1272
+ for (let i = 0, l = this.attribList.length; i < l; i++) {
1273
+ const nv = this.attribList[i];
1274
+ const name = nv[0];
1275
+ const value = nv[1];
1276
+ const qualName = SAX.qname(name, true);
1277
+ const prefix = qualName.prefix;
1278
+ const local = qualName.local;
1279
+ const uri = prefix === '' ? '' : tag.ns[prefix] || '';
1280
+ const a = {
1281
+ name,
1282
+ value,
1283
+ prefix,
1284
+ local,
1285
+ uri
1286
+ };
1287
+ // if there's any attributes with an undefined namespace,
1288
+ // then fail on them now.
1289
+ if (prefix && prefix !== 'xmlns' && !uri) {
1290
+ this.strictFail(`Unbound namespace prefix: ${JSON.stringify(prefix)}`);
1291
+ a.uri = prefix;
1292
+ }
1293
+ this.tag.attributes[name] = a;
1294
+ this.emitNode('onattribute', a);
1295
+ }
1296
+ this.attribList.length = 0;
1297
+ }
1298
+ this.tag.isSelfClosing = Boolean(selfClosing);
1299
+ // process the tag
1300
+ this.sawRoot = true;
1301
+ this.tags.push(this.tag);
1302
+ this.emitNode('onopentag', this.tag);
1303
+ if (!selfClosing) {
1304
+ // special case for <script> in non-strict mode.
1305
+ if (!this.noscript && this.tagName.toLowerCase() === 'script') {
1306
+ this.state = this.S.SCRIPT;
1307
+ }
1308
+ else {
1309
+ this.state = this.S.TEXT;
1310
+ }
1311
+ this.tag = null;
1312
+ this.tagName = '';
1313
+ }
1314
+ this.attribName = this.attribValue = '';
1315
+ this.attribList.length = 0;
1086
1316
  }
1087
- this.textNode = '';
1088
- }
1089
- checkBufferLength() {
1090
- const maxAllowed = Math.max(this.opt.MAX_BUFFER_LENGTH, 10);
1091
- let maxActual = 0;
1092
- for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1093
- var _this$this$BUFFERS$i;
1094
- const len = ((_this$this$BUFFERS$i = this[this.BUFFERS[i]]) === null || _this$this$BUFFERS$i === void 0 ? void 0 : _this$this$BUFFERS$i.length) || 0;
1095
- if (len > maxAllowed) {
1096
- switch (this.BUFFERS[i]) {
1097
- case 'textNode':
1098
- this.closeText();
1099
- break;
1100
- case 'cdata':
1101
- this.emitNode('oncdata', this.cdata);
1102
- this.cdata = '';
1103
- break;
1104
- case 'script':
1317
+ closeTag() {
1318
+ if (!this.tagName) {
1319
+ this.strictFail('Weird empty close tag.');
1320
+ this.textNode += '</>';
1321
+ this.state = this.S.TEXT;
1322
+ return;
1323
+ }
1324
+ if (this.script) {
1325
+ if (this.tagName !== 'script') {
1326
+ this.script += `</${this.tagName}>`;
1327
+ this.tagName = '';
1328
+ this.state = this.S.SCRIPT;
1329
+ return;
1330
+ }
1105
1331
  this.emitNode('onscript', this.script);
1106
1332
  this.script = '';
1107
- break;
1108
- default:
1109
- this.errorFunction(`Max buffer length exceeded: ${this.BUFFERS[i]}`);
1110
1333
  }
1111
- }
1112
- maxActual = Math.max(maxActual, len);
1113
- }
1114
- const m = this.opt.MAX_BUFFER_LENGTH - maxActual;
1115
- this.bufferCheckPosition = m + this.position;
1116
- }
1117
- openTag(selfClosing) {
1118
- if (this.opt.xmlns) {
1119
- const tag = this.tag;
1120
- const qn = SAX.qname(this.tagName);
1121
- tag.prefix = qn.prefix;
1122
- tag.local = qn.local;
1123
- tag.uri = tag.ns[qn.prefix] || '';
1124
- if (tag.prefix && !tag.uri) {
1125
- this.strictFail(`Unbound namespace prefix: ${JSON.stringify(this.tagName)}`);
1126
- tag.uri = qn.prefix;
1127
- }
1128
- const parent = this.tags[this.tags.length - 1] || this;
1129
- if (tag.ns && parent.ns !== tag.ns) {
1130
- const that = this;
1131
- Object.keys(tag.ns).forEach(p => {
1132
- that.emitNode('onopennamespace', {
1133
- prefix: p,
1134
- uri: tag.ns[p]
1135
- });
1136
- });
1137
- }
1138
- for (let i = 0, l = this.attribList.length; i < l; i++) {
1139
- const nv = this.attribList[i];
1140
- const name = nv[0];
1141
- const value = nv[1];
1142
- const qualName = SAX.qname(name, true);
1143
- const prefix = qualName.prefix;
1144
- const local = qualName.local;
1145
- const uri = prefix === '' ? '' : tag.ns[prefix] || '';
1146
- const a = {
1147
- name,
1148
- value,
1149
- prefix,
1150
- local,
1151
- uri
1152
- };
1153
- if (prefix && prefix !== 'xmlns' && !uri) {
1154
- this.strictFail(`Unbound namespace prefix: ${JSON.stringify(prefix)}`);
1155
- a.uri = prefix;
1334
+ // first make sure that the closing tag actually exists.
1335
+ // <a><b></c></b></a> will close everything, otherwise.
1336
+ let t = this.tags.length;
1337
+ let tagName = this.tagName;
1338
+ if (!this.strict) {
1339
+ tagName = tagName[this.looseCase]();
1156
1340
  }
1157
- this.tag.attributes[name] = a;
1158
- this.emitNode('onattribute', a);
1159
- }
1160
- this.attribList.length = 0;
1161
- }
1162
- this.tag.isSelfClosing = Boolean(selfClosing);
1163
- this.sawRoot = true;
1164
- this.tags.push(this.tag);
1165
- this.emitNode('onopentag', this.tag);
1166
- if (!selfClosing) {
1167
- if (!this.noscript && this.tagName.toLowerCase() === 'script') {
1168
- this.state = this.S.SCRIPT;
1169
- } else {
1170
- this.state = this.S.TEXT;
1171
- }
1172
- this.tag = null;
1173
- this.tagName = '';
1174
- }
1175
- this.attribName = this.attribValue = '';
1176
- this.attribList.length = 0;
1177
- }
1178
- closeTag() {
1179
- if (!this.tagName) {
1180
- this.strictFail('Weird empty close tag.');
1181
- this.textNode += '</>';
1182
- this.state = this.S.TEXT;
1183
- return;
1184
- }
1185
- if (this.script) {
1186
- if (this.tagName !== 'script') {
1187
- this.script += `</${this.tagName}>`;
1188
- this.tagName = '';
1189
- this.state = this.S.SCRIPT;
1190
- return;
1191
- }
1192
- this.emitNode('onscript', this.script);
1193
- this.script = '';
1194
- }
1195
- let t = this.tags.length;
1196
- let tagName = this.tagName;
1197
- if (!this.strict) {
1198
- tagName = tagName[this.looseCase]();
1199
- }
1200
- while (t--) {
1201
- const close = this.tags[t];
1202
- if (close.name !== tagName) {
1203
- this.strictFail('Unexpected close tag');
1204
- } else {
1205
- break;
1206
- }
1207
- }
1208
- if (t < 0) {
1209
- this.strictFail(`Unmatched closing tag: ${this.tagName}`);
1210
- this.textNode += `</${this.tagName}>`;
1211
- this.state = this.S.TEXT;
1212
- return;
1213
- }
1214
- this.tagName = tagName;
1215
- let s = this.tags.length;
1216
- while (s-- > t) {
1217
- const tag = this.tag = this.tags.pop();
1218
- this.tagName = this.tag.name;
1219
- this.emitNode('onclosetag', this.tagName);
1220
- const x = {};
1221
- for (const i in tag.ns) {
1222
- if (tag.ns.hasOwnProperty(i)) {
1223
- x[i] = tag.ns[i];
1341
+ while (t--) {
1342
+ const close = this.tags[t];
1343
+ if (close.name !== tagName) {
1344
+ // fail the first time in strict mode
1345
+ this.strictFail('Unexpected close tag');
1346
+ }
1347
+ else {
1348
+ break;
1349
+ }
1224
1350
  }
1225
- }
1226
- const parent = this.tags[this.tags.length - 1] || this;
1227
- if (this.opt.xmlns && tag.ns !== parent.ns) {
1228
- const that = this;
1229
- Object.keys(tag.ns).forEach(p => {
1230
- const n = tag.ns[p];
1231
- that.emitNode('onclosenamespace', {
1232
- prefix: p,
1233
- uri: n
1234
- });
1235
- });
1236
- }
1351
+ // didn't find it. we already failed for strict, so just abort.
1352
+ if (t < 0) {
1353
+ this.strictFail(`Unmatched closing tag: ${this.tagName}`);
1354
+ this.textNode += `</${this.tagName}>`;
1355
+ this.state = this.S.TEXT;
1356
+ return;
1357
+ }
1358
+ this.tagName = tagName;
1359
+ let s = this.tags.length;
1360
+ while (s-- > t) {
1361
+ const tag = (this.tag = this.tags.pop());
1362
+ this.tagName = this.tag.name;
1363
+ this.emitNode('onclosetag', this.tagName);
1364
+ const x = {};
1365
+ for (const i in tag.ns) {
1366
+ if (tag.ns.hasOwnProperty(i)) {
1367
+ x[i] = tag.ns[i];
1368
+ }
1369
+ }
1370
+ const parent = this.tags[this.tags.length - 1] || this;
1371
+ if (this.opt.xmlns && tag.ns !== parent.ns) {
1372
+ // remove namespace bindings introduced by tag
1373
+ const that = this;
1374
+ Object.keys(tag.ns).forEach((p) => {
1375
+ const n = tag.ns[p];
1376
+ that.emitNode('onclosenamespace', { prefix: p, uri: n });
1377
+ });
1378
+ }
1379
+ }
1380
+ if (t === 0)
1381
+ this.closedRoot = true;
1382
+ this.tagName = this.attribValue = this.attribName = '';
1383
+ this.attribList.length = 0;
1384
+ this.state = this.S.TEXT;
1237
1385
  }
1238
- if (t === 0) this.closedRoot = true;
1239
- this.tagName = this.attribValue = this.attribName = '';
1240
- this.attribList.length = 0;
1241
- this.state = this.S.TEXT;
1242
- }
1243
1386
  }
1387
+ /**
1388
+ *
1389
+ * @todo Weird inheritance, with some variables initialized in subclass
1390
+ */
1244
1391
  export class SAXParser extends SAX {
1245
- constructor(opt) {
1246
- super();
1247
- this.opt = DEFAULT_SAX_PARSER_OPTIONS;
1248
- this.events = DEFAULT_SAX_EVENTS;
1249
- this.clearBuffers();
1250
- this.opt = opt = {
1251
- ...this.opt,
1252
- ...opt
1253
- };
1254
- this.events = {
1255
- ...this.events,
1256
- ...opt
1257
- };
1258
- this.q = this.c = '';
1259
- this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags;
1260
- this.bufferCheckPosition = this.opt.MAX_BUFFER_LENGTH;
1261
- this.looseCase = this.opt.lowercase ? 'toLowerCase' : 'toUpperCase';
1262
- this.tags = [];
1263
- this.closed = this.closedRoot = this.sawRoot = false;
1264
- this.tag = this.error = null;
1265
- this.strict = Boolean(this.opt.strict);
1266
- this.noscript = Boolean(this.opt.strict || this.opt.noscript);
1267
- this.state = this.S.BEGIN;
1268
- this.strictEntities = this.opt.strictEntities;
1269
- this.ENTITIES = this.strictEntities ? Object.create(this.XML_ENTITIES) : Object.create(this.ENTITIES);
1270
- this.attribList = [];
1271
- if (this.opt.xmlns) {
1272
- this.ns = Object.create(this.rootNS);
1392
+ constructor(opt) {
1393
+ super();
1394
+ this.opt = DEFAULT_SAX_PARSER_OPTIONS;
1395
+ this.events = DEFAULT_SAX_EVENTS;
1396
+ this.clearBuffers();
1397
+ this.opt = opt = { ...this.opt, ...opt };
1398
+ this.events = { ...this.events, ...opt };
1399
+ this.q = this.c = '';
1400
+ this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags;
1401
+ this.bufferCheckPosition = this.opt.MAX_BUFFER_LENGTH;
1402
+ this.looseCase = this.opt.lowercase ? 'toLowerCase' : 'toUpperCase';
1403
+ this.tags = [];
1404
+ this.closed = this.closedRoot = this.sawRoot = false;
1405
+ this.tag = this.error = null;
1406
+ this.strict = Boolean(this.opt.strict);
1407
+ this.noscript = Boolean(this.opt.strict || this.opt.noscript);
1408
+ this.state = this.S.BEGIN;
1409
+ this.strictEntities = this.opt.strictEntities;
1410
+ this.ENTITIES = this.strictEntities
1411
+ ? Object.create(this.XML_ENTITIES)
1412
+ : Object.create(this.ENTITIES);
1413
+ this.attribList = [];
1414
+ // namespaces form a prototype chain.
1415
+ // it always points at the current tag,
1416
+ // which protos to its parent tag.
1417
+ if (this.opt.xmlns) {
1418
+ this.ns = Object.create(this.rootNS);
1419
+ }
1420
+ // mostly just for error reporting
1421
+ this.trackPosition = this.opt.position !== false;
1422
+ if (this.trackPosition) {
1423
+ this.position = this.line = this.column = 0;
1424
+ }
1425
+ this.emit('onready');
1426
+ }
1427
+ resume() {
1428
+ this.error = null;
1429
+ return this;
1430
+ }
1431
+ close() {
1432
+ return this.write(null);
1273
1433
  }
1274
- this.trackPosition = this.opt.position !== false;
1275
- if (this.trackPosition) {
1276
- this.position = this.line = this.column = 0;
1434
+ flush() {
1435
+ this.flushBuffers();
1277
1436
  }
1278
- this.emit('onready');
1279
- }
1280
- resume() {
1281
- this.error = null;
1282
- return this;
1283
- }
1284
- close() {
1285
- return this.write(null);
1286
- }
1287
- flush() {
1288
- this.flushBuffers();
1289
- }
1290
1437
  }
1291
1438
  SAXParser.ENTITIES = ENTITIES;
1292
- //# sourceMappingURL=sax.js.map