@loaders.gl/xml 4.2.0-alpha.4 → 4.2.0-alpha.6

This diff shows the changes between two publicly available versions of this package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,1292 +1,1453 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
1
4
  const DEFAULT_SAX_EVENTS = {
2
- ontext: () => {},
3
- onprocessinginstruction: () => {},
4
- onsgmldeclaration: () => {},
5
- ondoctype: () => {},
6
- oncomment: () => {},
7
- onopentagstart: () => {},
8
- onattribute: () => {},
9
- onopentag: () => {},
10
- onclosetag: () => {},
11
- onopencdata: () => {},
12
- oncdata: () => {},
13
- onclosecdata: () => {},
14
- onerror: () => {},
15
- onend: () => {},
16
- onready: () => {},
17
- onscript: () => {},
18
- onopennamespace: () => {},
19
- onclosenamespace: () => {}
5
+ ontext: () => { },
6
+ onprocessinginstruction: () => { },
7
+ onsgmldeclaration: () => { },
8
+ ondoctype: () => { },
9
+ oncomment: () => { },
10
+ onopentagstart: () => { },
11
+ onattribute: () => { },
12
+ onopentag: () => { },
13
+ onclosetag: () => { },
14
+ onopencdata: () => { },
15
+ oncdata: () => { },
16
+ onclosecdata: () => { },
17
+ onerror: () => { },
18
+ onend: () => { },
19
+ onready: () => { },
20
+ onscript: () => { },
21
+ onopennamespace: () => { },
22
+ onclosenamespace: () => { }
20
23
  };
21
24
  const DEFAULT_SAX_PARSER_OPTIONS = {
22
- ...DEFAULT_SAX_EVENTS,
23
- strict: false,
24
- MAX_BUFFER_LENGTH: 64 * 1024,
25
- lowercase: false,
26
- lowercasetags: false,
27
- noscript: false,
28
- strictEntities: false,
29
- xmlns: undefined,
30
- position: undefined,
31
- trim: undefined,
32
- normalize: undefined
25
+ ...DEFAULT_SAX_EVENTS,
26
+ strict: false,
27
+ MAX_BUFFER_LENGTH: 64 * 1024,
28
+ lowercase: false,
29
+ lowercasetags: false,
30
+ noscript: false,
31
+ strictEntities: false,
32
+ xmlns: undefined,
33
+ position: undefined,
34
+ trim: undefined,
35
+ normalize: undefined
33
36
  };
34
- const EVENTS = ['text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment', 'opentagstart', 'attribute', 'opentag', 'closetag', 'opencdata', 'cdata', 'closecdata', 'error', 'end', 'ready', 'script', 'opennamespace', 'closenamespace'];
35
- const BUFFERS = ['comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype', 'procInstName', 'procInstBody', 'entity', 'attribName', 'attribValue', 'cdata', 'script'];
37
+ const EVENTS = [
38
+ 'text',
39
+ 'processinginstruction',
40
+ 'sgmldeclaration',
41
+ 'doctype',
42
+ 'comment',
43
+ 'opentagstart',
44
+ 'attribute',
45
+ 'opentag',
46
+ 'closetag',
47
+ 'opencdata',
48
+ 'cdata',
49
+ 'closecdata',
50
+ 'error',
51
+ 'end',
52
+ 'ready',
53
+ 'script',
54
+ 'opennamespace',
55
+ 'closenamespace'
56
+ ];
57
+ const BUFFERS = [
58
+ 'comment',
59
+ 'sgmlDecl',
60
+ 'textNode',
61
+ 'tagName',
62
+ 'doctype',
63
+ 'procInstName',
64
+ 'procInstBody',
65
+ 'entity',
66
+ 'attribName',
67
+ 'attribValue',
68
+ 'cdata',
69
+ 'script'
70
+ ];
36
71
  const nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
37
72
  const nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
38
73
  const entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
39
74
  const entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;
40
75
  export const ENTITIES = {
41
- amp: '&',
42
- gt: '>',
43
- lt: '<',
44
- quot: '"',
45
- apos: "'",
46
- AElig: 198,
47
- Aacute: 193,
48
- Acirc: 194,
49
- Agrave: 192,
50
- Aring: 197,
51
- Atilde: 195,
52
- Auml: 196,
53
- Ccedil: 199,
54
- ETH: 208,
55
- Eacute: 201,
56
- Ecirc: 202,
57
- Egrave: 200,
58
- Euml: 203,
59
- Iacute: 205,
60
- Icirc: 206,
61
- Igrave: 204,
62
- Iuml: 207,
63
- Ntilde: 209,
64
- Oacute: 211,
65
- Ocirc: 212,
66
- Ograve: 210,
67
- Oslash: 216,
68
- Otilde: 213,
69
- Ouml: 214,
70
- THORN: 222,
71
- Uacute: 218,
72
- Ucirc: 219,
73
- Ugrave: 217,
74
- Uuml: 220,
75
- Yacute: 221,
76
- aacute: 225,
77
- acirc: 226,
78
- aelig: 230,
79
- agrave: 224,
80
- aring: 229,
81
- atilde: 227,
82
- auml: 228,
83
- ccedil: 231,
84
- eacute: 233,
85
- ecirc: 234,
86
- egrave: 232,
87
- eth: 240,
88
- euml: 235,
89
- iacute: 237,
90
- icirc: 238,
91
- igrave: 236,
92
- iuml: 239,
93
- ntilde: 241,
94
- oacute: 243,
95
- ocirc: 244,
96
- ograve: 242,
97
- oslash: 248,
98
- otilde: 245,
99
- ouml: 246,
100
- szlig: 223,
101
- thorn: 254,
102
- uacute: 250,
103
- ucirc: 251,
104
- ugrave: 249,
105
- uuml: 252,
106
- yacute: 253,
107
- yuml: 255,
108
- copy: 169,
109
- reg: 174,
110
- nbsp: 160,
111
- iexcl: 161,
112
- cent: 162,
113
- pound: 163,
114
- curren: 164,
115
- yen: 165,
116
- brvbar: 166,
117
- sect: 167,
118
- uml: 168,
119
- ordf: 170,
120
- laquo: 171,
121
- not: 172,
122
- shy: 173,
123
- macr: 175,
124
- deg: 176,
125
- plusmn: 177,
126
- sup1: 185,
127
- sup2: 178,
128
- sup3: 179,
129
- acute: 180,
130
- micro: 181,
131
- para: 182,
132
- middot: 183,
133
- cedil: 184,
134
- ordm: 186,
135
- raquo: 187,
136
- frac14: 188,
137
- frac12: 189,
138
- frac34: 190,
139
- iquest: 191,
140
- times: 215,
141
- divide: 247,
142
- OElig: 338,
143
- oelig: 339,
144
- Scaron: 352,
145
- scaron: 353,
146
- Yuml: 376,
147
- fnof: 402,
148
- circ: 710,
149
- tilde: 732,
150
- Alpha: 913,
151
- Beta: 914,
152
- Gamma: 915,
153
- Delta: 916,
154
- Epsilon: 917,
155
- Zeta: 918,
156
- Eta: 919,
157
- Theta: 920,
158
- Iota: 921,
159
- Kappa: 922,
160
- Lambda: 923,
161
- Mu: 924,
162
- Nu: 925,
163
- Xi: 926,
164
- Omicron: 927,
165
- Pi: 928,
166
- Rho: 929,
167
- Sigma: 931,
168
- Tau: 932,
169
- Upsilon: 933,
170
- Phi: 934,
171
- Chi: 935,
172
- Psi: 936,
173
- Omega: 937,
174
- alpha: 945,
175
- beta: 946,
176
- gamma: 947,
177
- delta: 948,
178
- epsilon: 949,
179
- zeta: 950,
180
- eta: 951,
181
- theta: 952,
182
- iota: 953,
183
- kappa: 954,
184
- lambda: 955,
185
- mu: 956,
186
- nu: 957,
187
- xi: 958,
188
- omicron: 959,
189
- pi: 960,
190
- rho: 961,
191
- sigmaf: 962,
192
- sigma: 963,
193
- tau: 964,
194
- upsilon: 965,
195
- phi: 966,
196
- chi: 967,
197
- psi: 968,
198
- omega: 969,
199
- thetasym: 977,
200
- upsih: 978,
201
- piv: 982,
202
- ensp: 8194,
203
- emsp: 8195,
204
- thinsp: 8201,
205
- zwnj: 8204,
206
- zwj: 8205,
207
- lrm: 8206,
208
- rlm: 8207,
209
- ndash: 8211,
210
- mdash: 8212,
211
- lsquo: 8216,
212
- rsquo: 8217,
213
- sbquo: 8218,
214
- ldquo: 8220,
215
- rdquo: 8221,
216
- bdquo: 8222,
217
- dagger: 8224,
218
- Dagger: 8225,
219
- bull: 8226,
220
- hellip: 8230,
221
- permil: 8240,
222
- prime: 8242,
223
- Prime: 8243,
224
- lsaquo: 8249,
225
- rsaquo: 8250,
226
- oline: 8254,
227
- frasl: 8260,
228
- euro: 8364,
229
- image: 8465,
230
- weierp: 8472,
231
- real: 8476,
232
- trade: 8482,
233
- alefsym: 8501,
234
- larr: 8592,
235
- uarr: 8593,
236
- rarr: 8594,
237
- darr: 8595,
238
- harr: 8596,
239
- crarr: 8629,
240
- lArr: 8656,
241
- uArr: 8657,
242
- rArr: 8658,
243
- dArr: 8659,
244
- hArr: 8660,
245
- forall: 8704,
246
- part: 8706,
247
- exist: 8707,
248
- empty: 8709,
249
- nabla: 8711,
250
- isin: 8712,
251
- notin: 8713,
252
- ni: 8715,
253
- prod: 8719,
254
- sum: 8721,
255
- minus: 8722,
256
- lowast: 8727,
257
- radic: 8730,
258
- prop: 8733,
259
- infin: 8734,
260
- ang: 8736,
261
- and: 8743,
262
- or: 8744,
263
- cap: 8745,
264
- cup: 8746,
265
- int: 8747,
266
- there4: 8756,
267
- sim: 8764,
268
- cong: 8773,
269
- asymp: 8776,
270
- ne: 8800,
271
- equiv: 8801,
272
- le: 8804,
273
- ge: 8805,
274
- sub: 8834,
275
- sup: 8835,
276
- nsub: 8836,
277
- sube: 8838,
278
- supe: 8839,
279
- oplus: 8853,
280
- otimes: 8855,
281
- perp: 8869,
282
- sdot: 8901,
283
- lceil: 8968,
284
- rceil: 8969,
285
- lfloor: 8970,
286
- rfloor: 8971,
287
- lang: 9001,
288
- rang: 9002,
289
- loz: 9674,
290
- spades: 9824,
291
- clubs: 9827,
292
- hearts: 9829,
293
- diams: 9830
76
+ amp: '&',
77
+ gt: '>',
78
+ lt: '<',
79
+ quot: '"',
80
+ apos: "'",
81
+ AElig: 198,
82
+ Aacute: 193,
83
+ Acirc: 194,
84
+ Agrave: 192,
85
+ Aring: 197,
86
+ Atilde: 195,
87
+ Auml: 196,
88
+ Ccedil: 199,
89
+ ETH: 208,
90
+ Eacute: 201,
91
+ Ecirc: 202,
92
+ Egrave: 200,
93
+ Euml: 203,
94
+ Iacute: 205,
95
+ Icirc: 206,
96
+ Igrave: 204,
97
+ Iuml: 207,
98
+ Ntilde: 209,
99
+ Oacute: 211,
100
+ Ocirc: 212,
101
+ Ograve: 210,
102
+ Oslash: 216,
103
+ Otilde: 213,
104
+ Ouml: 214,
105
+ THORN: 222,
106
+ Uacute: 218,
107
+ Ucirc: 219,
108
+ Ugrave: 217,
109
+ Uuml: 220,
110
+ Yacute: 221,
111
+ aacute: 225,
112
+ acirc: 226,
113
+ aelig: 230,
114
+ agrave: 224,
115
+ aring: 229,
116
+ atilde: 227,
117
+ auml: 228,
118
+ ccedil: 231,
119
+ eacute: 233,
120
+ ecirc: 234,
121
+ egrave: 232,
122
+ eth: 240,
123
+ euml: 235,
124
+ iacute: 237,
125
+ icirc: 238,
126
+ igrave: 236,
127
+ iuml: 239,
128
+ ntilde: 241,
129
+ oacute: 243,
130
+ ocirc: 244,
131
+ ograve: 242,
132
+ oslash: 248,
133
+ otilde: 245,
134
+ ouml: 246,
135
+ szlig: 223,
136
+ thorn: 254,
137
+ uacute: 250,
138
+ ucirc: 251,
139
+ ugrave: 249,
140
+ uuml: 252,
141
+ yacute: 253,
142
+ yuml: 255,
143
+ copy: 169,
144
+ reg: 174,
145
+ nbsp: 160,
146
+ iexcl: 161,
147
+ cent: 162,
148
+ pound: 163,
149
+ curren: 164,
150
+ yen: 165,
151
+ brvbar: 166,
152
+ sect: 167,
153
+ uml: 168,
154
+ ordf: 170,
155
+ laquo: 171,
156
+ not: 172,
157
+ shy: 173,
158
+ macr: 175,
159
+ deg: 176,
160
+ plusmn: 177,
161
+ sup1: 185,
162
+ sup2: 178,
163
+ sup3: 179,
164
+ acute: 180,
165
+ micro: 181,
166
+ para: 182,
167
+ middot: 183,
168
+ cedil: 184,
169
+ ordm: 186,
170
+ raquo: 187,
171
+ frac14: 188,
172
+ frac12: 189,
173
+ frac34: 190,
174
+ iquest: 191,
175
+ times: 215,
176
+ divide: 247,
177
+ OElig: 338,
178
+ oelig: 339,
179
+ Scaron: 352,
180
+ scaron: 353,
181
+ Yuml: 376,
182
+ fnof: 402,
183
+ circ: 710,
184
+ tilde: 732,
185
+ Alpha: 913,
186
+ Beta: 914,
187
+ Gamma: 915,
188
+ Delta: 916,
189
+ Epsilon: 917,
190
+ Zeta: 918,
191
+ Eta: 919,
192
+ Theta: 920,
193
+ Iota: 921,
194
+ Kappa: 922,
195
+ Lambda: 923,
196
+ Mu: 924,
197
+ Nu: 925,
198
+ Xi: 926,
199
+ Omicron: 927,
200
+ Pi: 928,
201
+ Rho: 929,
202
+ Sigma: 931,
203
+ Tau: 932,
204
+ Upsilon: 933,
205
+ Phi: 934,
206
+ Chi: 935,
207
+ Psi: 936,
208
+ Omega: 937,
209
+ alpha: 945,
210
+ beta: 946,
211
+ gamma: 947,
212
+ delta: 948,
213
+ epsilon: 949,
214
+ zeta: 950,
215
+ eta: 951,
216
+ theta: 952,
217
+ iota: 953,
218
+ kappa: 954,
219
+ lambda: 955,
220
+ mu: 956,
221
+ nu: 957,
222
+ xi: 958,
223
+ omicron: 959,
224
+ pi: 960,
225
+ rho: 961,
226
+ sigmaf: 962,
227
+ sigma: 963,
228
+ tau: 964,
229
+ upsilon: 965,
230
+ phi: 966,
231
+ chi: 967,
232
+ psi: 968,
233
+ omega: 969,
234
+ thetasym: 977,
235
+ upsih: 978,
236
+ piv: 982,
237
+ ensp: 8194,
238
+ emsp: 8195,
239
+ thinsp: 8201,
240
+ zwnj: 8204,
241
+ zwj: 8205,
242
+ lrm: 8206,
243
+ rlm: 8207,
244
+ ndash: 8211,
245
+ mdash: 8212,
246
+ lsquo: 8216,
247
+ rsquo: 8217,
248
+ sbquo: 8218,
249
+ ldquo: 8220,
250
+ rdquo: 8221,
251
+ bdquo: 8222,
252
+ dagger: 8224,
253
+ Dagger: 8225,
254
+ bull: 8226,
255
+ hellip: 8230,
256
+ permil: 8240,
257
+ prime: 8242,
258
+ Prime: 8243,
259
+ lsaquo: 8249,
260
+ rsaquo: 8250,
261
+ oline: 8254,
262
+ frasl: 8260,
263
+ euro: 8364,
264
+ image: 8465,
265
+ weierp: 8472,
266
+ real: 8476,
267
+ trade: 8482,
268
+ alefsym: 8501,
269
+ larr: 8592,
270
+ uarr: 8593,
271
+ rarr: 8594,
272
+ darr: 8595,
273
+ harr: 8596,
274
+ crarr: 8629,
275
+ lArr: 8656,
276
+ uArr: 8657,
277
+ rArr: 8658,
278
+ dArr: 8659,
279
+ hArr: 8660,
280
+ forall: 8704,
281
+ part: 8706,
282
+ exist: 8707,
283
+ empty: 8709,
284
+ nabla: 8711,
285
+ isin: 8712,
286
+ notin: 8713,
287
+ ni: 8715,
288
+ prod: 8719,
289
+ sum: 8721,
290
+ minus: 8722,
291
+ lowast: 8727,
292
+ radic: 8730,
293
+ prop: 8733,
294
+ infin: 8734,
295
+ ang: 8736,
296
+ and: 8743,
297
+ or: 8744,
298
+ cap: 8745,
299
+ cup: 8746,
300
+ int: 8747,
301
+ there4: 8756,
302
+ sim: 8764,
303
+ cong: 8773,
304
+ asymp: 8776,
305
+ ne: 8800,
306
+ equiv: 8801,
307
+ le: 8804,
308
+ ge: 8805,
309
+ sub: 8834,
310
+ sup: 8835,
311
+ nsub: 8836,
312
+ sube: 8838,
313
+ supe: 8839,
314
+ oplus: 8853,
315
+ otimes: 8855,
316
+ perp: 8869,
317
+ sdot: 8901,
318
+ lceil: 8968,
319
+ rceil: 8969,
320
+ lfloor: 8970,
321
+ rfloor: 8971,
322
+ lang: 9001,
323
+ rang: 9002,
324
+ loz: 9674,
325
+ spades: 9824,
326
+ clubs: 9827,
327
+ hearts: 9829,
328
+ diams: 9830
294
329
  };
295
- Object.keys(ENTITIES).forEach(key => {
296
- const e = ENTITIES[key];
297
- ENTITIES[key] = typeof e === 'number' ? String.fromCharCode(e) : e;
330
+ Object.keys(ENTITIES).forEach((key) => {
331
+ const e = ENTITIES[key];
332
+ ENTITIES[key] = typeof e === 'number' ? String.fromCharCode(e) : e;
298
333
  });
334
+ /**
335
+ * Internal helper class
336
+ */
299
337
  class SAX {
300
- constructor() {
301
- this.EVENTS = EVENTS;
302
- this.ENTITIES = {
303
- ...ENTITIES
338
+ EVENTS = EVENTS;
339
+ ENTITIES = {
340
+ // TODO: make it readonly, needed for entity-mega test
341
+ // amp, gt, lt, quot and apos are resolved to strings instead of numerical
342
+ // codes, IDK why
343
+ ...ENTITIES
304
344
  };
305
- this.events = void 0;
306
- this.XML_ENTITIES = {
307
- amp: '&',
308
- gt: '>',
309
- lt: '<',
310
- quot: '"',
311
- apos: "'"
345
+ XML_ENTITIES = {
346
+ amp: '&',
347
+ gt: '>',
348
+ lt: '<',
349
+ quot: '"',
350
+ apos: "'"
312
351
  };
313
- this.S = 0;
314
- this.opt = void 0;
315
- this.trackPosition = false;
316
- this.column = 0;
317
- this.line = 0;
318
- this.c = '';
319
- this.error = void 0;
320
- this.q = '';
321
- this.bufferCheckPosition = void 0;
322
- this.closed = false;
323
- this.tags = [];
324
- this.looseCase = '';
325
- this.closedRoot = false;
326
- this.sawRoot = false;
327
- this.strict = false;
328
- this.tag = void 0;
329
- this.strictEntities = void 0;
330
- this.state = void 0;
331
- this.noscript = false;
332
- this.attribList = [];
333
- this.ns = void 0;
334
- this.position = 0;
335
- this.STATE = {
336
- BEGIN: this.S++,
337
- BEGIN_WHITESPACE: this.S++,
338
- TEXT: this.S++,
339
- TEXT_ENTITY: this.S++,
340
- OPEN_WAKA: this.S++,
341
- SGML_DECL: this.S++,
342
- SGML_DECL_QUOTED: this.S++,
343
- DOCTYPE: this.S++,
344
- DOCTYPE_QUOTED: this.S++,
345
- DOCTYPE_DTD: this.S++,
346
- DOCTYPE_DTD_QUOTED: this.S++,
347
- COMMENT_STARTING: this.S++,
348
- COMMENT: this.S++,
349
- COMMENT_ENDING: this.S++,
350
- COMMENT_ENDED: this.S++,
351
- CDATA: this.S++,
352
- CDATA_ENDING: this.S++,
353
- CDATA_ENDING_2: this.S++,
354
- PROC_INST: this.S++,
355
- PROC_INST_BODY: this.S++,
356
- PROC_INST_ENDING: this.S++,
357
- OPEN_TAG: this.S++,
358
- OPEN_TAG_SLASH: this.S++,
359
- ATTRIB: this.S++,
360
- ATTRIB_NAME: this.S++,
361
- ATTRIB_NAME_SAW_WHITE: this.S++,
362
- ATTRIB_VALUE: this.S++,
363
- ATTRIB_VALUE_QUOTED: this.S++,
364
- ATTRIB_VALUE_CLOSED: this.S++,
365
- ATTRIB_VALUE_UNQUOTED: this.S++,
366
- ATTRIB_VALUE_ENTITY_Q: this.S++,
367
- ATTRIB_VALUE_ENTITY_U: this.S++,
368
- CLOSE_TAG: this.S++,
369
- CLOSE_TAG_SAW_WHITE: this.S++,
370
- SCRIPT: this.S++,
371
- SCRIPT_ENDING: this.S++
352
+ S = 0;
353
+ opt;
354
+ trackPosition = false;
355
+ column = 0;
356
+ line = 0;
357
+ c = '';
358
+ error;
359
+ q = '';
360
+ bufferCheckPosition;
361
+ closed = false;
362
+ tags = [];
363
+ looseCase = '';
364
+ closedRoot = false;
365
+ sawRoot = false;
366
+ strict = false;
367
+ tag;
368
+ strictEntities;
369
+ state;
370
+ noscript = false;
371
+ attribList = [];
372
+ ns;
373
+ position = 0;
374
+ STATE = {
375
+ BEGIN: this.S++, // leading byte order mark or whitespace
376
+ BEGIN_WHITESPACE: this.S++, // leading whitespace
377
+ TEXT: this.S++, // general stuff
378
+ TEXT_ENTITY: this.S++, // &amp and such.
379
+ OPEN_WAKA: this.S++, // <
380
+ SGML_DECL: this.S++, // <!BLARG
381
+ SGML_DECL_QUOTED: this.S++, // <!BLARG foo "bar
382
+ DOCTYPE: this.S++, // <!DOCTYPE
383
+ DOCTYPE_QUOTED: this.S++, // <!DOCTYPE "//blah
384
+ DOCTYPE_DTD: this.S++, // <!DOCTYPE "//blah" [ ...
385
+ DOCTYPE_DTD_QUOTED: this.S++, // <!DOCTYPE "//blah" [ "foo
386
+ COMMENT_STARTING: this.S++, // <!-
387
+ COMMENT: this.S++, // <!--
388
+ COMMENT_ENDING: this.S++, // <!-- blah -
389
+ COMMENT_ENDED: this.S++, // <!-- blah --
390
+ CDATA: this.S++, // <![CDATA[ something
391
+ CDATA_ENDING: this.S++, // ]
392
+ CDATA_ENDING_2: this.S++, // ]]
393
+ PROC_INST: this.S++, // <?hi
394
+ PROC_INST_BODY: this.S++, // <?hi there
395
+ PROC_INST_ENDING: this.S++, // <?hi "there" ?
396
+ OPEN_TAG: this.S++, // <strong
397
+ OPEN_TAG_SLASH: this.S++, // <strong /
398
+ ATTRIB: this.S++, // <a
399
+ ATTRIB_NAME: this.S++, // <a foo
400
+ ATTRIB_NAME_SAW_WHITE: this.S++, // <a foo _
401
+ ATTRIB_VALUE: this.S++, // <a foo=
402
+ ATTRIB_VALUE_QUOTED: this.S++, // <a foo="bar
403
+ ATTRIB_VALUE_CLOSED: this.S++, // <a foo="bar"
404
+ ATTRIB_VALUE_UNQUOTED: this.S++, // <a foo=bar
405
+ ATTRIB_VALUE_ENTITY_Q: this.S++, // <foo bar="&quot;"
406
+ ATTRIB_VALUE_ENTITY_U: this.S++, // <foo bar=&quot
407
+ CLOSE_TAG: this.S++, // </a
408
+ CLOSE_TAG_SAW_WHITE: this.S++, // </a >
409
+ SCRIPT: this.S++, // <script> ...
410
+ SCRIPT_ENDING: this.S++ // <script> ... <
372
411
  };
373
- this.BUFFERS = BUFFERS;
374
- this.CDATA = '[CDATA[';
375
- this.DOCTYPE = 'DOCTYPE';
376
- this.XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
377
- this.XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
378
- this.rootNS = {
379
- xml: this.XML_NAMESPACE,
380
- xmlns: this.XMLNS_NAMESPACE
412
+ BUFFERS = BUFFERS;
413
+ // private parser: (strict: boolean, opt: any) => SAXParser;
414
+ CDATA = '[CDATA[';
415
+ DOCTYPE = 'DOCTYPE';
416
+ XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
417
+ XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
418
+ rootNS = {
419
+ xml: this.XML_NAMESPACE,
420
+ xmlns: this.XMLNS_NAMESPACE
381
421
  };
382
- this.comment = void 0;
383
- this.sgmlDecl = void 0;
384
- this.textNode = '';
385
- this.tagName = void 0;
386
- this.doctype = void 0;
387
- this.procInstName = void 0;
388
- this.procInstBody = void 0;
389
- this.entity = '';
390
- this.attribName = void 0;
391
- this.attribValue = void 0;
392
- this.cdata = '';
393
- this.script = '';
394
- this.startTagPosition = 0;
395
- this.S = 0;
396
- for (const s in this.STATE) {
397
- if (this.STATE.hasOwnProperty(s)) {
398
- this.STATE[this.STATE[s]] = s;
399
- }
422
+ comment;
423
+ sgmlDecl;
424
+ textNode = '';
425
+ tagName;
426
+ doctype;
427
+ procInstName;
428
+ procInstBody;
429
+ entity = '';
430
+ attribName;
431
+ attribValue;
432
+ cdata = '';
433
+ script = '';
434
+ startTagPosition = 0;
435
+ constructor() {
436
+ this.S = 0;
437
+ for (const s in this.STATE) {
438
+ if (this.STATE.hasOwnProperty(s)) {
439
+ this.STATE[this.STATE[s]] = s;
440
+ }
441
+ }
442
+ // shorthand
443
+ this.S = this.STATE;
400
444
  }
401
- this.S = this.STATE;
402
- }
403
- static charAt(chunk, i) {
404
- let result = '';
405
- if (i < chunk.length) {
406
- result = chunk.charAt(i);
445
+ static charAt(chunk, i) {
446
+ let result = '';
447
+ if (i < chunk.length) {
448
+ result = chunk.charAt(i);
449
+ }
450
+ return result;
407
451
  }
408
- return result;
409
- }
410
- static isWhitespace(c) {
411
- return c === ' ' || c === '\n' || c === '\r' || c === '\t';
412
- }
413
- static isQuote(c) {
414
- return c === '"' || c === "'";
415
- }
416
- static isAttribEnd(c) {
417
- return c === '>' || SAX.isWhitespace(c);
418
- }
419
- static isMatch(regex, c) {
420
- return regex.test(c);
421
- }
422
- static notMatch(regex, c) {
423
- return !SAX.isMatch(regex, c);
424
- }
425
- static qname(name, attribute) {
426
- const i = name.indexOf(':');
427
- const qualName = i < 0 ? ['', name] : name.split(':');
428
- let prefix = qualName[0];
429
- let local = qualName[1];
430
- if (attribute && name === 'xmlns') {
431
- prefix = 'xmlns';
432
- local = '';
452
+ static isWhitespace(c) {
453
+ return c === ' ' || c === '\n' || c === '\r' || c === '\t';
433
454
  }
434
- return {
435
- prefix,
436
- local
437
- };
438
- }
439
- write(chunk) {
440
- if (this.error) {
441
- throw this.error;
455
+ static isQuote(c) {
456
+ return c === '"' || c === "'";
442
457
  }
443
- if (this.closed) {
444
- return this.errorFunction('Cannot write after close. Assign an onready handler.');
458
+ static isAttribEnd(c) {
459
+ return c === '>' || SAX.isWhitespace(c);
445
460
  }
446
- if (chunk === null) {
447
- return this.end();
461
+ static isMatch(regex, c) {
462
+ return regex.test(c);
448
463
  }
449
- if (typeof chunk === 'object') {
450
- chunk = chunk.toString();
464
+ static notMatch(regex, c) {
465
+ return !SAX.isMatch(regex, c);
451
466
  }
452
- let i = 0;
453
- let c;
454
- while (true) {
455
- c = SAX.charAt(chunk, i++);
456
- this.c = c;
457
- if (!c) {
458
- break;
459
- }
460
- if (this.trackPosition) {
461
- this.position++;
462
- if (c === '\n') {
463
- this.line++;
464
- this.column = 0;
465
- } else {
466
- this.column++;
467
+ static qname(name, attribute) {
468
+ const i = name.indexOf(':');
469
+ const qualName = i < 0 ? ['', name] : name.split(':');
470
+ let prefix = qualName[0];
471
+ let local = qualName[1];
472
+ // <x "xmlns"="http://foo">
473
+ if (attribute && name === 'xmlns') {
474
+ prefix = 'xmlns';
475
+ local = '';
476
+ }
477
+ return { prefix, local };
478
+ }
479
+ write(chunk) {
480
+ if (this.error) {
481
+ throw this.error;
482
+ }
483
+ if (this.closed) {
484
+ return this.errorFunction('Cannot write after close. Assign an onready handler.');
485
+ }
486
+ if (chunk === null) {
487
+ return this.end();
467
488
  }
468
- }
469
- switch (this.state) {
470
- case this.S.BEGIN:
471
- this.state = this.S.BEGIN_WHITESPACE;
472
- if (c === '\uFEFF') {
473
- continue;
474
- }
475
- this.beginWhiteSpace(c);
476
- continue;
477
- case this.S.BEGIN_WHITESPACE:
478
- this.beginWhiteSpace(c);
479
- continue;
480
- case this.S.TEXT:
481
- if (this.sawRoot && !this.closedRoot) {
482
- const starti = i - 1;
483
- while (c && c !== '<' && c !== '&') {
484
- c = SAX.charAt(chunk, i++);
485
- if (c && this.trackPosition) {
489
+ if (typeof chunk === 'object') {
490
+ chunk = chunk.toString();
491
+ }
492
+ let i = 0;
493
+ let c;
494
+ while (true) {
495
+ c = SAX.charAt(chunk, i++);
496
+ this.c = c;
497
+ if (!c) {
498
+ break;
499
+ }
500
+ if (this.trackPosition) {
486
501
  this.position++;
487
502
  if (c === '\n') {
488
- this.line++;
489
- this.column = 0;
490
- } else {
491
- this.column++;
503
+ this.line++;
504
+ this.column = 0;
505
+ }
506
+ else {
507
+ this.column++;
492
508
  }
493
- }
494
- }
495
- this.textNode += chunk.substring(starti, i - 1);
496
- }
497
- if (c === '<' && !(this.sawRoot && this.closedRoot && !this.strict)) {
498
- this.state = this.S.OPEN_WAKA;
499
- this.startTagPosition = this.position;
500
- } else {
501
- if (!SAX.isWhitespace(c) && (!this.sawRoot || this.closedRoot)) {
502
- this.strictFail('Text data outside of root node.');
503
- }
504
- if (c === '&') {
505
- this.state = this.S.TEXT_ENTITY;
506
- } else {
507
- this.textNode += c;
508
- }
509
- }
510
- continue;
511
- case this.S.SCRIPT:
512
- if (c === '<') {
513
- this.state = this.S.SCRIPT_ENDING;
514
- } else {
515
- this.script += c;
516
- }
517
- continue;
518
- case this.S.SCRIPT_ENDING:
519
- if (c === '/') {
520
- this.state = this.S.CLOSE_TAG;
521
- } else {
522
- this.script += `<${c}`;
523
- this.state = this.S.SCRIPT;
524
- }
525
- continue;
526
- case this.S.OPEN_WAKA:
527
- if (c === '!') {
528
- this.state = this.S.SGML_DECL;
529
- this.sgmlDecl = '';
530
- } else if (SAX.isWhitespace(c)) {} else if (SAX.isMatch(nameStart, c)) {
531
- this.state = this.S.OPEN_TAG;
532
- this.tagName = c;
533
- } else if (c === '/') {
534
- this.state = this.S.CLOSE_TAG;
535
- this.tagName = '';
536
- } else if (c === '?') {
537
- this.state = this.S.PROC_INST;
538
- this.procInstName = this.procInstBody = '';
539
- } else {
540
- this.strictFail('Unencoded <');
541
- if (this.startTagPosition + 1 < this.position) {
542
- const pad = this.position - this.startTagPosition;
543
- c = new Array(pad).join(' ') + c;
544
- }
545
- this.textNode += `<${c}`;
546
- this.state = this.S.TEXT;
547
- }
548
- continue;
549
- case this.S.SGML_DECL:
550
- if ((this.sgmlDecl + c).toUpperCase() === this.CDATA) {
551
- this.emitNode('onopencdata');
552
- this.state = this.S.CDATA;
553
- this.sgmlDecl = '';
554
- this.cdata = '';
555
- } else if (this.sgmlDecl + c === '--') {
556
- this.state = this.S.COMMENT;
557
- this.comment = '';
558
- this.sgmlDecl = '';
559
- } else if ((this.sgmlDecl + c).toUpperCase() === this.DOCTYPE) {
560
- this.state = this.S.DOCTYPE;
561
- if (this.doctype || this.sawRoot) {
562
- this.strictFail('Inappropriately located doctype declaration');
563
- }
564
- this.doctype = '';
565
- this.sgmlDecl = '';
566
- } else if (c === '>') {
567
- this.emitNode('onsgmldeclaration', this.sgmlDecl);
568
- this.sgmlDecl = '';
569
- this.state = this.S.TEXT;
570
- } else if (SAX.isQuote(c)) {
571
- this.state = this.S.SGML_DECL_QUOTED;
572
- this.sgmlDecl += c;
573
- } else {
574
- this.sgmlDecl += c;
575
- }
576
- continue;
577
- case this.S.SGML_DECL_QUOTED:
578
- if (c === this.q) {
579
- this.state = this.S.SGML_DECL;
580
- this.q = '';
581
- }
582
- this.sgmlDecl += c;
583
- continue;
584
- case this.S.DOCTYPE:
585
- if (c === '>') {
586
- this.state = this.S.TEXT;
587
- this.emitNode('ondoctype', this.doctype);
588
- this.doctype = true;
589
- } else {
590
- this.doctype += c;
591
- if (c === '[') {
592
- this.state = this.S.DOCTYPE_DTD;
593
- } else if (SAX.isQuote(c)) {
594
- this.state = this.S.DOCTYPE_QUOTED;
595
- this.q = c;
596
- }
597
- }
598
- continue;
599
- case this.S.DOCTYPE_QUOTED:
600
- this.doctype += c;
601
- if (c === this.q) {
602
- this.q = '';
603
- this.state = this.S.DOCTYPE;
604
- }
605
- continue;
606
- case this.S.DOCTYPE_DTD:
607
- this.doctype += c;
608
- if (c === ']') {
609
- this.state = this.S.DOCTYPE;
610
- } else if (SAX.isQuote(c)) {
611
- this.state = this.S.DOCTYPE_DTD_QUOTED;
612
- this.q = c;
613
- }
614
- continue;
615
- case this.S.DOCTYPE_DTD_QUOTED:
616
- this.doctype += c;
617
- if (c === this.q) {
618
- this.state = this.S.DOCTYPE_DTD;
619
- this.q = '';
620
- }
621
- continue;
622
- case this.S.COMMENT:
623
- if (c === '-') {
624
- this.state = this.S.COMMENT_ENDING;
625
- } else {
626
- this.comment += c;
627
- }
628
- continue;
629
- case this.S.COMMENT_ENDING:
630
- if (c === '-') {
631
- this.state = this.S.COMMENT_ENDED;
632
- this.comment = this.textApplyOptions(this.comment);
633
- if (this.comment) {
634
- this.emitNode('oncomment', this.comment);
635
- }
636
- this.comment = '';
637
- } else {
638
- this.comment += `-${c}`;
639
- this.state = this.S.COMMENT;
640
- }
641
- continue;
642
- case this.S.COMMENT_ENDED:
643
- if (c !== '>') {
644
- this.strictFail('Malformed comment');
645
- this.comment += `--${c}`;
646
- this.state = this.S.COMMENT;
647
- } else {
648
- this.state = this.S.TEXT;
649
- }
650
- continue;
651
- case this.S.CDATA:
652
- if (c === ']') {
653
- this.state = this.S.CDATA_ENDING;
654
- } else {
655
- this.cdata += c;
656
- }
657
- continue;
658
- case this.S.CDATA_ENDING:
659
- if (c === ']') {
660
- this.state = this.S.CDATA_ENDING_2;
661
- } else {
662
- this.cdata += `]${c}`;
663
- this.state = this.S.CDATA;
664
- }
665
- continue;
666
- case this.S.CDATA_ENDING_2:
667
- if (c === '>') {
668
- if (this.cdata) {
669
- this.emitNode('oncdata', this.cdata);
670
- }
671
- this.emitNode('onclosecdata');
672
- this.cdata = '';
673
- this.state = this.S.TEXT;
674
- } else if (c === ']') {
675
- this.cdata += ']';
676
- } else {
677
- this.cdata += `]]${c}`;
678
- this.state = this.S.CDATA;
679
- }
680
- continue;
681
- case this.S.PROC_INST:
682
- if (c === '?') {
683
- this.state = this.S.PROC_INST_ENDING;
684
- } else if (SAX.isWhitespace(c)) {
685
- this.state = this.S.PROC_INST_BODY;
686
- } else {
687
- this.procInstName += c;
688
- }
689
- continue;
690
- case this.S.PROC_INST_BODY:
691
- if (!this.procInstBody && SAX.isWhitespace(c)) {
692
- continue;
693
- } else if (c === '?') {
694
- this.state = this.S.PROC_INST_ENDING;
695
- } else {
696
- this.procInstBody += c;
697
- }
698
- continue;
699
- case this.S.PROC_INST_ENDING:
700
- if (c === '>') {
701
- this.emitNode('onprocessinginstruction', {
702
- name: this.procInstName,
703
- body: this.procInstBody
704
- });
705
- this.procInstName = this.procInstBody = '';
706
- this.state = this.S.TEXT;
707
- } else {
708
- this.procInstBody += `?${c}`;
709
- this.state = this.S.PROC_INST_BODY;
710
- }
711
- continue;
712
- case this.S.OPEN_TAG:
713
- if (SAX.isMatch(nameBody, c)) {
714
- this.tagName += c;
715
- } else {
716
- this.newTag();
717
- if (c === '>') {
718
- this.openTag();
719
- } else if (c === '/') {
720
- this.state = this.S.OPEN_TAG_SLASH;
721
- } else {
722
- if (!SAX.isWhitespace(c)) {
723
- this.strictFail('Invalid character in tag name');
724
- }
725
- this.state = this.S.ATTRIB;
726
- }
727
- }
728
- continue;
729
- case this.S.OPEN_TAG_SLASH:
730
- if (c === '>') {
731
- this.openTag(true);
732
- this.closeTag();
733
- } else {
734
- this.strictFail('Forward-slash in opening tag not followed by >');
735
- this.state = this.S.ATTRIB;
736
- }
737
- continue;
738
- case this.S.ATTRIB:
739
- if (SAX.isWhitespace(c)) {
740
- continue;
741
- } else if (c === '>') {
742
- this.openTag();
743
- } else if (c === '/') {
744
- this.state = this.S.OPEN_TAG_SLASH;
745
- } else if (SAX.isMatch(nameStart, c)) {
746
- this.attribName = c;
747
- this.attribValue = '';
748
- this.state = this.S.ATTRIB_NAME;
749
- } else {
750
- this.strictFail('Invalid attribute name');
751
- }
752
- continue;
753
- case this.S.ATTRIB_NAME:
754
- if (c === '=') {
755
- this.state = this.S.ATTRIB_VALUE;
756
- } else if (c === '>') {
757
- this.strictFail('Attribute without value');
758
- this.attribValue = this.attribName;
759
- this.attrib();
760
- this.openTag();
761
- } else if (SAX.isWhitespace(c)) {
762
- this.state = this.S.ATTRIB_NAME_SAW_WHITE;
763
- } else if (SAX.isMatch(nameBody, c)) {
764
- this.attribName += c;
765
- } else {
766
- this.strictFail('Invalid attribute name');
767
- }
768
- continue;
769
- case this.S.ATTRIB_NAME_SAW_WHITE:
770
- if (c === '=') {
771
- this.state = this.S.ATTRIB_VALUE;
772
- } else if (SAX.isWhitespace(c)) {
773
- continue;
774
- } else {
775
- this.strictFail('Attribute without value');
776
- this.tag.attributes[this.attribName] = '';
777
- this.attribValue = '';
778
- this.emitNode('onattribute', {
779
- name: this.attribName,
780
- value: ''
781
- });
782
- this.attribName = '';
783
- if (c === '>') {
784
- this.openTag();
785
- } else if (SAX.isMatch(nameStart, c)) {
786
- this.attribName = c;
787
- this.state = this.S.ATTRIB_NAME;
788
- } else {
789
- this.strictFail('Invalid attribute name');
790
- this.state = this.S.ATTRIB;
791
- }
792
- }
793
- continue;
794
- case this.S.ATTRIB_VALUE:
795
- if (SAX.isWhitespace(c)) {
796
- continue;
797
- } else if (SAX.isQuote(c)) {
798
- this.q = c;
799
- this.state = this.S.ATTRIB_VALUE_QUOTED;
800
- } else {
801
- this.strictFail('Unquoted attribute value');
802
- this.state = this.S.ATTRIB_VALUE_UNQUOTED;
803
- this.attribValue = c;
804
- }
805
- continue;
806
- case this.S.ATTRIB_VALUE_QUOTED:
807
- if (c !== this.q) {
808
- if (c === '&') {
809
- this.state = this.S.ATTRIB_VALUE_ENTITY_Q;
810
- } else {
811
- this.attribValue += c;
812
- }
813
- continue;
814
- }
815
- this.attrib();
816
- this.q = '';
817
- this.state = this.S.ATTRIB_VALUE_CLOSED;
818
- continue;
819
- case this.S.ATTRIB_VALUE_CLOSED:
820
- if (SAX.isWhitespace(c)) {
821
- this.state = this.S.ATTRIB;
822
- } else if (c === '>') {
823
- this.openTag();
824
- } else if (c === '/') {
825
- this.state = this.S.OPEN_TAG_SLASH;
826
- } else if (SAX.isMatch(nameStart, c)) {
827
- this.strictFail('No whitespace between attributes');
828
- this.attribName = c;
829
- this.attribValue = '';
830
- this.state = this.S.ATTRIB_NAME;
831
- } else {
832
- this.strictFail('Invalid attribute name');
833
- }
834
- continue;
835
- case this.S.ATTRIB_VALUE_UNQUOTED:
836
- if (!SAX.isAttribEnd(c)) {
837
- if (c === '&') {
838
- this.state = this.S.ATTRIB_VALUE_ENTITY_U;
839
- } else {
840
- this.attribValue += c;
841
- }
842
- continue;
843
- }
844
- this.attrib();
845
- if (c === '>') {
846
- this.openTag();
847
- } else {
848
- this.state = this.S.ATTRIB;
849
- }
850
- continue;
851
- case this.S.CLOSE_TAG:
852
- if (!this.tagName) {
853
- if (SAX.isWhitespace(c)) {
854
- continue;
855
- } else if (SAX.notMatch(nameStart, c)) {
856
- if (this.script) {
857
- this.script += `</${c}`;
858
- this.state = this.S.SCRIPT;
859
- } else {
860
- this.strictFail('Invalid tagname in closing tag.');
861
- }
862
- } else {
863
- this.tagName = c;
864
509
  }
865
- } else if (c === '>') {
866
- this.closeTag();
867
- } else if (SAX.isMatch(nameBody, c)) {
868
- this.tagName += c;
869
- } else if (this.script) {
870
- this.script += `</${this.tagName}`;
871
- this.tagName = '';
872
- this.state = this.S.SCRIPT;
873
- } else {
874
- if (!SAX.isWhitespace(c)) {
875
- this.strictFail('Invalid tagname in closing tag');
510
+ switch (this.state) {
511
+ case this.S.BEGIN:
512
+ this.state = this.S.BEGIN_WHITESPACE;
513
+ if (c === '\uFEFF') {
514
+ continue;
515
+ }
516
+ this.beginWhiteSpace(c);
517
+ continue;
518
+ case this.S.BEGIN_WHITESPACE:
519
+ this.beginWhiteSpace(c);
520
+ continue;
521
+ case this.S.TEXT:
522
+ if (this.sawRoot && !this.closedRoot) {
523
+ const starti = i - 1;
524
+ while (c && c !== '<' && c !== '&') {
525
+ c = SAX.charAt(chunk, i++);
526
+ if (c && this.trackPosition) {
527
+ this.position++;
528
+ if (c === '\n') {
529
+ this.line++;
530
+ this.column = 0;
531
+ }
532
+ else {
533
+ this.column++;
534
+ }
535
+ }
536
+ }
537
+ this.textNode += chunk.substring(starti, i - 1);
538
+ }
539
+ if (c === '<' && !(this.sawRoot && this.closedRoot && !this.strict)) {
540
+ this.state = this.S.OPEN_WAKA;
541
+ this.startTagPosition = this.position;
542
+ }
543
+ else {
544
+ if (!SAX.isWhitespace(c) && (!this.sawRoot || this.closedRoot)) {
545
+ this.strictFail('Text data outside of root node.');
546
+ }
547
+ if (c === '&') {
548
+ this.state = this.S.TEXT_ENTITY;
549
+ }
550
+ else {
551
+ this.textNode += c;
552
+ }
553
+ }
554
+ continue;
555
+ case this.S.SCRIPT:
556
+ // only non-strict
557
+ if (c === '<') {
558
+ this.state = this.S.SCRIPT_ENDING;
559
+ }
560
+ else {
561
+ this.script += c;
562
+ }
563
+ continue;
564
+ case this.S.SCRIPT_ENDING:
565
+ if (c === '/') {
566
+ this.state = this.S.CLOSE_TAG;
567
+ }
568
+ else {
569
+ this.script += `<${c}`;
570
+ this.state = this.S.SCRIPT;
571
+ }
572
+ continue;
573
+ case this.S.OPEN_WAKA:
574
+ // either a /, ?, !, or text is coming next.
575
+ if (c === '!') {
576
+ this.state = this.S.SGML_DECL;
577
+ this.sgmlDecl = '';
578
+ }
579
+ else if (SAX.isWhitespace(c)) {
580
+ // wait for it...
581
+ }
582
+ else if (SAX.isMatch(nameStart, c)) {
583
+ this.state = this.S.OPEN_TAG;
584
+ this.tagName = c;
585
+ }
586
+ else if (c === '/') {
587
+ this.state = this.S.CLOSE_TAG;
588
+ this.tagName = '';
589
+ }
590
+ else if (c === '?') {
591
+ this.state = this.S.PROC_INST;
592
+ this.procInstName = this.procInstBody = '';
593
+ }
594
+ else {
595
+ this.strictFail('Unencoded <');
596
+ // if there was some whitespace, then add that in.
597
+ if (this.startTagPosition + 1 < this.position) {
598
+ const pad = this.position - this.startTagPosition;
599
+ c = new Array(pad).join(' ') + c;
600
+ }
601
+ this.textNode += `<${c}`;
602
+ this.state = this.S.TEXT;
603
+ }
604
+ continue;
605
+ case this.S.SGML_DECL:
606
+ if ((this.sgmlDecl + c).toUpperCase() === this.CDATA) {
607
+ this.emitNode('onopencdata');
608
+ this.state = this.S.CDATA;
609
+ this.sgmlDecl = '';
610
+ this.cdata = '';
611
+ }
612
+ else if (this.sgmlDecl + c === '--') {
613
+ this.state = this.S.COMMENT;
614
+ this.comment = '';
615
+ this.sgmlDecl = '';
616
+ }
617
+ else if ((this.sgmlDecl + c).toUpperCase() === this.DOCTYPE) {
618
+ this.state = this.S.DOCTYPE;
619
+ if (this.doctype || this.sawRoot) {
620
+ this.strictFail('Inappropriately located doctype declaration');
621
+ }
622
+ this.doctype = '';
623
+ this.sgmlDecl = '';
624
+ }
625
+ else if (c === '>') {
626
+ this.emitNode('onsgmldeclaration', this.sgmlDecl);
627
+ this.sgmlDecl = '';
628
+ this.state = this.S.TEXT;
629
+ }
630
+ else if (SAX.isQuote(c)) {
631
+ this.state = this.S.SGML_DECL_QUOTED;
632
+ this.sgmlDecl += c;
633
+ }
634
+ else {
635
+ this.sgmlDecl += c;
636
+ }
637
+ continue;
638
+ case this.S.SGML_DECL_QUOTED:
639
+ if (c === this.q) {
640
+ this.state = this.S.SGML_DECL;
641
+ this.q = '';
642
+ }
643
+ this.sgmlDecl += c;
644
+ continue;
645
+ case this.S.DOCTYPE:
646
+ if (c === '>') {
647
+ this.state = this.S.TEXT;
648
+ this.emitNode('ondoctype', this.doctype);
649
+ this.doctype = true; // just remember that we saw it.
650
+ }
651
+ else {
652
+ this.doctype += c;
653
+ if (c === '[') {
654
+ this.state = this.S.DOCTYPE_DTD;
655
+ }
656
+ else if (SAX.isQuote(c)) {
657
+ this.state = this.S.DOCTYPE_QUOTED;
658
+ this.q = c;
659
+ }
660
+ }
661
+ continue;
662
+ case this.S.DOCTYPE_QUOTED:
663
+ this.doctype += c;
664
+ if (c === this.q) {
665
+ this.q = '';
666
+ this.state = this.S.DOCTYPE;
667
+ }
668
+ continue;
669
+ case this.S.DOCTYPE_DTD:
670
+ this.doctype += c;
671
+ if (c === ']') {
672
+ this.state = this.S.DOCTYPE;
673
+ }
674
+ else if (SAX.isQuote(c)) {
675
+ this.state = this.S.DOCTYPE_DTD_QUOTED;
676
+ this.q = c;
677
+ }
678
+ continue;
679
+ case this.S.DOCTYPE_DTD_QUOTED:
680
+ this.doctype += c;
681
+ if (c === this.q) {
682
+ this.state = this.S.DOCTYPE_DTD;
683
+ this.q = '';
684
+ }
685
+ continue;
686
+ case this.S.COMMENT:
687
+ if (c === '-') {
688
+ this.state = this.S.COMMENT_ENDING;
689
+ }
690
+ else {
691
+ this.comment += c;
692
+ }
693
+ continue;
694
+ case this.S.COMMENT_ENDING:
695
+ if (c === '-') {
696
+ this.state = this.S.COMMENT_ENDED;
697
+ this.comment = this.textApplyOptions(this.comment);
698
+ if (this.comment) {
699
+ this.emitNode('oncomment', this.comment);
700
+ }
701
+ this.comment = '';
702
+ }
703
+ else {
704
+ this.comment += `-${c}`;
705
+ this.state = this.S.COMMENT;
706
+ }
707
+ continue;
708
+ case this.S.COMMENT_ENDED:
709
+ if (c !== '>') {
710
+ this.strictFail('Malformed comment');
711
+ // allow <!-- blah -- bloo --> in non-strict mode,
712
+ // which is a comment of " blah -- bloo "
713
+ this.comment += `--${c}`;
714
+ this.state = this.S.COMMENT;
715
+ }
716
+ else {
717
+ this.state = this.S.TEXT;
718
+ }
719
+ continue;
720
+ case this.S.CDATA:
721
+ if (c === ']') {
722
+ this.state = this.S.CDATA_ENDING;
723
+ }
724
+ else {
725
+ this.cdata += c;
726
+ }
727
+ continue;
728
+ case this.S.CDATA_ENDING:
729
+ if (c === ']') {
730
+ this.state = this.S.CDATA_ENDING_2;
731
+ }
732
+ else {
733
+ this.cdata += `]${c}`;
734
+ this.state = this.S.CDATA;
735
+ }
736
+ continue;
737
+ case this.S.CDATA_ENDING_2:
738
+ if (c === '>') {
739
+ if (this.cdata) {
740
+ this.emitNode('oncdata', this.cdata);
741
+ }
742
+ this.emitNode('onclosecdata');
743
+ this.cdata = '';
744
+ this.state = this.S.TEXT;
745
+ }
746
+ else if (c === ']') {
747
+ this.cdata += ']';
748
+ }
749
+ else {
750
+ this.cdata += `]]${c}`;
751
+ this.state = this.S.CDATA;
752
+ }
753
+ continue;
754
+ case this.S.PROC_INST:
755
+ if (c === '?') {
756
+ this.state = this.S.PROC_INST_ENDING;
757
+ }
758
+ else if (SAX.isWhitespace(c)) {
759
+ this.state = this.S.PROC_INST_BODY;
760
+ }
761
+ else {
762
+ this.procInstName += c;
763
+ }
764
+ continue;
765
+ case this.S.PROC_INST_BODY:
766
+ if (!this.procInstBody && SAX.isWhitespace(c)) {
767
+ continue;
768
+ }
769
+ else if (c === '?') {
770
+ this.state = this.S.PROC_INST_ENDING;
771
+ }
772
+ else {
773
+ this.procInstBody += c;
774
+ }
775
+ continue;
776
+ case this.S.PROC_INST_ENDING:
777
+ if (c === '>') {
778
+ this.emitNode('onprocessinginstruction', {
779
+ name: this.procInstName,
780
+ body: this.procInstBody
781
+ });
782
+ this.procInstName = this.procInstBody = '';
783
+ this.state = this.S.TEXT;
784
+ }
785
+ else {
786
+ this.procInstBody += `?${c}`;
787
+ this.state = this.S.PROC_INST_BODY;
788
+ }
789
+ continue;
790
+ case this.S.OPEN_TAG:
791
+ if (SAX.isMatch(nameBody, c)) {
792
+ this.tagName += c;
793
+ }
794
+ else {
795
+ this.newTag();
796
+ if (c === '>') {
797
+ this.openTag();
798
+ }
799
+ else if (c === '/') {
800
+ this.state = this.S.OPEN_TAG_SLASH;
801
+ }
802
+ else {
803
+ if (!SAX.isWhitespace(c)) {
804
+ this.strictFail('Invalid character in tag name');
805
+ }
806
+ this.state = this.S.ATTRIB;
807
+ }
808
+ }
809
+ continue;
810
+ case this.S.OPEN_TAG_SLASH:
811
+ if (c === '>') {
812
+ this.openTag(true);
813
+ this.closeTag();
814
+ }
815
+ else {
816
+ this.strictFail('Forward-slash in opening tag not followed by >');
817
+ this.state = this.S.ATTRIB;
818
+ }
819
+ continue;
820
+ case this.S.ATTRIB:
821
+ // haven't read the attribute name yet.
822
+ if (SAX.isWhitespace(c)) {
823
+ continue;
824
+ }
825
+ else if (c === '>') {
826
+ this.openTag();
827
+ }
828
+ else if (c === '/') {
829
+ this.state = this.S.OPEN_TAG_SLASH;
830
+ }
831
+ else if (SAX.isMatch(nameStart, c)) {
832
+ this.attribName = c;
833
+ this.attribValue = '';
834
+ this.state = this.S.ATTRIB_NAME;
835
+ }
836
+ else {
837
+ this.strictFail('Invalid attribute name');
838
+ }
839
+ continue;
840
+ case this.S.ATTRIB_NAME:
841
+ if (c === '=') {
842
+ this.state = this.S.ATTRIB_VALUE;
843
+ }
844
+ else if (c === '>') {
845
+ this.strictFail('Attribute without value');
846
+ this.attribValue = this.attribName;
847
+ this.attrib();
848
+ this.openTag();
849
+ }
850
+ else if (SAX.isWhitespace(c)) {
851
+ this.state = this.S.ATTRIB_NAME_SAW_WHITE;
852
+ }
853
+ else if (SAX.isMatch(nameBody, c)) {
854
+ this.attribName += c;
855
+ }
856
+ else {
857
+ this.strictFail('Invalid attribute name');
858
+ }
859
+ continue;
860
+ case this.S.ATTRIB_NAME_SAW_WHITE:
861
+ if (c === '=') {
862
+ this.state = this.S.ATTRIB_VALUE;
863
+ }
864
+ else if (SAX.isWhitespace(c)) {
865
+ continue;
866
+ }
867
+ else {
868
+ this.strictFail('Attribute without value');
869
+ this.tag.attributes[this.attribName] = '';
870
+ this.attribValue = '';
871
+ this.emitNode('onattribute', {
872
+ name: this.attribName,
873
+ value: ''
874
+ });
875
+ this.attribName = '';
876
+ if (c === '>') {
877
+ this.openTag();
878
+ }
879
+ else if (SAX.isMatch(nameStart, c)) {
880
+ this.attribName = c;
881
+ this.state = this.S.ATTRIB_NAME;
882
+ }
883
+ else {
884
+ this.strictFail('Invalid attribute name');
885
+ this.state = this.S.ATTRIB;
886
+ }
887
+ }
888
+ continue;
889
+ case this.S.ATTRIB_VALUE:
890
+ if (SAX.isWhitespace(c)) {
891
+ continue;
892
+ }
893
+ else if (SAX.isQuote(c)) {
894
+ this.q = c;
895
+ this.state = this.S.ATTRIB_VALUE_QUOTED;
896
+ }
897
+ else {
898
+ this.strictFail('Unquoted attribute value');
899
+ this.state = this.S.ATTRIB_VALUE_UNQUOTED;
900
+ this.attribValue = c;
901
+ }
902
+ continue;
903
+ case this.S.ATTRIB_VALUE_QUOTED:
904
+ if (c !== this.q) {
905
+ if (c === '&') {
906
+ this.state = this.S.ATTRIB_VALUE_ENTITY_Q;
907
+ }
908
+ else {
909
+ this.attribValue += c;
910
+ }
911
+ continue;
912
+ }
913
+ this.attrib();
914
+ this.q = '';
915
+ this.state = this.S.ATTRIB_VALUE_CLOSED;
916
+ continue;
917
+ case this.S.ATTRIB_VALUE_CLOSED:
918
+ if (SAX.isWhitespace(c)) {
919
+ this.state = this.S.ATTRIB;
920
+ }
921
+ else if (c === '>') {
922
+ this.openTag();
923
+ }
924
+ else if (c === '/') {
925
+ this.state = this.S.OPEN_TAG_SLASH;
926
+ }
927
+ else if (SAX.isMatch(nameStart, c)) {
928
+ this.strictFail('No whitespace between attributes');
929
+ this.attribName = c;
930
+ this.attribValue = '';
931
+ this.state = this.S.ATTRIB_NAME;
932
+ }
933
+ else {
934
+ this.strictFail('Invalid attribute name');
935
+ }
936
+ continue;
937
+ case this.S.ATTRIB_VALUE_UNQUOTED:
938
+ if (!SAX.isAttribEnd(c)) {
939
+ if (c === '&') {
940
+ this.state = this.S.ATTRIB_VALUE_ENTITY_U;
941
+ }
942
+ else {
943
+ this.attribValue += c;
944
+ }
945
+ continue;
946
+ }
947
+ this.attrib();
948
+ if (c === '>') {
949
+ this.openTag();
950
+ }
951
+ else {
952
+ this.state = this.S.ATTRIB;
953
+ }
954
+ continue;
955
+ case this.S.CLOSE_TAG:
956
+ if (!this.tagName) {
957
+ if (SAX.isWhitespace(c)) {
958
+ continue;
959
+ }
960
+ else if (SAX.notMatch(nameStart, c)) {
961
+ if (this.script) {
962
+ this.script += `</${c}`;
963
+ this.state = this.S.SCRIPT;
964
+ }
965
+ else {
966
+ this.strictFail('Invalid tagname in closing tag.');
967
+ }
968
+ }
969
+ else {
970
+ this.tagName = c;
971
+ }
972
+ }
973
+ else if (c === '>') {
974
+ this.closeTag();
975
+ }
976
+ else if (SAX.isMatch(nameBody, c)) {
977
+ this.tagName += c;
978
+ }
979
+ else if (this.script) {
980
+ this.script += `</${this.tagName}`;
981
+ this.tagName = '';
982
+ this.state = this.S.SCRIPT;
983
+ }
984
+ else {
985
+ if (!SAX.isWhitespace(c)) {
986
+ this.strictFail('Invalid tagname in closing tag');
987
+ }
988
+ this.state = this.S.CLOSE_TAG_SAW_WHITE;
989
+ }
990
+ continue;
991
+ case this.S.CLOSE_TAG_SAW_WHITE:
992
+ if (SAX.isWhitespace(c)) {
993
+ continue;
994
+ }
995
+ if (c === '>') {
996
+ this.closeTag();
997
+ }
998
+ else {
999
+ this.strictFail('Invalid characters in closing tag');
1000
+ }
1001
+ continue;
1002
+ case this.S.TEXT_ENTITY:
1003
+ case this.S.ATTRIB_VALUE_ENTITY_Q:
1004
+ case this.S.ATTRIB_VALUE_ENTITY_U:
1005
+ let returnState;
1006
+ let buffer;
1007
+ switch (this.state) {
1008
+ case this.S.TEXT_ENTITY:
1009
+ returnState = this.S.TEXT;
1010
+ buffer = 'textNode';
1011
+ break;
1012
+ case this.S.ATTRIB_VALUE_ENTITY_Q:
1013
+ returnState = this.S.ATTRIB_VALUE_QUOTED;
1014
+ buffer = 'attribValue';
1015
+ break;
1016
+ case this.S.ATTRIB_VALUE_ENTITY_U:
1017
+ returnState = this.S.ATTRIB_VALUE_UNQUOTED;
1018
+ buffer = 'attribValue';
1019
+ break;
1020
+ default:
1021
+ throw new Error(`Unknown state: ${this.state}`);
1022
+ }
1023
+ if (c === ';') {
1024
+ this[buffer] += this.parseEntity();
1025
+ this.entity = '';
1026
+ this.state = returnState;
1027
+ }
1028
+ else if (SAX.isMatch(this.entity.length ? entityBody : entityStart, c)) {
1029
+ this.entity += c;
1030
+ }
1031
+ else {
1032
+ this.strictFail('Invalid character in entity name');
1033
+ this[buffer] += `&${this.entity}${c}`;
1034
+ this.entity = '';
1035
+ this.state = returnState;
1036
+ }
1037
+ continue;
1038
+ default:
1039
+ throw new Error(`Unknown state: ${this.state}`);
876
1040
  }
877
- this.state = this.S.CLOSE_TAG_SAW_WHITE;
878
- }
879
- continue;
880
- case this.S.CLOSE_TAG_SAW_WHITE:
881
- if (SAX.isWhitespace(c)) {
882
- continue;
883
- }
884
- if (c === '>') {
885
- this.closeTag();
886
- } else {
887
- this.strictFail('Invalid characters in closing tag');
888
- }
889
- continue;
890
- case this.S.TEXT_ENTITY:
891
- case this.S.ATTRIB_VALUE_ENTITY_Q:
892
- case this.S.ATTRIB_VALUE_ENTITY_U:
893
- let returnState;
894
- let buffer;
895
- switch (this.state) {
896
- case this.S.TEXT_ENTITY:
897
- returnState = this.S.TEXT;
898
- buffer = 'textNode';
899
- break;
900
- case this.S.ATTRIB_VALUE_ENTITY_Q:
901
- returnState = this.S.ATTRIB_VALUE_QUOTED;
902
- buffer = 'attribValue';
903
- break;
904
- case this.S.ATTRIB_VALUE_ENTITY_U:
905
- returnState = this.S.ATTRIB_VALUE_UNQUOTED;
906
- buffer = 'attribValue';
907
- break;
908
- default:
909
- throw new Error(`Unknown state: ${this.state}`);
910
- }
911
- if (c === ';') {
912
- this[buffer] += this.parseEntity();
913
- this.entity = '';
914
- this.state = returnState;
915
- } else if (SAX.isMatch(this.entity.length ? entityBody : entityStart, c)) {
916
- this.entity += c;
917
- } else {
918
- this.strictFail('Invalid character in entity name');
919
- this[buffer] += `&${this.entity}${c}`;
920
- this.entity = '';
921
- this.state = returnState;
922
- }
923
- continue;
924
- default:
925
- throw new Error(`Unknown state: ${this.state}`);
926
- }
927
- }
928
- if (this.position >= this.bufferCheckPosition) {
929
- this.checkBufferLength();
930
- }
931
- return this;
932
- }
933
- emit(event, data) {
934
- if (this.events.hasOwnProperty(event)) {
935
- const eventName = event.replace(/^on/, '');
936
- this.events[event](data, eventName, this);
937
- }
938
- }
939
- clearBuffers() {
940
- for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
941
- this[this[i]] = '';
1041
+ } // while
1042
+ if (this.position >= this.bufferCheckPosition) {
1043
+ this.checkBufferLength();
1044
+ }
1045
+ return this;
942
1046
  }
943
- }
944
- flushBuffers() {
945
- this.closeText();
946
- if (this.cdata !== '') {
947
- this.emitNode('oncdata', this.cdata);
948
- this.cdata = '';
1047
+ emit(event, data) {
1048
+ if (this.events.hasOwnProperty(event)) {
1049
+ const eventName = event.replace(/^on/, '');
1050
+ this.events[event](data, eventName, this);
1051
+ }
949
1052
  }
950
- if (this.script !== '') {
951
- this.emitNode('onscript', this.script);
952
- this.script = '';
1053
+ clearBuffers() {
1054
+ for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1055
+ this[this[i]] = '';
1056
+ }
953
1057
  }
954
- }
955
- end() {
956
- if (this.sawRoot && !this.closedRoot) this.strictFail('Unclosed root tag');
957
- if (this.state !== this.S.BEGIN && this.state !== this.S.BEGIN_WHITESPACE && this.state !== this.S.TEXT) {
958
- this.errorFunction('Unexpected end');
1058
+ flushBuffers() {
1059
+ this.closeText();
1060
+ if (this.cdata !== '') {
1061
+ this.emitNode('oncdata', this.cdata);
1062
+ this.cdata = '';
1063
+ }
1064
+ if (this.script !== '') {
1065
+ this.emitNode('onscript', this.script);
1066
+ this.script = '';
1067
+ }
959
1068
  }
960
- this.closeText();
961
- this.c = '';
962
- this.closed = true;
963
- this.emit('onend');
964
- return new SAXParser(this.opt);
965
- }
966
- errorFunction(er) {
967
- this.closeText();
968
- if (this.trackPosition) {
969
- er += `\nLine: ${this.line}\nColumn: ${this.column}\nChar: ${this.c}`;
1069
+ end() {
1070
+ if (this.sawRoot && !this.closedRoot)
1071
+ this.strictFail('Unclosed root tag');
1072
+ if (this.state !== this.S.BEGIN &&
1073
+ this.state !== this.S.BEGIN_WHITESPACE &&
1074
+ this.state !== this.S.TEXT) {
1075
+ this.errorFunction('Unexpected end');
1076
+ }
1077
+ this.closeText();
1078
+ this.c = '';
1079
+ this.closed = true;
1080
+ this.emit('onend');
1081
+ return new SAXParser(this.opt);
970
1082
  }
971
- const error = new Error(er);
972
- this.error = error;
973
- this.emit('onerror', error);
974
- return this;
975
- }
976
- attrib() {
977
- if (!this.strict) {
978
- this.attribName = this.attribName[this.looseCase]();
1083
+ errorFunction(er) {
1084
+ this.closeText();
1085
+ if (this.trackPosition) {
1086
+ er += `\nLine: ${this.line}\nColumn: ${this.column}\nChar: ${this.c}`;
1087
+ }
1088
+ const error = new Error(er);
1089
+ this.error = error;
1090
+ this.emit('onerror', error);
1091
+ return this;
979
1092
  }
980
- if (this.attribList.indexOf(this.attribName) !== -1 || this.tag.attributes.hasOwnProperty(this.attribName)) {
981
- this.attribName = this.attribValue = '';
982
- return;
1093
+ attrib() {
1094
+ if (!this.strict) {
1095
+ this.attribName = this.attribName[this.looseCase]();
1096
+ }
1097
+ if (this.attribList.indexOf(this.attribName) !== -1 ||
1098
+ this.tag.attributes.hasOwnProperty(this.attribName)) {
1099
+ this.attribName = this.attribValue = '';
1100
+ return;
1101
+ }
1102
+ if (this.opt.xmlns) {
1103
+ const qn = SAX.qname(this.attribName, true);
1104
+ const prefix = qn.prefix;
1105
+ const local = qn.local;
1106
+ if (prefix === 'xmlns') {
1107
+ // namespace binding attribute. push the binding into scope
1108
+ if (local === 'xml' && this.attribValue !== this.XML_NAMESPACE) {
1109
+ this.strictFail(`xml: prefix must be bound to ${this.XML_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
1110
+ }
1111
+ else if (local === 'xmlns' && this.attribValue !== this.XMLNS_NAMESPACE) {
1112
+ this.strictFail(`xmlns: prefix must be bound to ${this.XMLNS_NAMESPACE}\n` +
1113
+ `Actual: ${this.attribValue}`);
1114
+ }
1115
+ else {
1116
+ const tag = this.tag;
1117
+ const parent = this.tags[this.tags.length - 1] || this;
1118
+ if (tag.ns === parent.ns) {
1119
+ tag.ns = Object.create(parent.ns);
1120
+ }
1121
+ tag.ns[local] = this.attribValue;
1122
+ }
1123
+ }
1124
+ // defer onattribute events until all attributes have been seen
1125
+ // so any new bindings can take effect. preserve attribute order
1126
+ // so deferred events can be emitted in document order
1127
+ this.attribList.push([this.attribName, this.attribValue]);
1128
+ }
1129
+ else {
1130
+ // in non-xmlns mode, we can emit the event right away
1131
+ this.tag.attributes[this.attribName] = this.attribValue;
1132
+ this.emitNode('onattribute', {
1133
+ name: this.attribName,
1134
+ value: this.attribValue
1135
+ });
1136
+ }
1137
+ this.attribName = this.attribValue = '';
983
1138
  }
984
- if (this.opt.xmlns) {
985
- const qn = SAX.qname(this.attribName, true);
986
- const prefix = qn.prefix;
987
- const local = qn.local;
988
- if (prefix === 'xmlns') {
989
- if (local === 'xml' && this.attribValue !== this.XML_NAMESPACE) {
990
- this.strictFail(`xml: prefix must be bound to ${this.XML_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
991
- } else if (local === 'xmlns' && this.attribValue !== this.XMLNS_NAMESPACE) {
992
- this.strictFail(`xmlns: prefix must be bound to ${this.XMLNS_NAMESPACE}\n` + `Actual: ${this.attribValue}`);
993
- } else {
994
- const tag = this.tag;
995
- const parent = this.tags[this.tags.length - 1] || this;
996
- if (tag.ns === parent.ns) {
997
- tag.ns = Object.create(parent.ns);
998
- }
999
- tag.ns[local] = this.attribValue;
1139
+ newTag() {
1140
+ if (!this.strict)
1141
+ this.tagName = this.tagName[this.looseCase]();
1142
+ const parent = this.tags[this.tags.length - 1] || this;
1143
+ const tag = (this.tag = { name: this.tagName, attributes: {} });
1144
+ // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
1145
+ if (this.opt.xmlns) {
1146
+ tag.ns = parent.ns;
1000
1147
  }
1001
- }
1002
- this.attribList.push([this.attribName, this.attribValue]);
1003
- } else {
1004
- this.tag.attributes[this.attribName] = this.attribValue;
1005
- this.emitNode('onattribute', {
1006
- name: this.attribName,
1007
- value: this.attribValue
1008
- });
1148
+ this.attribList.length = 0;
1149
+ this.emitNode('onopentagstart', tag);
1009
1150
  }
1010
- this.attribName = this.attribValue = '';
1011
- }
1012
- newTag() {
1013
- if (!this.strict) this.tagName = this.tagName[this.looseCase]();
1014
- const parent = this.tags[this.tags.length - 1] || this;
1015
- const tag = this.tag = {
1016
- name: this.tagName,
1017
- attributes: {}
1018
- };
1019
- if (this.opt.xmlns) {
1020
- tag.ns = parent.ns;
1151
+ parseEntity() {
1152
+ let entity = this.entity;
1153
+ const entityLC = entity.toLowerCase();
1154
+ let num = NaN;
1155
+ let numStr = '';
1156
+ if (this.ENTITIES[entity]) {
1157
+ return this.ENTITIES[entity];
1158
+ }
1159
+ if (this.ENTITIES[entityLC]) {
1160
+ return this.ENTITIES[entityLC];
1161
+ }
1162
+ entity = entityLC;
1163
+ if (entity.charAt(0) === '#') {
1164
+ if (entity.charAt(1) === 'x') {
1165
+ entity = entity.slice(2);
1166
+ // TODO: remove tslint:disable
1167
+ // tslint:disable-next-line
1168
+ num = parseInt(entity, 16);
1169
+ numStr = num.toString(16);
1170
+ }
1171
+ else {
1172
+ entity = entity.slice(1);
1173
+ // TODO: remove tslint:disable
1174
+ // tslint:disable-next-line
1175
+ num = parseInt(entity, 10);
1176
+ numStr = num.toString(10);
1177
+ }
1178
+ }
1179
+ entity = entity.replace(/^0+/, '');
1180
+ if (isNaN(num) || numStr.toLowerCase() !== entity) {
1181
+ this.strictFail('Invalid character entity');
1182
+ return `&${this.entity};`;
1183
+ }
1184
+ return String.fromCodePoint(num);
1021
1185
  }
1022
- this.attribList.length = 0;
1023
- this.emitNode('onopentagstart', tag);
1024
- }
1025
- parseEntity() {
1026
- let entity = this.entity;
1027
- const entityLC = entity.toLowerCase();
1028
- let num = NaN;
1029
- let numStr = '';
1030
- if (this.ENTITIES[entity]) {
1031
- return this.ENTITIES[entity];
1186
+ beginWhiteSpace(c) {
1187
+ if (c === '<') {
1188
+ this.state = this.S.OPEN_WAKA;
1189
+ this.startTagPosition = this.position;
1190
+ }
1191
+ else if (!SAX.isWhitespace(c)) {
1192
+ // have to process this as a text node.
1193
+ // weird, but happens.
1194
+ this.strictFail('Non-whitespace before first tag.');
1195
+ this.textNode = c;
1196
+ this.state = this.S.TEXT;
1197
+ }
1198
+ else {
1199
+ }
1032
1200
  }
1033
- if (this.ENTITIES[entityLC]) {
1034
- return this.ENTITIES[entityLC];
1201
+ strictFail(message) {
1202
+ if (typeof this !== 'object' || !(this instanceof SAXParser)) {
1203
+ throw new Error('bad call to strictFail');
1204
+ }
1205
+ if (this.strict) {
1206
+ this.errorFunction(message);
1207
+ }
1035
1208
  }
1036
- entity = entityLC;
1037
- if (entity.charAt(0) === '#') {
1038
- if (entity.charAt(1) === 'x') {
1039
- entity = entity.slice(2);
1040
- num = parseInt(entity, 16);
1041
- numStr = num.toString(16);
1042
- } else {
1043
- entity = entity.slice(1);
1044
- num = parseInt(entity, 10);
1045
- numStr = num.toString(10);
1046
- }
1209
+ textApplyOptions(text) {
1210
+ if (this.opt.trim)
1211
+ text = text.trim();
1212
+ if (this.opt.normalize)
1213
+ text = text.replace(/\s+/g, ' ');
1214
+ return text;
1047
1215
  }
1048
- entity = entity.replace(/^0+/, '');
1049
- if (isNaN(num) || numStr.toLowerCase() !== entity) {
1050
- this.strictFail('Invalid character entity');
1051
- return `&${this.entity};`;
1216
+ emitNode(nodeType, data) {
1217
+ if (this.textNode)
1218
+ this.closeText();
1219
+ this.emit(nodeType, data);
1052
1220
  }
1053
- return String.fromCodePoint(num);
1054
- }
1055
- beginWhiteSpace(c) {
1056
- if (c === '<') {
1057
- this.state = this.S.OPEN_WAKA;
1058
- this.startTagPosition = this.position;
1059
- } else if (!SAX.isWhitespace(c)) {
1060
- this.strictFail('Non-whitespace before first tag.');
1061
- this.textNode = c;
1062
- this.state = this.S.TEXT;
1063
- } else {}
1064
- }
1065
- strictFail(message) {
1066
- if (typeof this !== 'object' || !(this instanceof SAXParser)) {
1067
- throw new Error('bad call to strictFail');
1221
+ closeText() {
1222
+ this.textNode = this.textApplyOptions(this.textNode);
1223
+ // TODO: figure out why this.textNode can be "" and "undefined"
1224
+ if (this.textNode !== undefined && this.textNode !== '' && this.textNode !== 'undefined') {
1225
+ this.emit('ontext', this.textNode);
1226
+ }
1227
+ this.textNode = '';
1068
1228
  }
1069
- if (this.strict) {
1070
- this.errorFunction(message);
1229
+ checkBufferLength() {
1230
+ const maxAllowed = Math.max(this.opt.MAX_BUFFER_LENGTH, 10);
1231
+ let maxActual = 0;
1232
+ for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1233
+ const len = this[this.BUFFERS[i]]?.length || 0;
1234
+ if (len > maxAllowed) {
1235
+ // Text/cdata nodes can get big, and since they're buffered,
1236
+ // we can get here under normal conditions.
1237
+ // Avoid issues by emitting the text node now,
1238
+ // so at least it won't get any bigger.
1239
+ switch (this.BUFFERS[i]) {
1240
+ case 'textNode':
1241
+ this.closeText();
1242
+ break;
1243
+ case 'cdata':
1244
+ this.emitNode('oncdata', this.cdata);
1245
+ this.cdata = '';
1246
+ break;
1247
+ case 'script':
1248
+ this.emitNode('onscript', this.script);
1249
+ this.script = '';
1250
+ break;
1251
+ default:
1252
+ this.errorFunction(`Max buffer length exceeded: ${this.BUFFERS[i]}`);
1253
+ }
1254
+ }
1255
+ maxActual = Math.max(maxActual, len);
1256
+ }
1257
+ // schedule the next check for the earliest possible buffer overrun.
1258
+ const m = this.opt.MAX_BUFFER_LENGTH - maxActual;
1259
+ this.bufferCheckPosition = m + this.position;
1071
1260
  }
1072
- }
1073
- textApplyOptions(text) {
1074
- if (this.opt.trim) text = text.trim();
1075
- if (this.opt.normalize) text = text.replace(/\s+/g, ' ');
1076
- return text;
1077
- }
1078
- emitNode(nodeType, data) {
1079
- if (this.textNode) this.closeText();
1080
- this.emit(nodeType, data);
1081
- }
1082
- closeText() {
1083
- this.textNode = this.textApplyOptions(this.textNode);
1084
- if (this.textNode !== undefined && this.textNode !== '' && this.textNode !== 'undefined') {
1085
- this.emit('ontext', this.textNode);
1261
+ openTag(selfClosing) {
1262
+ if (this.opt.xmlns) {
1263
+ // emit namespace binding events
1264
+ const tag = this.tag;
1265
+ // add namespace info to tag
1266
+ const qn = SAX.qname(this.tagName);
1267
+ tag.prefix = qn.prefix;
1268
+ tag.local = qn.local;
1269
+ tag.uri = tag.ns[qn.prefix] || '';
1270
+ if (tag.prefix && !tag.uri) {
1271
+ this.strictFail(`Unbound namespace prefix: ${JSON.stringify(this.tagName)}`);
1272
+ tag.uri = qn.prefix;
1273
+ }
1274
+ const parent = this.tags[this.tags.length - 1] || this;
1275
+ if (tag.ns && parent.ns !== tag.ns) {
1276
+ const that = this;
1277
+ Object.keys(tag.ns).forEach((p) => {
1278
+ that.emitNode('onopennamespace', {
1279
+ prefix: p,
1280
+ uri: tag.ns[p]
1281
+ });
1282
+ });
1283
+ }
1284
+ // handle deferred onattribute events
1285
+ // Note: do not apply default ns to attributes:
1286
+ // http://www.w3.org/TR/REC-xml-names/#defaulting
1287
+ for (let i = 0, l = this.attribList.length; i < l; i++) {
1288
+ const nv = this.attribList[i];
1289
+ const name = nv[0];
1290
+ const value = nv[1];
1291
+ const qualName = SAX.qname(name, true);
1292
+ const prefix = qualName.prefix;
1293
+ const local = qualName.local;
1294
+ const uri = prefix === '' ? '' : tag.ns[prefix] || '';
1295
+ const a = {
1296
+ name,
1297
+ value,
1298
+ prefix,
1299
+ local,
1300
+ uri
1301
+ };
1302
+ // if there's any attributes with an undefined namespace,
1303
+ // then fail on them now.
1304
+ if (prefix && prefix !== 'xmlns' && !uri) {
1305
+ this.strictFail(`Unbound namespace prefix: ${JSON.stringify(prefix)}`);
1306
+ a.uri = prefix;
1307
+ }
1308
+ this.tag.attributes[name] = a;
1309
+ this.emitNode('onattribute', a);
1310
+ }
1311
+ this.attribList.length = 0;
1312
+ }
1313
+ this.tag.isSelfClosing = Boolean(selfClosing);
1314
+ // process the tag
1315
+ this.sawRoot = true;
1316
+ this.tags.push(this.tag);
1317
+ this.emitNode('onopentag', this.tag);
1318
+ if (!selfClosing) {
1319
+ // special case for <script> in non-strict mode.
1320
+ if (!this.noscript && this.tagName.toLowerCase() === 'script') {
1321
+ this.state = this.S.SCRIPT;
1322
+ }
1323
+ else {
1324
+ this.state = this.S.TEXT;
1325
+ }
1326
+ this.tag = null;
1327
+ this.tagName = '';
1328
+ }
1329
+ this.attribName = this.attribValue = '';
1330
+ this.attribList.length = 0;
1086
1331
  }
1087
- this.textNode = '';
1088
- }
1089
- checkBufferLength() {
1090
- const maxAllowed = Math.max(this.opt.MAX_BUFFER_LENGTH, 10);
1091
- let maxActual = 0;
1092
- for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
1093
- var _this$this$BUFFERS$i;
1094
- const len = ((_this$this$BUFFERS$i = this[this.BUFFERS[i]]) === null || _this$this$BUFFERS$i === void 0 ? void 0 : _this$this$BUFFERS$i.length) || 0;
1095
- if (len > maxAllowed) {
1096
- switch (this.BUFFERS[i]) {
1097
- case 'textNode':
1098
- this.closeText();
1099
- break;
1100
- case 'cdata':
1101
- this.emitNode('oncdata', this.cdata);
1102
- this.cdata = '';
1103
- break;
1104
- case 'script':
1332
+ closeTag() {
1333
+ if (!this.tagName) {
1334
+ this.strictFail('Weird empty close tag.');
1335
+ this.textNode += '</>';
1336
+ this.state = this.S.TEXT;
1337
+ return;
1338
+ }
1339
+ if (this.script) {
1340
+ if (this.tagName !== 'script') {
1341
+ this.script += `</${this.tagName}>`;
1342
+ this.tagName = '';
1343
+ this.state = this.S.SCRIPT;
1344
+ return;
1345
+ }
1105
1346
  this.emitNode('onscript', this.script);
1106
1347
  this.script = '';
1107
- break;
1108
- default:
1109
- this.errorFunction(`Max buffer length exceeded: ${this.BUFFERS[i]}`);
1110
1348
  }
1111
- }
1112
- maxActual = Math.max(maxActual, len);
1113
- }
1114
- const m = this.opt.MAX_BUFFER_LENGTH - maxActual;
1115
- this.bufferCheckPosition = m + this.position;
1116
- }
1117
- openTag(selfClosing) {
1118
- if (this.opt.xmlns) {
1119
- const tag = this.tag;
1120
- const qn = SAX.qname(this.tagName);
1121
- tag.prefix = qn.prefix;
1122
- tag.local = qn.local;
1123
- tag.uri = tag.ns[qn.prefix] || '';
1124
- if (tag.prefix && !tag.uri) {
1125
- this.strictFail(`Unbound namespace prefix: ${JSON.stringify(this.tagName)}`);
1126
- tag.uri = qn.prefix;
1127
- }
1128
- const parent = this.tags[this.tags.length - 1] || this;
1129
- if (tag.ns && parent.ns !== tag.ns) {
1130
- const that = this;
1131
- Object.keys(tag.ns).forEach(p => {
1132
- that.emitNode('onopennamespace', {
1133
- prefix: p,
1134
- uri: tag.ns[p]
1135
- });
1136
- });
1137
- }
1138
- for (let i = 0, l = this.attribList.length; i < l; i++) {
1139
- const nv = this.attribList[i];
1140
- const name = nv[0];
1141
- const value = nv[1];
1142
- const qualName = SAX.qname(name, true);
1143
- const prefix = qualName.prefix;
1144
- const local = qualName.local;
1145
- const uri = prefix === '' ? '' : tag.ns[prefix] || '';
1146
- const a = {
1147
- name,
1148
- value,
1149
- prefix,
1150
- local,
1151
- uri
1152
- };
1153
- if (prefix && prefix !== 'xmlns' && !uri) {
1154
- this.strictFail(`Unbound namespace prefix: ${JSON.stringify(prefix)}`);
1155
- a.uri = prefix;
1349
+ // first make sure that the closing tag actually exists.
1350
+ // <a><b></c></b></a> will close everything, otherwise.
1351
+ let t = this.tags.length;
1352
+ let tagName = this.tagName;
1353
+ if (!this.strict) {
1354
+ tagName = tagName[this.looseCase]();
1156
1355
  }
1157
- this.tag.attributes[name] = a;
1158
- this.emitNode('onattribute', a);
1159
- }
1160
- this.attribList.length = 0;
1161
- }
1162
- this.tag.isSelfClosing = Boolean(selfClosing);
1163
- this.sawRoot = true;
1164
- this.tags.push(this.tag);
1165
- this.emitNode('onopentag', this.tag);
1166
- if (!selfClosing) {
1167
- if (!this.noscript && this.tagName.toLowerCase() === 'script') {
1168
- this.state = this.S.SCRIPT;
1169
- } else {
1170
- this.state = this.S.TEXT;
1171
- }
1172
- this.tag = null;
1173
- this.tagName = '';
1174
- }
1175
- this.attribName = this.attribValue = '';
1176
- this.attribList.length = 0;
1177
- }
1178
- closeTag() {
1179
- if (!this.tagName) {
1180
- this.strictFail('Weird empty close tag.');
1181
- this.textNode += '</>';
1182
- this.state = this.S.TEXT;
1183
- return;
1184
- }
1185
- if (this.script) {
1186
- if (this.tagName !== 'script') {
1187
- this.script += `</${this.tagName}>`;
1188
- this.tagName = '';
1189
- this.state = this.S.SCRIPT;
1190
- return;
1191
- }
1192
- this.emitNode('onscript', this.script);
1193
- this.script = '';
1194
- }
1195
- let t = this.tags.length;
1196
- let tagName = this.tagName;
1197
- if (!this.strict) {
1198
- tagName = tagName[this.looseCase]();
1199
- }
1200
- while (t--) {
1201
- const close = this.tags[t];
1202
- if (close.name !== tagName) {
1203
- this.strictFail('Unexpected close tag');
1204
- } else {
1205
- break;
1206
- }
1207
- }
1208
- if (t < 0) {
1209
- this.strictFail(`Unmatched closing tag: ${this.tagName}`);
1210
- this.textNode += `</${this.tagName}>`;
1211
- this.state = this.S.TEXT;
1212
- return;
1213
- }
1214
- this.tagName = tagName;
1215
- let s = this.tags.length;
1216
- while (s-- > t) {
1217
- const tag = this.tag = this.tags.pop();
1218
- this.tagName = this.tag.name;
1219
- this.emitNode('onclosetag', this.tagName);
1220
- const x = {};
1221
- for (const i in tag.ns) {
1222
- if (tag.ns.hasOwnProperty(i)) {
1223
- x[i] = tag.ns[i];
1356
+ while (t--) {
1357
+ const close = this.tags[t];
1358
+ if (close.name !== tagName) {
1359
+ // fail the first time in strict mode
1360
+ this.strictFail('Unexpected close tag');
1361
+ }
1362
+ else {
1363
+ break;
1364
+ }
1365
+ }
1366
+ // didn't find it. we already failed for strict, so just abort.
1367
+ if (t < 0) {
1368
+ this.strictFail(`Unmatched closing tag: ${this.tagName}`);
1369
+ this.textNode += `</${this.tagName}>`;
1370
+ this.state = this.S.TEXT;
1371
+ return;
1372
+ }
1373
+ this.tagName = tagName;
1374
+ let s = this.tags.length;
1375
+ while (s-- > t) {
1376
+ const tag = (this.tag = this.tags.pop());
1377
+ this.tagName = this.tag.name;
1378
+ this.emitNode('onclosetag', this.tagName);
1379
+ const x = {};
1380
+ for (const i in tag.ns) {
1381
+ if (tag.ns.hasOwnProperty(i)) {
1382
+ x[i] = tag.ns[i];
1383
+ }
1384
+ }
1385
+ const parent = this.tags[this.tags.length - 1] || this;
1386
+ if (this.opt.xmlns && tag.ns !== parent.ns) {
1387
+ // remove namespace bindings introduced by tag
1388
+ const that = this;
1389
+ Object.keys(tag.ns).forEach((p) => {
1390
+ const n = tag.ns[p];
1391
+ that.emitNode('onclosenamespace', { prefix: p, uri: n });
1392
+ });
1393
+ }
1224
1394
  }
1225
- }
1226
- const parent = this.tags[this.tags.length - 1] || this;
1227
- if (this.opt.xmlns && tag.ns !== parent.ns) {
1228
- const that = this;
1229
- Object.keys(tag.ns).forEach(p => {
1230
- const n = tag.ns[p];
1231
- that.emitNode('onclosenamespace', {
1232
- prefix: p,
1233
- uri: n
1234
- });
1235
- });
1236
- }
1395
+ if (t === 0)
1396
+ this.closedRoot = true;
1397
+ this.tagName = this.attribValue = this.attribName = '';
1398
+ this.attribList.length = 0;
1399
+ this.state = this.S.TEXT;
1237
1400
  }
1238
- if (t === 0) this.closedRoot = true;
1239
- this.tagName = this.attribValue = this.attribName = '';
1240
- this.attribList.length = 0;
1241
- this.state = this.S.TEXT;
1242
- }
1243
1401
  }
1402
+ /**
1403
+ *
1404
+ * @todo Weird inheritance, with some variables initialized in subclass
1405
+ */
1244
1406
  export class SAXParser extends SAX {
1245
- constructor(opt) {
1246
- super();
1247
- this.opt = DEFAULT_SAX_PARSER_OPTIONS;
1248
- this.events = DEFAULT_SAX_EVENTS;
1249
- this.clearBuffers();
1250
- this.opt = opt = {
1251
- ...this.opt,
1252
- ...opt
1253
- };
1254
- this.events = {
1255
- ...this.events,
1256
- ...opt
1257
- };
1258
- this.q = this.c = '';
1259
- this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags;
1260
- this.bufferCheckPosition = this.opt.MAX_BUFFER_LENGTH;
1261
- this.looseCase = this.opt.lowercase ? 'toLowerCase' : 'toUpperCase';
1262
- this.tags = [];
1263
- this.closed = this.closedRoot = this.sawRoot = false;
1264
- this.tag = this.error = null;
1265
- this.strict = Boolean(this.opt.strict);
1266
- this.noscript = Boolean(this.opt.strict || this.opt.noscript);
1267
- this.state = this.S.BEGIN;
1268
- this.strictEntities = this.opt.strictEntities;
1269
- this.ENTITIES = this.strictEntities ? Object.create(this.XML_ENTITIES) : Object.create(this.ENTITIES);
1270
- this.attribList = [];
1271
- if (this.opt.xmlns) {
1272
- this.ns = Object.create(this.rootNS);
1407
+ static ENTITIES = ENTITIES;
1408
+ opt = DEFAULT_SAX_PARSER_OPTIONS;
1409
+ events = DEFAULT_SAX_EVENTS;
1410
+ constructor(opt) {
1411
+ super();
1412
+ this.clearBuffers();
1413
+ this.opt = opt = { ...this.opt, ...opt };
1414
+ this.events = { ...this.events, ...opt };
1415
+ this.q = this.c = '';
1416
+ this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags;
1417
+ this.bufferCheckPosition = this.opt.MAX_BUFFER_LENGTH;
1418
+ this.looseCase = this.opt.lowercase ? 'toLowerCase' : 'toUpperCase';
1419
+ this.tags = [];
1420
+ this.closed = this.closedRoot = this.sawRoot = false;
1421
+ this.tag = this.error = null;
1422
+ this.strict = Boolean(this.opt.strict);
1423
+ this.noscript = Boolean(this.opt.strict || this.opt.noscript);
1424
+ this.state = this.S.BEGIN;
1425
+ this.strictEntities = this.opt.strictEntities;
1426
+ this.ENTITIES = this.strictEntities
1427
+ ? Object.create(this.XML_ENTITIES)
1428
+ : Object.create(this.ENTITIES);
1429
+ this.attribList = [];
1430
+ // namespaces form a prototype chain.
1431
+ // it always points at the current tag,
1432
+ // which protos to its parent tag.
1433
+ if (this.opt.xmlns) {
1434
+ this.ns = Object.create(this.rootNS);
1435
+ }
1436
+ // mostly just for error reporting
1437
+ this.trackPosition = this.opt.position !== false;
1438
+ if (this.trackPosition) {
1439
+ this.position = this.line = this.column = 0;
1440
+ }
1441
+ this.emit('onready');
1442
+ }
1443
+ resume() {
1444
+ this.error = null;
1445
+ return this;
1446
+ }
1447
+ close() {
1448
+ return this.write(null);
1273
1449
  }
1274
- this.trackPosition = this.opt.position !== false;
1275
- if (this.trackPosition) {
1276
- this.position = this.line = this.column = 0;
1450
+ flush() {
1451
+ this.flushBuffers();
1277
1452
  }
1278
- this.emit('onready');
1279
- }
1280
- resume() {
1281
- this.error = null;
1282
- return this;
1283
- }
1284
- close() {
1285
- return this.write(null);
1286
- }
1287
- flush() {
1288
- this.flushBuffers();
1289
- }
1290
1453
  }
1291
- SAXParser.ENTITIES = ENTITIES;
1292
- //# sourceMappingURL=sax.js.map