sax 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/lib/sax.js +1450 -1317
  2. package/package.json +6 -3
package/lib/sax.js CHANGED
@@ -1,1430 +1,1563 @@
1
- // wrapper for non-node envs
2
- ;(function (sax) {
3
-
4
- sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
5
- sax.SAXParser = SAXParser
6
- sax.SAXStream = SAXStream
7
- sax.createStream = createStream
8
-
9
- // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
10
- // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
11
- // since that's the earliest that a buffer overrun could occur. This way, checks are
12
- // as rare as required, but as often as necessary to ensure never crossing this bound.
13
- // Furthermore, buffers are only tested at most once per write(), so passing a very
14
- // large string into write() might have undesirable effects, but this is manageable by
15
- // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
16
- // edge case, result in creating at most one complete copy of the string passed in.
17
- // Set to Infinity to have unlimited buffers.
18
- sax.MAX_BUFFER_LENGTH = 64 * 1024
19
-
20
- var buffers = [
21
- "comment", "sgmlDecl", "textNode", "tagName", "doctype",
22
- "procInstName", "procInstBody", "entity", "attribName",
23
- "attribValue", "cdata", "script"
24
- ]
25
-
26
- sax.EVENTS = // for discoverability.
27
- [ "text"
28
- , "processinginstruction"
29
- , "sgmldeclaration"
30
- , "doctype"
31
- , "comment"
32
- , "attribute"
33
- , "opentag"
34
- , "closetag"
35
- , "opencdata"
36
- , "cdata"
37
- , "closecdata"
38
- , "error"
39
- , "end"
40
- , "ready"
41
- , "script"
42
- , "opennamespace"
43
- , "closenamespace"
1
+ ;(function (sax) { // wrapper for non-node envs
2
+ sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
3
+ sax.SAXParser = SAXParser
4
+ sax.SAXStream = SAXStream
5
+ sax.createStream = createStream
6
+
7
+ // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
8
+ // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
9
+ // since that's the earliest that a buffer overrun could occur. This way, checks are
10
+ // as rare as required, but as often as necessary to ensure never crossing this bound.
11
+ // Furthermore, buffers are only tested at most once per write(), so passing a very
12
+ // large string into write() might have undesirable effects, but this is manageable by
13
+ // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
14
+ // edge case, result in creating at most one complete copy of the string passed in.
15
+ // Set to Infinity to have unlimited buffers.
16
+ sax.MAX_BUFFER_LENGTH = 64 * 1024
17
+
18
+ var buffers = [
19
+ 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
20
+ 'procInstName', 'procInstBody', 'entity', 'attribName',
21
+ 'attribValue', 'cdata', 'script'
44
22
  ]
45
23
 
46
- function SAXParser (strict, opt) {
47
- if (!(this instanceof SAXParser)) return new SAXParser(strict, opt)
48
-
49
- var parser = this
50
- clearBuffers(parser)
51
- parser.q = parser.c = ""
52
- parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
53
- parser.opt = opt || {}
54
- parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
55
- parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase"
56
- parser.tags = []
57
- parser.closed = parser.closedRoot = parser.sawRoot = false
58
- parser.tag = parser.error = null
59
- parser.strict = !!strict
60
- parser.noscript = !!(strict || parser.opt.noscript)
61
- parser.state = S.BEGIN
62
- parser.strictEntities = parser.opt.strictEntities
63
- parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
64
- parser.attribList = []
65
-
66
- // namespaces form a prototype chain.
67
- // it always points at the current tag,
68
- // which protos to its parent tag.
69
- if (parser.opt.xmlns) parser.ns = Object.create(rootNS)
70
-
71
- // mostly just for error reporting
72
- parser.trackPosition = parser.opt.position !== false
73
- if (parser.trackPosition) {
74
- parser.position = parser.line = parser.column = 0
75
- }
76
- emit(parser, "onready")
77
- }
78
-
79
- if (!Object.create) Object.create = function (o) {
80
- function f () { this.__proto__ = o }
81
- f.prototype = o
82
- return new f
83
- }
84
-
85
- if (!Object.getPrototypeOf) Object.getPrototypeOf = function (o) {
86
- return o.__proto__
87
- }
88
-
89
- if (!Object.keys) Object.keys = function (o) {
90
- var a = []
91
- for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
92
- return a
93
- }
94
-
95
- function checkBufferLength (parser) {
96
- var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
97
- , maxActual = 0
98
- for (var i = 0, l = buffers.length; i < l; i ++) {
99
- var len = parser[buffers[i]].length
100
- if (len > maxAllowed) {
101
- // Text/cdata nodes can get big, and since they're buffered,
102
- // we can get here under normal conditions.
103
- // Avoid issues by emitting the text node now,
104
- // so at least it won't get any bigger.
105
- switch (buffers[i]) {
106
- case "textNode":
107
- closeText(parser)
108
- break
24
+ sax.EVENTS = [
25
+ 'text',
26
+ 'processinginstruction',
27
+ 'sgmldeclaration',
28
+ 'doctype',
29
+ 'comment',
30
+ 'attribute',
31
+ 'opentag',
32
+ 'closetag',
33
+ 'opencdata',
34
+ 'cdata',
35
+ 'closecdata',
36
+ 'error',
37
+ 'end',
38
+ 'ready',
39
+ 'script',
40
+ 'opennamespace',
41
+ 'closenamespace'
42
+ ]
109
43
 
110
- case "cdata":
111
- emitNode(parser, "oncdata", parser.cdata)
112
- parser.cdata = ""
113
- break
44
+ function SAXParser (strict, opt) {
45
+ if (!(this instanceof SAXParser)) {
46
+ return new SAXParser(strict, opt)
47
+ }
114
48
 
115
- case "script":
116
- emitNode(parser, "onscript", parser.script)
117
- parser.script = ""
118
- break
49
+ var parser = this
50
+ clearBuffers(parser)
51
+ parser.q = parser.c = ''
52
+ parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
53
+ parser.opt = opt || {}
54
+ parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
55
+ parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
56
+ parser.tags = []
57
+ parser.closed = parser.closedRoot = parser.sawRoot = false
58
+ parser.tag = parser.error = null
59
+ parser.strict = !!strict
60
+ parser.noscript = !!(strict || parser.opt.noscript)
61
+ parser.state = S.BEGIN
62
+ parser.strictEntities = parser.opt.strictEntities
63
+ parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
64
+ parser.attribList = []
65
+
66
+ // namespaces form a prototype chain.
67
+ // it always points at the current tag,
68
+ // which protos to its parent tag.
69
+ if (parser.opt.xmlns) {
70
+ parser.ns = Object.create(rootNS)
71
+ }
119
72
 
120
- default:
121
- error(parser, "Max buffer length exceeded: "+buffers[i])
122
- }
73
+ // mostly just for error reporting
74
+ parser.trackPosition = parser.opt.position !== false
75
+ if (parser.trackPosition) {
76
+ parser.position = parser.line = parser.column = 0
123
77
  }
124
- maxActual = Math.max(maxActual, len)
125
- }
126
- // schedule the next check for the earliest possible buffer overrun.
127
- parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
128
- + parser.position
129
- }
130
-
131
- function clearBuffers (parser) {
132
- for (var i = 0, l = buffers.length; i < l; i ++) {
133
- parser[buffers[i]] = ""
78
+ emit(parser, 'onready')
134
79
  }
135
- }
136
80
 
137
- function flushBuffers (parser) {
138
- closeText(parser)
139
- if (parser.cdata !== "") {
140
- emitNode(parser, "oncdata", parser.cdata)
141
- parser.cdata = ""
81
+ if (!Object.create) {
82
+ Object.create = function (o) {
83
+ function F () {}
84
+ F.prototype = o
85
+ var newf = new F()
86
+ return newf
87
+ }
142
88
  }
143
- if (parser.script !== "") {
144
- emitNode(parser, "onscript", parser.script)
145
- parser.script = ""
89
+
90
+ if (!Object.keys) {
91
+ Object.keys = function (o) {
92
+ var a = []
93
+ for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
94
+ return a
95
+ }
146
96
  }
147
- }
148
-
149
- SAXParser.prototype =
150
- { end: function () { end(this) }
151
- , write: write
152
- , resume: function () { this.error = null; return this }
153
- , close: function () { return this.write(null) }
154
- , flush: function () { flushBuffers(this) }
97
+
98
+ function checkBufferLength (parser) {
99
+ var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
100
+ var maxActual = 0
101
+ for (var i = 0, l = buffers.length; i < l; i++) {
102
+ var len = parser[buffers[i]].length
103
+ if (len > maxAllowed) {
104
+ // Text/cdata nodes can get big, and since they're buffered,
105
+ // we can get here under normal conditions.
106
+ // Avoid issues by emitting the text node now,
107
+ // so at least it won't get any bigger.
108
+ switch (buffers[i]) {
109
+ case 'textNode':
110
+ closeText(parser)
111
+ break
112
+
113
+ case 'cdata':
114
+ emitNode(parser, 'oncdata', parser.cdata)
115
+ parser.cdata = ''
116
+ break
117
+
118
+ case 'script':
119
+ emitNode(parser, 'onscript', parser.script)
120
+ parser.script = ''
121
+ break
122
+
123
+ default:
124
+ error(parser, 'Max buffer length exceeded: ' + buffers[i])
125
+ }
126
+ }
127
+ maxActual = Math.max(maxActual, len)
128
+ }
129
+ // schedule the next check for the earliest possible buffer overrun.
130
+ var m = sax.MAX_BUFFER_LENGTH - maxActual
131
+ parser.bufferCheckPosition = m + parser.position
155
132
  }
156
133
 
157
- try {
158
- var Stream = require("stream").Stream
159
- } catch (ex) {
160
- var Stream = function () {}
161
- }
134
+ function clearBuffers (parser) {
135
+ for (var i = 0, l = buffers.length; i < l; i++) {
136
+ parser[buffers[i]] = ''
137
+ }
138
+ }
162
139
 
140
+ function flushBuffers (parser) {
141
+ closeText(parser)
142
+ if (parser.cdata !== '') {
143
+ emitNode(parser, 'oncdata', parser.cdata)
144
+ parser.cdata = ''
145
+ }
146
+ if (parser.script !== '') {
147
+ emitNode(parser, 'onscript', parser.script)
148
+ parser.script = ''
149
+ }
150
+ }
163
151
 
164
- var streamWraps = sax.EVENTS.filter(function (ev) {
165
- return ev !== "error" && ev !== "end"
166
- })
152
+ SAXParser.prototype = {
153
+ end: function () { end(this) },
154
+ write: write,
155
+ resume: function () { this.error = null; return this },
156
+ close: function () { return this.write(null) },
157
+ flush: function () { flushBuffers(this) }
158
+ }
167
159
 
168
- function createStream (strict, opt) {
169
- return new SAXStream(strict, opt)
170
- }
160
+ var Stream
161
+ try {
162
+ Stream = require('stream').Stream
163
+ } catch (ex) {
164
+ Stream = function () {}
165
+ }
171
166
 
172
- function SAXStream (strict, opt) {
173
- if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)
167
+ var streamWraps = sax.EVENTS.filter(function (ev) {
168
+ return ev !== 'error' && ev !== 'end'
169
+ })
174
170
 
175
- Stream.apply(this)
171
+ function createStream (strict, opt) {
172
+ return new SAXStream(strict, opt)
173
+ }
176
174
 
177
- this._parser = new SAXParser(strict, opt)
178
- this.writable = true
179
- this.readable = true
175
+ function SAXStream (strict, opt) {
176
+ if (!(this instanceof SAXStream)) {
177
+ return new SAXStream(strict, opt)
178
+ }
180
179
 
180
+ Stream.apply(this)
181
181
 
182
- var me = this
182
+ this._parser = new SAXParser(strict, opt)
183
+ this.writable = true
184
+ this.readable = true
183
185
 
184
- this._parser.onend = function () {
185
- me.emit("end")
186
- }
186
+ var me = this
187
187
 
188
- this._parser.onerror = function (er) {
189
- me.emit("error", er)
188
+ this._parser.onend = function () {
189
+ me.emit('end')
190
+ }
190
191
 
191
- // if didn't throw, then means error was handled.
192
- // go ahead and clear error, so we can write again.
193
- me._parser.error = null
194
- }
192
+ this._parser.onerror = function (er) {
193
+ me.emit('error', er)
195
194
 
196
- this._decoder = null;
195
+ // if didn't throw, then means error was handled.
196
+ // go ahead and clear error, so we can write again.
197
+ me._parser.error = null
198
+ }
197
199
 
198
- streamWraps.forEach(function (ev) {
199
- Object.defineProperty(me, "on" + ev, {
200
- get: function () { return me._parser["on" + ev] },
201
- set: function (h) {
202
- if (!h) {
203
- me.removeAllListeners(ev)
204
- return me._parser["on"+ev] = h
205
- }
206
- me.on(ev, h)
207
- },
208
- enumerable: true,
209
- configurable: false
200
+ this._decoder = null
201
+
202
+ streamWraps.forEach(function (ev) {
203
+ Object.defineProperty(me, 'on' + ev, {
204
+ get: function () {
205
+ return me._parser['on' + ev]
206
+ },
207
+ set: function (h) {
208
+ if (!h) {
209
+ me.removeAllListeners(ev)
210
+ me._parser['on' + ev] = h
211
+ return h
212
+ }
213
+ me.on(ev, h)
214
+ },
215
+ enumerable: true,
216
+ configurable: false
217
+ })
210
218
  })
211
- })
212
- }
219
+ }
213
220
 
214
- SAXStream.prototype = Object.create(Stream.prototype,
215
- { constructor: { value: SAXStream } })
221
+ SAXStream.prototype = Object.create(Stream.prototype, {
222
+ constructor: {
223
+ value: SAXStream
224
+ }
225
+ })
216
226
 
217
- SAXStream.prototype.write = function (data) {
218
- if (typeof Buffer === 'function' &&
227
+ SAXStream.prototype.write = function (data) {
228
+ if (typeof Buffer === 'function' &&
219
229
  typeof Buffer.isBuffer === 'function' &&
220
230
  Buffer.isBuffer(data)) {
221
- if (!this._decoder) {
222
- var SD = require('string_decoder').StringDecoder
223
- this._decoder = new SD('utf8')
231
+ if (!this._decoder) {
232
+ var SD = require('string_decoder').StringDecoder
233
+ this._decoder = new SD('utf8')
234
+ }
235
+ data = this._decoder.write(data)
224
236
  }
225
- data = this._decoder.write(data);
237
+
238
+ this._parser.write(data.toString())
239
+ this.emit('data', data)
240
+ return true
226
241
  }
227
242
 
228
- this._parser.write(data.toString())
229
- this.emit("data", data)
230
- return true
231
- }
232
-
233
- SAXStream.prototype.end = function (chunk) {
234
- if (chunk && chunk.length) this.write(chunk)
235
- this._parser.end()
236
- return true
237
- }
238
-
239
- SAXStream.prototype.on = function (ev, handler) {
240
- var me = this
241
- if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
242
- me._parser["on"+ev] = function () {
243
- var args = arguments.length === 1 ? [arguments[0]]
244
- : Array.apply(null, arguments)
245
- args.splice(0, 0, ev)
246
- me.emit.apply(me, args)
243
+ SAXStream.prototype.end = function (chunk) {
244
+ if (chunk && chunk.length) {
245
+ this.write(chunk)
247
246
  }
247
+ this._parser.end()
248
+ return true
248
249
  }
249
250
 
250
- return Stream.prototype.on.call(me, ev, handler)
251
- }
251
+ SAXStream.prototype.on = function (ev, handler) {
252
+ var me = this
253
+ if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) {
254
+ me._parser['on' + ev] = function () {
255
+ var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)
256
+ args.splice(0, 0, ev)
257
+ me.emit.apply(me, args)
258
+ }
259
+ }
252
260
 
261
+ return Stream.prototype.on.call(me, ev, handler)
262
+ }
253
263
 
264
+ // character classes and tokens
265
+ var whitespace = '\r\n\t '
254
266
 
255
- // character classes and tokens
256
- var whitespace = "\r\n\t "
257
267
  // this really needs to be replaced with character classes.
258
268
  // XML allows all manner of ridiculous numbers and digits.
259
- , number = "0124356789"
260
- , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
269
+ var number = '0124356789'
270
+ var letter = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
271
+
261
272
  // (Letter | "_" | ":")
262
- , quote = "'\""
263
- , entity = number+letter+"#"
264
- , attribEnd = whitespace + ">"
265
- , CDATA = "[CDATA["
266
- , DOCTYPE = "DOCTYPE"
267
- , XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
268
- , XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
269
- , rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
270
-
271
- // turn all the string character sets into character class objects.
272
- whitespace = charClass(whitespace)
273
- number = charClass(number)
274
- letter = charClass(letter)
275
-
276
- // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
277
- // This implementation works on strings, a single character at a time
278
- // as such, it cannot ever support astral-plane characters (10000-EFFFF)
279
- // without a significant breaking change to either this parser, or the
280
- // JavaScript language. Implementation of an emoji-capable xml parser
281
- // is left as an exercise for the reader.
282
- var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
283
-
284
- var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
285
-
286
- quote = charClass(quote)
287
- entity = charClass(entity)
288
- attribEnd = charClass(attribEnd)
289
-
290
- function charClass (str) {
291
- return str.split("").reduce(function (s, c) {
292
- s[c] = true
293
- return s
294
- }, {})
295
- }
296
-
297
- function isRegExp (c) {
298
- return Object.prototype.toString.call(c) === '[object RegExp]'
299
- }
300
-
301
- function is (charclass, c) {
302
- return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
303
- }
304
-
305
- function not (charclass, c) {
306
- return !is(charclass, c)
307
- }
308
-
309
- var S = 0
310
- sax.STATE =
311
- { BEGIN : S++ // leading byte order mark or whitespace
312
- , BEGIN_WHITESPACE : S++ // leading whitespace
313
- , TEXT : S++ // general stuff
314
- , TEXT_ENTITY : S++ // &amp and such.
315
- , OPEN_WAKA : S++ // <
316
- , SGML_DECL : S++ // <!BLARG
317
- , SGML_DECL_QUOTED : S++ // <!BLARG foo "bar
318
- , DOCTYPE : S++ // <!DOCTYPE
319
- , DOCTYPE_QUOTED : S++ // <!DOCTYPE "//blah
320
- , DOCTYPE_DTD : S++ // <!DOCTYPE "//blah" [ ...
321
- , DOCTYPE_DTD_QUOTED : S++ // <!DOCTYPE "//blah" [ "foo
322
- , COMMENT_STARTING : S++ // <!-
323
- , COMMENT : S++ // <!--
324
- , COMMENT_ENDING : S++ // <!-- blah -
325
- , COMMENT_ENDED : S++ // <!-- blah --
326
- , CDATA : S++ // <![CDATA[ something
327
- , CDATA_ENDING : S++ // ]
328
- , CDATA_ENDING_2 : S++ // ]]
329
- , PROC_INST : S++ // <?hi
330
- , PROC_INST_BODY : S++ // <?hi there
331
- , PROC_INST_ENDING : S++ // <?hi "there" ?
332
- , OPEN_TAG : S++ // <strong
333
- , OPEN_TAG_SLASH : S++ // <strong /
334
- , ATTRIB : S++ // <a
335
- , ATTRIB_NAME : S++ // <a foo
336
- , ATTRIB_NAME_SAW_WHITE : S++ // <a foo _
337
- , ATTRIB_VALUE : S++ // <a foo=
338
- , ATTRIB_VALUE_QUOTED : S++ // <a foo="bar
339
- , ATTRIB_VALUE_CLOSED : S++ // <a foo="bar"
340
- , ATTRIB_VALUE_UNQUOTED : S++ // <a foo=bar
341
- , ATTRIB_VALUE_ENTITY_Q : S++ // <foo bar="&quot;"
342
- , ATTRIB_VALUE_ENTITY_U : S++ // <foo bar=&quot;
343
- , CLOSE_TAG : S++ // </a
344
- , CLOSE_TAG_SAW_WHITE : S++ // </a >
345
- , SCRIPT : S++ // <script> ...
346
- , SCRIPT_ENDING : S++ // <script> ... <
347
- }
348
-
349
- sax.XML_ENTITIES =
350
- { "amp" : "&"
351
- , "gt" : ">"
352
- , "lt" : "<"
353
- , "quot" : "\""
354
- , "apos" : "'"
355
- }
356
-
357
- sax.ENTITIES =
358
- { "amp" : "&"
359
- , "gt" : ">"
360
- , "lt" : "<"
361
- , "quot" : "\""
362
- , "apos" : "'"
363
- , "AElig" : 198
364
- , "Aacute" : 193
365
- , "Acirc" : 194
366
- , "Agrave" : 192
367
- , "Aring" : 197
368
- , "Atilde" : 195
369
- , "Auml" : 196
370
- , "Ccedil" : 199
371
- , "ETH" : 208
372
- , "Eacute" : 201
373
- , "Ecirc" : 202
374
- , "Egrave" : 200
375
- , "Euml" : 203
376
- , "Iacute" : 205
377
- , "Icirc" : 206
378
- , "Igrave" : 204
379
- , "Iuml" : 207
380
- , "Ntilde" : 209
381
- , "Oacute" : 211
382
- , "Ocirc" : 212
383
- , "Ograve" : 210
384
- , "Oslash" : 216
385
- , "Otilde" : 213
386
- , "Ouml" : 214
387
- , "THORN" : 222
388
- , "Uacute" : 218
389
- , "Ucirc" : 219
390
- , "Ugrave" : 217
391
- , "Uuml" : 220
392
- , "Yacute" : 221
393
- , "aacute" : 225
394
- , "acirc" : 226
395
- , "aelig" : 230
396
- , "agrave" : 224
397
- , "aring" : 229
398
- , "atilde" : 227
399
- , "auml" : 228
400
- , "ccedil" : 231
401
- , "eacute" : 233
402
- , "ecirc" : 234
403
- , "egrave" : 232
404
- , "eth" : 240
405
- , "euml" : 235
406
- , "iacute" : 237
407
- , "icirc" : 238
408
- , "igrave" : 236
409
- , "iuml" : 239
410
- , "ntilde" : 241
411
- , "oacute" : 243
412
- , "ocirc" : 244
413
- , "ograve" : 242
414
- , "oslash" : 248
415
- , "otilde" : 245
416
- , "ouml" : 246
417
- , "szlig" : 223
418
- , "thorn" : 254
419
- , "uacute" : 250
420
- , "ucirc" : 251
421
- , "ugrave" : 249
422
- , "uuml" : 252
423
- , "yacute" : 253
424
- , "yuml" : 255
425
- , "copy" : 169
426
- , "reg" : 174
427
- , "nbsp" : 160
428
- , "iexcl" : 161
429
- , "cent" : 162
430
- , "pound" : 163
431
- , "curren" : 164
432
- , "yen" : 165
433
- , "brvbar" : 166
434
- , "sect" : 167
435
- , "uml" : 168
436
- , "ordf" : 170
437
- , "laquo" : 171
438
- , "not" : 172
439
- , "shy" : 173
440
- , "macr" : 175
441
- , "deg" : 176
442
- , "plusmn" : 177
443
- , "sup1" : 185
444
- , "sup2" : 178
445
- , "sup3" : 179
446
- , "acute" : 180
447
- , "micro" : 181
448
- , "para" : 182
449
- , "middot" : 183
450
- , "cedil" : 184
451
- , "ordm" : 186
452
- , "raquo" : 187
453
- , "frac14" : 188
454
- , "frac12" : 189
455
- , "frac34" : 190
456
- , "iquest" : 191
457
- , "times" : 215
458
- , "divide" : 247
459
- , "OElig" : 338
460
- , "oelig" : 339
461
- , "Scaron" : 352
462
- , "scaron" : 353
463
- , "Yuml" : 376
464
- , "fnof" : 402
465
- , "circ" : 710
466
- , "tilde" : 732
467
- , "Alpha" : 913
468
- , "Beta" : 914
469
- , "Gamma" : 915
470
- , "Delta" : 916
471
- , "Epsilon" : 917
472
- , "Zeta" : 918
473
- , "Eta" : 919
474
- , "Theta" : 920
475
- , "Iota" : 921
476
- , "Kappa" : 922
477
- , "Lambda" : 923
478
- , "Mu" : 924
479
- , "Nu" : 925
480
- , "Xi" : 926
481
- , "Omicron" : 927
482
- , "Pi" : 928
483
- , "Rho" : 929
484
- , "Sigma" : 931
485
- , "Tau" : 932
486
- , "Upsilon" : 933
487
- , "Phi" : 934
488
- , "Chi" : 935
489
- , "Psi" : 936
490
- , "Omega" : 937
491
- , "alpha" : 945
492
- , "beta" : 946
493
- , "gamma" : 947
494
- , "delta" : 948
495
- , "epsilon" : 949
496
- , "zeta" : 950
497
- , "eta" : 951
498
- , "theta" : 952
499
- , "iota" : 953
500
- , "kappa" : 954
501
- , "lambda" : 955
502
- , "mu" : 956
503
- , "nu" : 957
504
- , "xi" : 958
505
- , "omicron" : 959
506
- , "pi" : 960
507
- , "rho" : 961
508
- , "sigmaf" : 962
509
- , "sigma" : 963
510
- , "tau" : 964
511
- , "upsilon" : 965
512
- , "phi" : 966
513
- , "chi" : 967
514
- , "psi" : 968
515
- , "omega" : 969
516
- , "thetasym" : 977
517
- , "upsih" : 978
518
- , "piv" : 982
519
- , "ensp" : 8194
520
- , "emsp" : 8195
521
- , "thinsp" : 8201
522
- , "zwnj" : 8204
523
- , "zwj" : 8205
524
- , "lrm" : 8206
525
- , "rlm" : 8207
526
- , "ndash" : 8211
527
- , "mdash" : 8212
528
- , "lsquo" : 8216
529
- , "rsquo" : 8217
530
- , "sbquo" : 8218
531
- , "ldquo" : 8220
532
- , "rdquo" : 8221
533
- , "bdquo" : 8222
534
- , "dagger" : 8224
535
- , "Dagger" : 8225
536
- , "bull" : 8226
537
- , "hellip" : 8230
538
- , "permil" : 8240
539
- , "prime" : 8242
540
- , "Prime" : 8243
541
- , "lsaquo" : 8249
542
- , "rsaquo" : 8250
543
- , "oline" : 8254
544
- , "frasl" : 8260
545
- , "euro" : 8364
546
- , "image" : 8465
547
- , "weierp" : 8472
548
- , "real" : 8476
549
- , "trade" : 8482
550
- , "alefsym" : 8501
551
- , "larr" : 8592
552
- , "uarr" : 8593
553
- , "rarr" : 8594
554
- , "darr" : 8595
555
- , "harr" : 8596
556
- , "crarr" : 8629
557
- , "lArr" : 8656
558
- , "uArr" : 8657
559
- , "rArr" : 8658
560
- , "dArr" : 8659
561
- , "hArr" : 8660
562
- , "forall" : 8704
563
- , "part" : 8706
564
- , "exist" : 8707
565
- , "empty" : 8709
566
- , "nabla" : 8711
567
- , "isin" : 8712
568
- , "notin" : 8713
569
- , "ni" : 8715
570
- , "prod" : 8719
571
- , "sum" : 8721
572
- , "minus" : 8722
573
- , "lowast" : 8727
574
- , "radic" : 8730
575
- , "prop" : 8733
576
- , "infin" : 8734
577
- , "ang" : 8736
578
- , "and" : 8743
579
- , "or" : 8744
580
- , "cap" : 8745
581
- , "cup" : 8746
582
- , "int" : 8747
583
- , "there4" : 8756
584
- , "sim" : 8764
585
- , "cong" : 8773
586
- , "asymp" : 8776
587
- , "ne" : 8800
588
- , "equiv" : 8801
589
- , "le" : 8804
590
- , "ge" : 8805
591
- , "sub" : 8834
592
- , "sup" : 8835
593
- , "nsub" : 8836
594
- , "sube" : 8838
595
- , "supe" : 8839
596
- , "oplus" : 8853
597
- , "otimes" : 8855
598
- , "perp" : 8869
599
- , "sdot" : 8901
600
- , "lceil" : 8968
601
- , "rceil" : 8969
602
- , "lfloor" : 8970
603
- , "rfloor" : 8971
604
- , "lang" : 9001
605
- , "rang" : 9002
606
- , "loz" : 9674
607
- , "spades" : 9824
608
- , "clubs" : 9827
609
- , "hearts" : 9829
610
- , "diams" : 9830
611
- }
612
-
613
- Object.keys(sax.ENTITIES).forEach(function (key) {
273
+ var quote = '\'"'
274
+ var attribEnd = whitespace + '>'
275
+ var CDATA = '[CDATA['
276
+ var DOCTYPE = 'DOCTYPE'
277
+ var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
278
+ var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
279
+ var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
280
+
281
+ // turn all the string character sets into character class objects.
282
+ whitespace = charClass(whitespace)
283
+ number = charClass(number)
284
+ letter = charClass(letter)
285
+
286
+ // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
287
+ // This implementation works on strings, a single character at a time
288
+ // as such, it cannot ever support astral-plane characters (10000-EFFFF)
289
+ // without a significant breaking change to either this parser, or the
290
+ // JavaScript language. Implementation of an emoji-capable xml parser
291
+ // is left as an exercise for the reader.
292
+ var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
293
+
294
+ var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
295
+
296
+ var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
297
+ var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
298
+
299
+ quote = charClass(quote)
300
+ attribEnd = charClass(attribEnd)
301
+
302
+ function charClass (str) {
303
+ return str.split('').reduce(function (s, c) {
304
+ s[c] = true
305
+ return s
306
+ }, {})
307
+ }
308
+
309
+ function isRegExp (c) {
310
+ return Object.prototype.toString.call(c) === '[object RegExp]'
311
+ }
312
+
313
+ function is (charclass, c) {
314
+ return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
315
+ }
316
+
317
+ function not (charclass, c) {
318
+ return !is(charclass, c)
319
+ }
320
+
321
+ var S = 0
322
+ sax.STATE = {
323
+ BEGIN: S++, // leading byte order mark or whitespace
324
+ BEGIN_WHITESPACE: S++, // leading whitespace
325
+ TEXT: S++, // general stuff
326
+ TEXT_ENTITY: S++, // &amp and such.
327
+ OPEN_WAKA: S++, // <
328
+ SGML_DECL: S++, // <!BLARG
329
+ SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
330
+ DOCTYPE: S++, // <!DOCTYPE
331
+ DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
332
+ DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
333
+ DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
334
+ COMMENT_STARTING: S++, // <!-
335
+ COMMENT: S++, // <!--
336
+ COMMENT_ENDING: S++, // <!-- blah -
337
+ COMMENT_ENDED: S++, // <!-- blah --
338
+ CDATA: S++, // <![CDATA[ something
339
+ CDATA_ENDING: S++, // ]
340
+ CDATA_ENDING_2: S++, // ]]
341
+ PROC_INST: S++, // <?hi
342
+ PROC_INST_BODY: S++, // <?hi there
343
+ PROC_INST_ENDING: S++, // <?hi "there" ?
344
+ OPEN_TAG: S++, // <strong
345
+ OPEN_TAG_SLASH: S++, // <strong /
346
+ ATTRIB: S++, // <a
347
+ ATTRIB_NAME: S++, // <a foo
348
+ ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
349
+ ATTRIB_VALUE: S++, // <a foo=
350
+ ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
351
+ ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
352
+ ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
353
+ ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
354
+ ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
355
+ CLOSE_TAG: S++, // </a
356
+ CLOSE_TAG_SAW_WHITE: S++, // </a >
357
+ SCRIPT: S++, // <script> ...
358
+ SCRIPT_ENDING: S++ // <script> ... <
359
+ }
360
+
361
+ sax.XML_ENTITIES = {
362
+ 'amp': '&',
363
+ 'gt': '>',
364
+ 'lt': '<',
365
+ 'quot': '"',
366
+ 'apos': "'"
367
+ }
368
+
369
+ sax.ENTITIES = {
370
+ 'amp': '&',
371
+ 'gt': '>',
372
+ 'lt': '<',
373
+ 'quot': '"',
374
+ 'apos': "'",
375
+ 'AElig': 198,
376
+ 'Aacute': 193,
377
+ 'Acirc': 194,
378
+ 'Agrave': 192,
379
+ 'Aring': 197,
380
+ 'Atilde': 195,
381
+ 'Auml': 196,
382
+ 'Ccedil': 199,
383
+ 'ETH': 208,
384
+ 'Eacute': 201,
385
+ 'Ecirc': 202,
386
+ 'Egrave': 200,
387
+ 'Euml': 203,
388
+ 'Iacute': 205,
389
+ 'Icirc': 206,
390
+ 'Igrave': 204,
391
+ 'Iuml': 207,
392
+ 'Ntilde': 209,
393
+ 'Oacute': 211,
394
+ 'Ocirc': 212,
395
+ 'Ograve': 210,
396
+ 'Oslash': 216,
397
+ 'Otilde': 213,
398
+ 'Ouml': 214,
399
+ 'THORN': 222,
400
+ 'Uacute': 218,
401
+ 'Ucirc': 219,
402
+ 'Ugrave': 217,
403
+ 'Uuml': 220,
404
+ 'Yacute': 221,
405
+ 'aacute': 225,
406
+ 'acirc': 226,
407
+ 'aelig': 230,
408
+ 'agrave': 224,
409
+ 'aring': 229,
410
+ 'atilde': 227,
411
+ 'auml': 228,
412
+ 'ccedil': 231,
413
+ 'eacute': 233,
414
+ 'ecirc': 234,
415
+ 'egrave': 232,
416
+ 'eth': 240,
417
+ 'euml': 235,
418
+ 'iacute': 237,
419
+ 'icirc': 238,
420
+ 'igrave': 236,
421
+ 'iuml': 239,
422
+ 'ntilde': 241,
423
+ 'oacute': 243,
424
+ 'ocirc': 244,
425
+ 'ograve': 242,
426
+ 'oslash': 248,
427
+ 'otilde': 245,
428
+ 'ouml': 246,
429
+ 'szlig': 223,
430
+ 'thorn': 254,
431
+ 'uacute': 250,
432
+ 'ucirc': 251,
433
+ 'ugrave': 249,
434
+ 'uuml': 252,
435
+ 'yacute': 253,
436
+ 'yuml': 255,
437
+ 'copy': 169,
438
+ 'reg': 174,
439
+ 'nbsp': 160,
440
+ 'iexcl': 161,
441
+ 'cent': 162,
442
+ 'pound': 163,
443
+ 'curren': 164,
444
+ 'yen': 165,
445
+ 'brvbar': 166,
446
+ 'sect': 167,
447
+ 'uml': 168,
448
+ 'ordf': 170,
449
+ 'laquo': 171,
450
+ 'not': 172,
451
+ 'shy': 173,
452
+ 'macr': 175,
453
+ 'deg': 176,
454
+ 'plusmn': 177,
455
+ 'sup1': 185,
456
+ 'sup2': 178,
457
+ 'sup3': 179,
458
+ 'acute': 180,
459
+ 'micro': 181,
460
+ 'para': 182,
461
+ 'middot': 183,
462
+ 'cedil': 184,
463
+ 'ordm': 186,
464
+ 'raquo': 187,
465
+ 'frac14': 188,
466
+ 'frac12': 189,
467
+ 'frac34': 190,
468
+ 'iquest': 191,
469
+ 'times': 215,
470
+ 'divide': 247,
471
+ 'OElig': 338,
472
+ 'oelig': 339,
473
+ 'Scaron': 352,
474
+ 'scaron': 353,
475
+ 'Yuml': 376,
476
+ 'fnof': 402,
477
+ 'circ': 710,
478
+ 'tilde': 732,
479
+ 'Alpha': 913,
480
+ 'Beta': 914,
481
+ 'Gamma': 915,
482
+ 'Delta': 916,
483
+ 'Epsilon': 917,
484
+ 'Zeta': 918,
485
+ 'Eta': 919,
486
+ 'Theta': 920,
487
+ 'Iota': 921,
488
+ 'Kappa': 922,
489
+ 'Lambda': 923,
490
+ 'Mu': 924,
491
+ 'Nu': 925,
492
+ 'Xi': 926,
493
+ 'Omicron': 927,
494
+ 'Pi': 928,
495
+ 'Rho': 929,
496
+ 'Sigma': 931,
497
+ 'Tau': 932,
498
+ 'Upsilon': 933,
499
+ 'Phi': 934,
500
+ 'Chi': 935,
501
+ 'Psi': 936,
502
+ 'Omega': 937,
503
+ 'alpha': 945,
504
+ 'beta': 946,
505
+ 'gamma': 947,
506
+ 'delta': 948,
507
+ 'epsilon': 949,
508
+ 'zeta': 950,
509
+ 'eta': 951,
510
+ 'theta': 952,
511
+ 'iota': 953,
512
+ 'kappa': 954,
513
+ 'lambda': 955,
514
+ 'mu': 956,
515
+ 'nu': 957,
516
+ 'xi': 958,
517
+ 'omicron': 959,
518
+ 'pi': 960,
519
+ 'rho': 961,
520
+ 'sigmaf': 962,
521
+ 'sigma': 963,
522
+ 'tau': 964,
523
+ 'upsilon': 965,
524
+ 'phi': 966,
525
+ 'chi': 967,
526
+ 'psi': 968,
527
+ 'omega': 969,
528
+ 'thetasym': 977,
529
+ 'upsih': 978,
530
+ 'piv': 982,
531
+ 'ensp': 8194,
532
+ 'emsp': 8195,
533
+ 'thinsp': 8201,
534
+ 'zwnj': 8204,
535
+ 'zwj': 8205,
536
+ 'lrm': 8206,
537
+ 'rlm': 8207,
538
+ 'ndash': 8211,
539
+ 'mdash': 8212,
540
+ 'lsquo': 8216,
541
+ 'rsquo': 8217,
542
+ 'sbquo': 8218,
543
+ 'ldquo': 8220,
544
+ 'rdquo': 8221,
545
+ 'bdquo': 8222,
546
+ 'dagger': 8224,
547
+ 'Dagger': 8225,
548
+ 'bull': 8226,
549
+ 'hellip': 8230,
550
+ 'permil': 8240,
551
+ 'prime': 8242,
552
+ 'Prime': 8243,
553
+ 'lsaquo': 8249,
554
+ 'rsaquo': 8250,
555
+ 'oline': 8254,
556
+ 'frasl': 8260,
557
+ 'euro': 8364,
558
+ 'image': 8465,
559
+ 'weierp': 8472,
560
+ 'real': 8476,
561
+ 'trade': 8482,
562
+ 'alefsym': 8501,
563
+ 'larr': 8592,
564
+ 'uarr': 8593,
565
+ 'rarr': 8594,
566
+ 'darr': 8595,
567
+ 'harr': 8596,
568
+ 'crarr': 8629,
569
+ 'lArr': 8656,
570
+ 'uArr': 8657,
571
+ 'rArr': 8658,
572
+ 'dArr': 8659,
573
+ 'hArr': 8660,
574
+ 'forall': 8704,
575
+ 'part': 8706,
576
+ 'exist': 8707,
577
+ 'empty': 8709,
578
+ 'nabla': 8711,
579
+ 'isin': 8712,
580
+ 'notin': 8713,
581
+ 'ni': 8715,
582
+ 'prod': 8719,
583
+ 'sum': 8721,
584
+ 'minus': 8722,
585
+ 'lowast': 8727,
586
+ 'radic': 8730,
587
+ 'prop': 8733,
588
+ 'infin': 8734,
589
+ 'ang': 8736,
590
+ 'and': 8743,
591
+ 'or': 8744,
592
+ 'cap': 8745,
593
+ 'cup': 8746,
594
+ 'int': 8747,
595
+ 'there4': 8756,
596
+ 'sim': 8764,
597
+ 'cong': 8773,
598
+ 'asymp': 8776,
599
+ 'ne': 8800,
600
+ 'equiv': 8801,
601
+ 'le': 8804,
602
+ 'ge': 8805,
603
+ 'sub': 8834,
604
+ 'sup': 8835,
605
+ 'nsub': 8836,
606
+ 'sube': 8838,
607
+ 'supe': 8839,
608
+ 'oplus': 8853,
609
+ 'otimes': 8855,
610
+ 'perp': 8869,
611
+ 'sdot': 8901,
612
+ 'lceil': 8968,
613
+ 'rceil': 8969,
614
+ 'lfloor': 8970,
615
+ 'rfloor': 8971,
616
+ 'lang': 9001,
617
+ 'rang': 9002,
618
+ 'loz': 9674,
619
+ 'spades': 9824,
620
+ 'clubs': 9827,
621
+ 'hearts': 9829,
622
+ 'diams': 9830
623
+ }
624
+
625
+ Object.keys(sax.ENTITIES).forEach(function (key) {
614
626
  var e = sax.ENTITIES[key]
615
627
  var s = typeof e === 'number' ? String.fromCharCode(e) : e
616
628
  sax.ENTITIES[key] = s
617
- })
618
-
619
- for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S
620
-
621
- // shorthand
622
- S = sax.STATE
623
-
624
- function emit (parser, event, data) {
625
- parser[event] && parser[event](data)
626
- }
627
-
628
- function emitNode (parser, nodeType, data) {
629
- if (parser.textNode) closeText(parser)
630
- emit(parser, nodeType, data)
631
- }
632
-
633
- function closeText (parser) {
634
- parser.textNode = textopts(parser.opt, parser.textNode)
635
- if (parser.textNode) emit(parser, "ontext", parser.textNode)
636
- parser.textNode = ""
637
- }
638
-
639
- function textopts (opt, text) {
640
- if (opt.trim) text = text.trim()
641
- if (opt.normalize) text = text.replace(/\s+/g, " ")
642
- return text
643
- }
644
-
645
- function error (parser, er) {
646
- closeText(parser)
647
- if (parser.trackPosition) {
648
- er += "\nLine: "+parser.line+
649
- "\nColumn: "+parser.column+
650
- "\nChar: "+parser.c
629
+ })
630
+
631
+ for (var s in sax.STATE) {
632
+ sax.STATE[sax.STATE[s]] = s
651
633
  }
652
- er = new Error(er)
653
- parser.error = er
654
- emit(parser, "onerror", er)
655
- return parser
656
- }
657
-
658
- function end (parser) {
659
- if (parser.sawRoot && !parser.closedRoot) strictFail(parser, "Unclosed root tag")
660
- if ((parser.state !== S.BEGIN) &&
661
- (parser.state !== S.BEGIN_WHITESPACE) &&
662
- (parser.state !== S.TEXT))
663
- error(parser, "Unexpected end")
664
- closeText(parser)
665
- parser.c = ""
666
- parser.closed = true
667
- emit(parser, "onend")
668
- SAXParser.call(parser, parser.strict, parser.opt)
669
- return parser
670
- }
671
-
672
- function strictFail (parser, message) {
673
- if (typeof parser !== 'object' || !(parser instanceof SAXParser))
674
- throw new Error('bad call to strictFail');
675
- if (parser.strict) error(parser, message)
676
- }
677
-
678
- function newTag (parser) {
679
- if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
680
- var parent = parser.tags[parser.tags.length - 1] || parser
681
- , tag = parser.tag = { name : parser.tagName, attributes : {} }
682
-
683
- // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
684
- if (parser.opt.xmlns) tag.ns = parent.ns
685
- parser.attribList.length = 0
686
- }
687
-
688
- function qname (name, attribute) {
689
- var i = name.indexOf(":")
690
- , qualName = i < 0 ? [ "", name ] : name.split(":")
691
- , prefix = qualName[0]
692
- , local = qualName[1]
693
-
694
- // <x "xmlns"="http://foo">
695
- if (attribute && name === "xmlns") {
696
- prefix = "xmlns"
697
- local = ""
634
+
635
+ // shorthand
636
+ S = sax.STATE
637
+
638
+ function emit (parser, event, data) {
639
+ parser[event] && parser[event](data)
698
640
  }
699
641
 
700
- return { prefix: prefix, local: local }
701
- }
642
+ function emitNode (parser, nodeType, data) {
643
+ if (parser.textNode) closeText(parser)
644
+ emit(parser, nodeType, data)
645
+ }
702
646
 
703
- function attrib (parser) {
704
- if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]()
647
+ function closeText (parser) {
648
+ parser.textNode = textopts(parser.opt, parser.textNode)
649
+ if (parser.textNode) emit(parser, 'ontext', parser.textNode)
650
+ parser.textNode = ''
651
+ }
705
652
 
706
- if (parser.attribList.indexOf(parser.attribName) !== -1 ||
707
- parser.tag.attributes.hasOwnProperty(parser.attribName)) {
708
- return parser.attribName = parser.attribValue = ""
653
+ function textopts (opt, text) {
654
+ if (opt.trim) text = text.trim()
655
+ if (opt.normalize) text = text.replace(/\s+/g, ' ')
656
+ return text
709
657
  }
710
658
 
711
- if (parser.opt.xmlns) {
712
- var qn = qname(parser.attribName, true)
713
- , prefix = qn.prefix
714
- , local = qn.local
715
-
716
- if (prefix === "xmlns") {
717
- // namespace binding attribute; push the binding into scope
718
- if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
719
- strictFail( parser
720
- , "xml: prefix must be bound to " + XML_NAMESPACE + "\n"
721
- + "Actual: " + parser.attribValue )
722
- } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
723
- strictFail( parser
724
- , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n"
725
- + "Actual: " + parser.attribValue )
726
- } else {
727
- var tag = parser.tag
728
- , parent = parser.tags[parser.tags.length - 1] || parser
729
- if (tag.ns === parent.ns) {
730
- tag.ns = Object.create(parent.ns)
731
- }
732
- tag.ns[local] = parser.attribValue
733
- }
659
+ function error (parser, er) {
660
+ closeText(parser)
661
+ if (parser.trackPosition) {
662
+ er += '\nLine: ' + parser.line +
663
+ '\nColumn: ' + parser.column +
664
+ '\nChar: ' + parser.c
734
665
  }
666
+ er = new Error(er)
667
+ parser.error = er
668
+ emit(parser, 'onerror', er)
669
+ return parser
670
+ }
735
671
 
736
- // defer onattribute events until all attributes have been seen
737
- // so any new bindings can take effect; preserve attribute order
738
- // so deferred events can be emitted in document order
739
- parser.attribList.push([parser.attribName, parser.attribValue])
740
- } else {
741
- // in non-xmlns mode, we can emit the event right away
742
- parser.tag.attributes[parser.attribName] = parser.attribValue
743
- emitNode( parser
744
- , "onattribute"
745
- , { name: parser.attribName
746
- , value: parser.attribValue } )
672
+ function end (parser) {
673
+ if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag')
674
+ if ((parser.state !== S.BEGIN) &&
675
+ (parser.state !== S.BEGIN_WHITESPACE) &&
676
+ (parser.state !== S.TEXT)) {
677
+ error(parser, 'Unexpected end')
678
+ }
679
+ closeText(parser)
680
+ parser.c = ''
681
+ parser.closed = true
682
+ emit(parser, 'onend')
683
+ SAXParser.call(parser, parser.strict, parser.opt)
684
+ return parser
685
+ }
686
+
687
+ function strictFail (parser, message) {
688
+ if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
689
+ throw new Error('bad call to strictFail')
690
+ }
691
+ if (parser.strict) {
692
+ error(parser, message)
693
+ }
747
694
  }
748
695
 
749
- parser.attribName = parser.attribValue = ""
750
- }
696
+ function newTag (parser) {
697
+ if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
698
+ var parent = parser.tags[parser.tags.length - 1] || parser
699
+ var tag = parser.tag = { name: parser.tagName, attributes: {} }
751
700
 
752
- function openTag (parser, selfClosing) {
753
- if (parser.opt.xmlns) {
754
- // emit namespace binding events
755
- var tag = parser.tag
701
+ // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
702
+ if (parser.opt.xmlns) {
703
+ tag.ns = parent.ns
704
+ }
705
+ parser.attribList.length = 0
706
+ }
756
707
 
757
- // add namespace info to tag
758
- var qn = qname(parser.tagName)
759
- tag.prefix = qn.prefix
760
- tag.local = qn.local
761
- tag.uri = tag.ns[qn.prefix] || ""
708
+ function qname (name, attribute) {
709
+ var i = name.indexOf(':')
710
+ var qualName = i < 0 ? [ '', name ] : name.split(':')
711
+ var prefix = qualName[0]
712
+ var local = qualName[1]
762
713
 
763
- if (tag.prefix && !tag.uri) {
764
- strictFail(parser, "Unbound namespace prefix: "
765
- + JSON.stringify(parser.tagName))
766
- tag.uri = qn.prefix
714
+ // <x "xmlns"="http://foo">
715
+ if (attribute && name === 'xmlns') {
716
+ prefix = 'xmlns'
717
+ local = ''
767
718
  }
768
719
 
769
- var parent = parser.tags[parser.tags.length - 1] || parser
770
- if (tag.ns && parent.ns !== tag.ns) {
771
- Object.keys(tag.ns).forEach(function (p) {
772
- emitNode( parser
773
- , "onopennamespace"
774
- , { prefix: p , uri: tag.ns[p] } )
775
- })
720
+ return { prefix: prefix, local: local }
721
+ }
722
+
723
+ function attrib (parser) {
724
+ if (!parser.strict) {
725
+ parser.attribName = parser.attribName[parser.looseCase]()
776
726
  }
777
727
 
778
- // handle deferred onattribute events
779
- // Note: do not apply default ns to attributes:
780
- // http://www.w3.org/TR/REC-xml-names/#defaulting
781
- for (var i = 0, l = parser.attribList.length; i < l; i ++) {
782
- var nv = parser.attribList[i]
783
- var name = nv[0]
784
- , value = nv[1]
785
- , qualName = qname(name, true)
786
- , prefix = qualName.prefix
787
- , local = qualName.local
788
- , uri = prefix == "" ? "" : (tag.ns[prefix] || "")
789
- , a = { name: name
790
- , value: value
791
- , prefix: prefix
792
- , local: local
793
- , uri: uri
794
- }
728
+ if (parser.attribList.indexOf(parser.attribName) !== -1 ||
729
+ parser.tag.attributes.hasOwnProperty(parser.attribName)) {
730
+ parser.attribName = parser.attribValue = ''
731
+ return
732
+ }
795
733
 
796
- // if there's any attributes with an undefined namespace,
797
- // then fail on them now.
798
- if (prefix && prefix != "xmlns" && !uri) {
799
- strictFail(parser, "Unbound namespace prefix: "
800
- + JSON.stringify(prefix))
801
- a.uri = prefix
734
+ if (parser.opt.xmlns) {
735
+ var qn = qname(parser.attribName, true)
736
+ var prefix = qn.prefix
737
+ var local = qn.local
738
+
739
+ if (prefix === 'xmlns') {
740
+ // namespace binding attribute. push the binding into scope
741
+ if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
742
+ strictFail(parser,
743
+ 'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
744
+ 'Actual: ' + parser.attribValue)
745
+ } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
746
+ strictFail(parser,
747
+ 'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
748
+ 'Actual: ' + parser.attribValue)
749
+ } else {
750
+ var tag = parser.tag
751
+ var parent = parser.tags[parser.tags.length - 1] || parser
752
+ if (tag.ns === parent.ns) {
753
+ tag.ns = Object.create(parent.ns)
754
+ }
755
+ tag.ns[local] = parser.attribValue
756
+ }
802
757
  }
803
- parser.tag.attributes[name] = a
804
- emitNode(parser, "onattribute", a)
758
+
759
+ // defer onattribute events until all attributes have been seen
760
+ // so any new bindings can take effect. preserve attribute order
761
+ // so deferred events can be emitted in document order
762
+ parser.attribList.push([parser.attribName, parser.attribValue])
763
+ } else {
764
+ // in non-xmlns mode, we can emit the event right away
765
+ parser.tag.attributes[parser.attribName] = parser.attribValue
766
+ emitNode(parser, 'onattribute', {
767
+ name: parser.attribName,
768
+ value: parser.attribValue
769
+ })
805
770
  }
806
- parser.attribList.length = 0
771
+
772
+ parser.attribName = parser.attribValue = ''
807
773
  }
808
774
 
809
- parser.tag.isSelfClosing = !!selfClosing
775
+ function openTag (parser, selfClosing) {
776
+ if (parser.opt.xmlns) {
777
+ // emit namespace binding events
778
+ var tag = parser.tag
779
+
780
+ // add namespace info to tag
781
+ var qn = qname(parser.tagName)
782
+ tag.prefix = qn.prefix
783
+ tag.local = qn.local
784
+ tag.uri = tag.ns[qn.prefix] || ''
785
+
786
+ if (tag.prefix && !tag.uri) {
787
+ strictFail(parser, 'Unbound namespace prefix: ' +
788
+ JSON.stringify(parser.tagName))
789
+ tag.uri = qn.prefix
790
+ }
810
791
 
811
- // process the tag
812
- parser.sawRoot = true
813
- parser.tags.push(parser.tag)
814
- emitNode(parser, "onopentag", parser.tag)
815
- if (!selfClosing) {
816
- // special case for <script> in non-strict mode.
817
- if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
818
- parser.state = S.SCRIPT
819
- } else {
820
- parser.state = S.TEXT
792
+ var parent = parser.tags[parser.tags.length - 1] || parser
793
+ if (tag.ns && parent.ns !== tag.ns) {
794
+ Object.keys(tag.ns).forEach(function (p) {
795
+ emitNode(parser, 'onopennamespace', {
796
+ prefix: p,
797
+ uri: tag.ns[p]
798
+ })
799
+ })
800
+ }
801
+
802
+ // handle deferred onattribute events
803
+ // Note: do not apply default ns to attributes:
804
+ // http://www.w3.org/TR/REC-xml-names/#defaulting
805
+ for (var i = 0, l = parser.attribList.length; i < l; i++) {
806
+ var nv = parser.attribList[i]
807
+ var name = nv[0]
808
+ var value = nv[1]
809
+ var qualName = qname(name, true)
810
+ var prefix = qualName.prefix
811
+ var local = qualName.local
812
+ var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
813
+ var a = {
814
+ name: name,
815
+ value: value,
816
+ prefix: prefix,
817
+ local: local,
818
+ uri: uri
819
+ }
820
+
821
+ // if there's any attributes with an undefined namespace,
822
+ // then fail on them now.
823
+ if (prefix && prefix !== 'xmlns' && !uri) {
824
+ strictFail(parser, 'Unbound namespace prefix: ' +
825
+ JSON.stringify(prefix))
826
+ a.uri = prefix
827
+ }
828
+ parser.tag.attributes[name] = a
829
+ emitNode(parser, 'onattribute', a)
830
+ }
831
+ parser.attribList.length = 0
821
832
  }
822
- parser.tag = null
823
- parser.tagName = ""
824
- }
825
- parser.attribName = parser.attribValue = ""
826
- parser.attribList.length = 0
827
- }
828
-
829
- function closeTag (parser) {
830
- if (!parser.tagName) {
831
- strictFail(parser, "Weird empty close tag.")
832
- parser.textNode += "</>"
833
- parser.state = S.TEXT
834
- return
833
+
834
+ parser.tag.isSelfClosing = !!selfClosing
835
+
836
+ // process the tag
837
+ parser.sawRoot = true
838
+ parser.tags.push(parser.tag)
839
+ emitNode(parser, 'onopentag', parser.tag)
840
+ if (!selfClosing) {
841
+ // special case for <script> in non-strict mode.
842
+ if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
843
+ parser.state = S.SCRIPT
844
+ } else {
845
+ parser.state = S.TEXT
846
+ }
847
+ parser.tag = null
848
+ parser.tagName = ''
849
+ }
850
+ parser.attribName = parser.attribValue = ''
851
+ parser.attribList.length = 0
835
852
  }
836
853
 
837
- if (parser.script) {
838
- if (parser.tagName !== "script") {
839
- parser.script += "</" + parser.tagName + ">"
840
- parser.tagName = ""
841
- parser.state = S.SCRIPT
854
+ function closeTag (parser) {
855
+ if (!parser.tagName) {
856
+ strictFail(parser, 'Weird empty close tag.')
857
+ parser.textNode += '</>'
858
+ parser.state = S.TEXT
842
859
  return
843
860
  }
844
- emitNode(parser, "onscript", parser.script)
845
- parser.script = ""
846
- }
847
861
 
848
- // first make sure that the closing tag actually exists.
849
- // <a><b></c></b></a> will close everything, otherwise.
850
- var t = parser.tags.length
851
- var tagName = parser.tagName
852
- if (!parser.strict) tagName = tagName[parser.looseCase]()
853
- var closeTo = tagName
854
- while (t --) {
855
- var close = parser.tags[t]
856
- if (close.name !== closeTo) {
857
- // fail the first time in strict mode
858
- strictFail(parser, "Unexpected close tag")
859
- } else break
860
- }
862
+ if (parser.script) {
863
+ if (parser.tagName !== 'script') {
864
+ parser.script += '</' + parser.tagName + '>'
865
+ parser.tagName = ''
866
+ parser.state = S.SCRIPT
867
+ return
868
+ }
869
+ emitNode(parser, 'onscript', parser.script)
870
+ parser.script = ''
871
+ }
872
+
873
+ // first make sure that the closing tag actually exists.
874
+ // <a><b></c></b></a> will close everything, otherwise.
875
+ var t = parser.tags.length
876
+ var tagName = parser.tagName
877
+ if (!parser.strict) {
878
+ tagName = tagName[parser.looseCase]()
879
+ }
880
+ var closeTo = tagName
881
+ while (t--) {
882
+ var close = parser.tags[t]
883
+ if (close.name !== closeTo) {
884
+ // fail the first time in strict mode
885
+ strictFail(parser, 'Unexpected close tag')
886
+ } else {
887
+ break
888
+ }
889
+ }
861
890
 
862
- // didn't find it. we already failed for strict, so just abort.
863
- if (t < 0) {
864
- strictFail(parser, "Unmatched closing tag: "+parser.tagName)
865
- parser.textNode += "</" + parser.tagName + ">"
891
+ // didn't find it. we already failed for strict, so just abort.
892
+ if (t < 0) {
893
+ strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
894
+ parser.textNode += '</' + parser.tagName + '>'
895
+ parser.state = S.TEXT
896
+ return
897
+ }
898
+ parser.tagName = tagName
899
+ var s = parser.tags.length
900
+ while (s-- > t) {
901
+ var tag = parser.tag = parser.tags.pop()
902
+ parser.tagName = parser.tag.name
903
+ emitNode(parser, 'onclosetag', parser.tagName)
904
+
905
+ var x = {}
906
+ for (var i in tag.ns) {
907
+ x[i] = tag.ns[i]
908
+ }
909
+
910
+ var parent = parser.tags[parser.tags.length - 1] || parser
911
+ if (parser.opt.xmlns && tag.ns !== parent.ns) {
912
+ // remove namespace bindings introduced by tag
913
+ Object.keys(tag.ns).forEach(function (p) {
914
+ var n = tag.ns[p]
915
+ emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
916
+ })
917
+ }
918
+ }
919
+ if (t === 0) parser.closedRoot = true
920
+ parser.tagName = parser.attribValue = parser.attribName = ''
921
+ parser.attribList.length = 0
866
922
  parser.state = S.TEXT
867
- return
868
923
  }
869
- parser.tagName = tagName
870
- var s = parser.tags.length
871
- while (s --> t) {
872
- var tag = parser.tag = parser.tags.pop()
873
- parser.tagName = parser.tag.name
874
- emitNode(parser, "onclosetag", parser.tagName)
875
924
 
876
- var x = {}
877
- for (var i in tag.ns) x[i] = tag.ns[i]
925
+ function parseEntity (parser) {
926
+ var entity = parser.entity
927
+ var entityLC = entity.toLowerCase()
928
+ var num
929
+ var numStr = ''
878
930
 
879
- var parent = parser.tags[parser.tags.length - 1] || parser
880
- if (parser.opt.xmlns && tag.ns !== parent.ns) {
881
- // remove namespace bindings introduced by tag
882
- Object.keys(tag.ns).forEach(function (p) {
883
- var n = tag.ns[p]
884
- emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
885
- })
931
+ if (parser.ENTITIES[entity]) {
932
+ return parser.ENTITIES[entity]
886
933
  }
887
- }
888
- if (t === 0) parser.closedRoot = true
889
- parser.tagName = parser.attribValue = parser.attribName = ""
890
- parser.attribList.length = 0
891
- parser.state = S.TEXT
892
- }
893
-
894
- function parseEntity (parser) {
895
- var entity = parser.entity
896
- , entityLC = entity.toLowerCase()
897
- , num
898
- , numStr = ""
899
- if (parser.ENTITIES[entity])
900
- return parser.ENTITIES[entity]
901
- if (parser.ENTITIES[entityLC])
902
- return parser.ENTITIES[entityLC]
903
- entity = entityLC
904
- if (entity.charAt(0) === "#") {
905
- if (entity.charAt(1) === "x") {
906
- entity = entity.slice(2)
907
- num = parseInt(entity, 16)
908
- numStr = num.toString(16)
909
- } else {
910
- entity = entity.slice(1)
911
- num = parseInt(entity, 10)
912
- numStr = num.toString(10)
934
+ if (parser.ENTITIES[entityLC]) {
935
+ return parser.ENTITIES[entityLC]
913
936
  }
914
- }
915
- entity = entity.replace(/^0+/, "")
916
- if (numStr.toLowerCase() !== entity) {
917
- strictFail(parser, "Invalid character entity")
918
- return "&"+parser.entity + ";"
937
+ entity = entityLC
938
+ if (entity.charAt(0) === '#') {
939
+ if (entity.charAt(1) === 'x') {
940
+ entity = entity.slice(2)
941
+ num = parseInt(entity, 16)
942
+ numStr = num.toString(16)
943
+ } else {
944
+ entity = entity.slice(1)
945
+ num = parseInt(entity, 10)
946
+ numStr = num.toString(10)
947
+ }
948
+ }
949
+ entity = entity.replace(/^0+/, '')
950
+ if (numStr.toLowerCase() !== entity) {
951
+ strictFail(parser, 'Invalid character entity')
952
+ return '&' + parser.entity + ';'
953
+ }
954
+
955
+ return String.fromCodePoint(num)
919
956
  }
920
957
 
921
- return String.fromCodePoint(num)
922
- }
923
-
924
- function write (chunk) {
925
- var parser = this
926
- if (this.error) throw this.error
927
- if (parser.closed) return error(parser,
928
- "Cannot write after close. Assign an onready handler.")
929
- if (chunk === null) return end(parser)
930
- var i = 0, c = ""
931
- while (parser.c = c = chunk.charAt(i++)) {
932
- if (parser.trackPosition) {
933
- parser.position ++
934
- if (c === "\n") {
935
- parser.line ++
936
- parser.column = 0
937
- } else parser.column ++
958
+ function beginWhiteSpace (parser, c) {
959
+ if (c === '<') {
960
+ parser.state = S.OPEN_WAKA
961
+ parser.startTagPosition = parser.position
962
+ } else if (not(whitespace, c)) {
963
+ // have to process this as a text node.
964
+ // weird, but happens.
965
+ strictFail(parser, 'Non-whitespace before first tag.')
966
+ parser.textNode = c
967
+ parser.state = S.TEXT
938
968
  }
939
- switch (parser.state) {
969
+ }
940
970
 
941
- case S.BEGIN:
942
- parser.state = S.BEGIN_WHITESPACE
943
- if (c === "\uFEFF") {
944
- continue;
945
- }
946
- // no continue - fall through
947
-
948
- case S.BEGIN_WHITESPACE:
949
- if (c === "<") {
950
- parser.state = S.OPEN_WAKA
951
- parser.startTagPosition = parser.position
952
- } else if (not(whitespace,c)) {
953
- // have to process this as a text node.
954
- // weird, but happens.
955
- strictFail(parser, "Non-whitespace before first tag.")
956
- parser.textNode = c
957
- parser.state = S.TEXT
971
+ function write (chunk) {
972
+ var parser = this
973
+ if (this.error) {
974
+ throw this.error
975
+ }
976
+ if (parser.closed) {
977
+ return error(parser,
978
+ 'Cannot write after close. Assign an onready handler.')
979
+ }
980
+ if (chunk === null) {
981
+ return end(parser)
982
+ }
983
+ var i = 0
984
+ var c = ''
985
+ while (true) {
986
+ c = chunk.charAt(i++)
987
+ parser.c = c
988
+ if (!c) {
989
+ break
990
+ }
991
+ if (parser.trackPosition) {
992
+ parser.position++
993
+ if (c === '\n') {
994
+ parser.line++
995
+ parser.column = 0
996
+ } else {
997
+ parser.column++
958
998
  }
959
- continue
960
-
961
- case S.TEXT:
962
- if (parser.sawRoot && !parser.closedRoot) {
963
- var starti = i-1
964
- while (c && c!=="<" && c!=="&") {
965
- c = chunk.charAt(i++)
966
- if (c && parser.trackPosition) {
967
- parser.position ++
968
- if (c === "\n") {
969
- parser.line ++
970
- parser.column = 0
971
- } else parser.column ++
999
+ }
1000
+ switch (parser.state) {
1001
+ case S.BEGIN:
1002
+ parser.state = S.BEGIN_WHITESPACE
1003
+ if (c === '\uFEFF') {
1004
+ continue
1005
+ }
1006
+ beginWhiteSpace(parser, c)
1007
+ continue
1008
+
1009
+ case S.BEGIN_WHITESPACE:
1010
+ beginWhiteSpace(parser, c)
1011
+ continue
1012
+
1013
+ case S.TEXT:
1014
+ if (parser.sawRoot && !parser.closedRoot) {
1015
+ var starti = i - 1
1016
+ while (c && c !== '<' && c !== '&') {
1017
+ c = chunk.charAt(i++)
1018
+ if (c && parser.trackPosition) {
1019
+ parser.position++
1020
+ if (c === '\n') {
1021
+ parser.line++
1022
+ parser.column = 0
1023
+ } else {
1024
+ parser.column++
1025
+ }
1026
+ }
972
1027
  }
1028
+ parser.textNode += chunk.substring(starti, i - 1)
973
1029
  }
974
- parser.textNode += chunk.substring(starti, i-1)
975
- }
976
- if (c === "<" && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
977
- parser.state = S.OPEN_WAKA
978
- parser.startTagPosition = parser.position
979
- } else {
980
- if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
981
- strictFail(parser, "Text data outside of root node.")
982
- if (c === "&") parser.state = S.TEXT_ENTITY
983
- else parser.textNode += c
984
- }
985
- continue
986
-
987
- case S.SCRIPT:
988
- // only non-strict
989
- if (c === "<") {
990
- parser.state = S.SCRIPT_ENDING
991
- } else parser.script += c
992
- continue
993
-
994
- case S.SCRIPT_ENDING:
995
- if (c === "/") {
996
- parser.state = S.CLOSE_TAG
997
- } else {
998
- parser.script += "<" + c
999
- parser.state = S.SCRIPT
1000
- }
1001
- continue
1002
-
1003
- case S.OPEN_WAKA:
1004
- // either a /, ?, !, or text is coming next.
1005
- if (c === "!") {
1006
- parser.state = S.SGML_DECL
1007
- parser.sgmlDecl = ""
1008
- } else if (is(whitespace, c)) {
1009
- // wait for it...
1010
- } else if (is(nameStart,c)) {
1011
- parser.state = S.OPEN_TAG
1012
- parser.tagName = c
1013
- } else if (c === "/") {
1014
- parser.state = S.CLOSE_TAG
1015
- parser.tagName = ""
1016
- } else if (c === "?") {
1017
- parser.state = S.PROC_INST
1018
- parser.procInstName = parser.procInstBody = ""
1019
- } else {
1020
- strictFail(parser, "Unencoded <")
1021
- // if there was some whitespace, then add that in.
1022
- if (parser.startTagPosition + 1 < parser.position) {
1023
- var pad = parser.position - parser.startTagPosition
1024
- c = new Array(pad).join(" ") + c
1030
+ if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
1031
+ parser.state = S.OPEN_WAKA
1032
+ parser.startTagPosition = parser.position
1033
+ } else {
1034
+ if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
1035
+ strictFail(parser, 'Text data outside of root node.')
1036
+ }
1037
+ if (c === '&') {
1038
+ parser.state = S.TEXT_ENTITY
1039
+ } else {
1040
+ parser.textNode += c
1041
+ }
1042
+ }
1043
+ continue
1044
+
1045
+ case S.SCRIPT:
1046
+ // only non-strict
1047
+ if (c === '<') {
1048
+ parser.state = S.SCRIPT_ENDING
1049
+ } else {
1050
+ parser.script += c
1051
+ }
1052
+ continue
1053
+
1054
+ case S.SCRIPT_ENDING:
1055
+ if (c === '/') {
1056
+ parser.state = S.CLOSE_TAG
1057
+ } else {
1058
+ parser.script += '<' + c
1059
+ parser.state = S.SCRIPT
1060
+ }
1061
+ continue
1062
+
1063
+ case S.OPEN_WAKA:
1064
+ // either a /, ?, !, or text is coming next.
1065
+ if (c === '!') {
1066
+ parser.state = S.SGML_DECL
1067
+ parser.sgmlDecl = ''
1068
+ } else if (is(whitespace, c)) {
1069
+ // wait for it...
1070
+ } else if (is(nameStart, c)) {
1071
+ parser.state = S.OPEN_TAG
1072
+ parser.tagName = c
1073
+ } else if (c === '/') {
1074
+ parser.state = S.CLOSE_TAG
1075
+ parser.tagName = ''
1076
+ } else if (c === '?') {
1077
+ parser.state = S.PROC_INST
1078
+ parser.procInstName = parser.procInstBody = ''
1079
+ } else {
1080
+ strictFail(parser, 'Unencoded <')
1081
+ // if there was some whitespace, then add that in.
1082
+ if (parser.startTagPosition + 1 < parser.position) {
1083
+ var pad = parser.position - parser.startTagPosition
1084
+ c = new Array(pad).join(' ') + c
1085
+ }
1086
+ parser.textNode += '<' + c
1087
+ parser.state = S.TEXT
1088
+ }
1089
+ continue
1090
+
1091
+ case S.SGML_DECL:
1092
+ if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
1093
+ emitNode(parser, 'onopencdata')
1094
+ parser.state = S.CDATA
1095
+ parser.sgmlDecl = ''
1096
+ parser.cdata = ''
1097
+ } else if (parser.sgmlDecl + c === '--') {
1098
+ parser.state = S.COMMENT
1099
+ parser.comment = ''
1100
+ parser.sgmlDecl = ''
1101
+ } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
1102
+ parser.state = S.DOCTYPE
1103
+ if (parser.doctype || parser.sawRoot) {
1104
+ strictFail(parser,
1105
+ 'Inappropriately located doctype declaration')
1106
+ }
1107
+ parser.doctype = ''
1108
+ parser.sgmlDecl = ''
1109
+ } else if (c === '>') {
1110
+ emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
1111
+ parser.sgmlDecl = ''
1112
+ parser.state = S.TEXT
1113
+ } else if (is(quote, c)) {
1114
+ parser.state = S.SGML_DECL_QUOTED
1115
+ parser.sgmlDecl += c
1116
+ } else {
1117
+ parser.sgmlDecl += c
1118
+ }
1119
+ continue
1120
+
1121
+ case S.SGML_DECL_QUOTED:
1122
+ if (c === parser.q) {
1123
+ parser.state = S.SGML_DECL
1124
+ parser.q = ''
1025
1125
  }
1026
- parser.textNode += "<" + c
1027
- parser.state = S.TEXT
1028
- }
1029
- continue
1030
-
1031
- case S.SGML_DECL:
1032
- if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
1033
- emitNode(parser, "onopencdata")
1034
- parser.state = S.CDATA
1035
- parser.sgmlDecl = ""
1036
- parser.cdata = ""
1037
- } else if (parser.sgmlDecl+c === "--") {
1038
- parser.state = S.COMMENT
1039
- parser.comment = ""
1040
- parser.sgmlDecl = ""
1041
- } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
1042
- parser.state = S.DOCTYPE
1043
- if (parser.doctype || parser.sawRoot) strictFail(parser,
1044
- "Inappropriately located doctype declaration")
1045
- parser.doctype = ""
1046
- parser.sgmlDecl = ""
1047
- } else if (c === ">") {
1048
- emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
1049
- parser.sgmlDecl = ""
1050
- parser.state = S.TEXT
1051
- } else if (is(quote, c)) {
1052
- parser.state = S.SGML_DECL_QUOTED
1053
1126
  parser.sgmlDecl += c
1054
- } else parser.sgmlDecl += c
1055
- continue
1127
+ continue
1056
1128
 
1057
- case S.SGML_DECL_QUOTED:
1058
- if (c === parser.q) {
1059
- parser.state = S.SGML_DECL
1060
- parser.q = ""
1061
- }
1062
- parser.sgmlDecl += c
1063
- continue
1064
-
1065
- case S.DOCTYPE:
1066
- if (c === ">") {
1067
- parser.state = S.TEXT
1068
- emitNode(parser, "ondoctype", parser.doctype)
1069
- parser.doctype = true // just remember that we saw it.
1070
- } else {
1129
+ case S.DOCTYPE:
1130
+ if (c === '>') {
1131
+ parser.state = S.TEXT
1132
+ emitNode(parser, 'ondoctype', parser.doctype)
1133
+ parser.doctype = true // just remember that we saw it.
1134
+ } else {
1135
+ parser.doctype += c
1136
+ if (c === '[') {
1137
+ parser.state = S.DOCTYPE_DTD
1138
+ } else if (is(quote, c)) {
1139
+ parser.state = S.DOCTYPE_QUOTED
1140
+ parser.q = c
1141
+ }
1142
+ }
1143
+ continue
1144
+
1145
+ case S.DOCTYPE_QUOTED:
1071
1146
  parser.doctype += c
1072
- if (c === "[") parser.state = S.DOCTYPE_DTD
1073
- else if (is(quote, c)) {
1074
- parser.state = S.DOCTYPE_QUOTED
1147
+ if (c === parser.q) {
1148
+ parser.q = ''
1149
+ parser.state = S.DOCTYPE
1150
+ }
1151
+ continue
1152
+
1153
+ case S.DOCTYPE_DTD:
1154
+ parser.doctype += c
1155
+ if (c === ']') {
1156
+ parser.state = S.DOCTYPE
1157
+ } else if (is(quote, c)) {
1158
+ parser.state = S.DOCTYPE_DTD_QUOTED
1075
1159
  parser.q = c
1076
1160
  }
1077
- }
1078
- continue
1161
+ continue
1079
1162
 
1080
- case S.DOCTYPE_QUOTED:
1081
- parser.doctype += c
1082
- if (c === parser.q) {
1083
- parser.q = ""
1084
- parser.state = S.DOCTYPE
1085
- }
1086
- continue
1087
-
1088
- case S.DOCTYPE_DTD:
1089
- parser.doctype += c
1090
- if (c === "]") parser.state = S.DOCTYPE
1091
- else if (is(quote,c)) {
1092
- parser.state = S.DOCTYPE_DTD_QUOTED
1093
- parser.q = c
1094
- }
1095
- continue
1163
+ case S.DOCTYPE_DTD_QUOTED:
1164
+ parser.doctype += c
1165
+ if (c === parser.q) {
1166
+ parser.state = S.DOCTYPE_DTD
1167
+ parser.q = ''
1168
+ }
1169
+ continue
1096
1170
 
1097
- case S.DOCTYPE_DTD_QUOTED:
1098
- parser.doctype += c
1099
- if (c === parser.q) {
1100
- parser.state = S.DOCTYPE_DTD
1101
- parser.q = ""
1102
- }
1103
- continue
1104
-
1105
- case S.COMMENT:
1106
- if (c === "-") parser.state = S.COMMENT_ENDING
1107
- else parser.comment += c
1108
- continue
1109
-
1110
- case S.COMMENT_ENDING:
1111
- if (c === "-") {
1112
- parser.state = S.COMMENT_ENDED
1113
- parser.comment = textopts(parser.opt, parser.comment)
1114
- if (parser.comment) emitNode(parser, "oncomment", parser.comment)
1115
- parser.comment = ""
1116
- } else {
1117
- parser.comment += "-" + c
1118
- parser.state = S.COMMENT
1119
- }
1120
- continue
1121
-
1122
- case S.COMMENT_ENDED:
1123
- if (c !== ">") {
1124
- strictFail(parser, "Malformed comment")
1125
- // allow <!-- blah -- bloo --> in non-strict mode,
1126
- // which is a comment of " blah -- bloo "
1127
- parser.comment += "--" + c
1128
- parser.state = S.COMMENT
1129
- } else parser.state = S.TEXT
1130
- continue
1131
-
1132
- case S.CDATA:
1133
- if (c === "]") parser.state = S.CDATA_ENDING
1134
- else parser.cdata += c
1135
- continue
1136
-
1137
- case S.CDATA_ENDING:
1138
- if (c === "]") parser.state = S.CDATA_ENDING_2
1139
- else {
1140
- parser.cdata += "]" + c
1141
- parser.state = S.CDATA
1142
- }
1143
- continue
1144
-
1145
- case S.CDATA_ENDING_2:
1146
- if (c === ">") {
1147
- if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
1148
- emitNode(parser, "onclosecdata")
1149
- parser.cdata = ""
1150
- parser.state = S.TEXT
1151
- } else if (c === "]") {
1152
- parser.cdata += "]"
1153
- } else {
1154
- parser.cdata += "]]" + c
1155
- parser.state = S.CDATA
1156
- }
1157
- continue
1158
-
1159
- case S.PROC_INST:
1160
- if (c === "?") parser.state = S.PROC_INST_ENDING
1161
- else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
1162
- else parser.procInstName += c
1163
- continue
1164
-
1165
- case S.PROC_INST_BODY:
1166
- if (!parser.procInstBody && is(whitespace, c)) continue
1167
- else if (c === "?") parser.state = S.PROC_INST_ENDING
1168
- else parser.procInstBody += c
1169
- continue
1170
-
1171
- case S.PROC_INST_ENDING:
1172
- if (c === ">") {
1173
- emitNode(parser, "onprocessinginstruction", {
1174
- name : parser.procInstName,
1175
- body : parser.procInstBody
1176
- })
1177
- parser.procInstName = parser.procInstBody = ""
1178
- parser.state = S.TEXT
1179
- } else {
1180
- parser.procInstBody += "?" + c
1181
- parser.state = S.PROC_INST_BODY
1182
- }
1183
- continue
1184
-
1185
- case S.OPEN_TAG:
1186
- if (is(nameBody, c)) parser.tagName += c
1187
- else {
1188
- newTag(parser)
1189
- if (c === ">") openTag(parser)
1190
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1191
- else {
1192
- if (not(whitespace, c)) strictFail(
1193
- parser, "Invalid character in tag name")
1171
+ case S.COMMENT:
1172
+ if (c === '-') {
1173
+ parser.state = S.COMMENT_ENDING
1174
+ } else {
1175
+ parser.comment += c
1176
+ }
1177
+ continue
1178
+
1179
+ case S.COMMENT_ENDING:
1180
+ if (c === '-') {
1181
+ parser.state = S.COMMENT_ENDED
1182
+ parser.comment = textopts(parser.opt, parser.comment)
1183
+ if (parser.comment) {
1184
+ emitNode(parser, 'oncomment', parser.comment)
1185
+ }
1186
+ parser.comment = ''
1187
+ } else {
1188
+ parser.comment += '-' + c
1189
+ parser.state = S.COMMENT
1190
+ }
1191
+ continue
1192
+
1193
+ case S.COMMENT_ENDED:
1194
+ if (c !== '>') {
1195
+ strictFail(parser, 'Malformed comment')
1196
+ // allow <!-- blah -- bloo --> in non-strict mode,
1197
+ // which is a comment of " blah -- bloo "
1198
+ parser.comment += '--' + c
1199
+ parser.state = S.COMMENT
1200
+ } else {
1201
+ parser.state = S.TEXT
1202
+ }
1203
+ continue
1204
+
1205
+ case S.CDATA:
1206
+ if (c === ']') {
1207
+ parser.state = S.CDATA_ENDING
1208
+ } else {
1209
+ parser.cdata += c
1210
+ }
1211
+ continue
1212
+
1213
+ case S.CDATA_ENDING:
1214
+ if (c === ']') {
1215
+ parser.state = S.CDATA_ENDING_2
1216
+ } else {
1217
+ parser.cdata += ']' + c
1218
+ parser.state = S.CDATA
1219
+ }
1220
+ continue
1221
+
1222
+ case S.CDATA_ENDING_2:
1223
+ if (c === '>') {
1224
+ if (parser.cdata) {
1225
+ emitNode(parser, 'oncdata', parser.cdata)
1226
+ }
1227
+ emitNode(parser, 'onclosecdata')
1228
+ parser.cdata = ''
1229
+ parser.state = S.TEXT
1230
+ } else if (c === ']') {
1231
+ parser.cdata += ']'
1232
+ } else {
1233
+ parser.cdata += ']]' + c
1234
+ parser.state = S.CDATA
1235
+ }
1236
+ continue
1237
+
1238
+ case S.PROC_INST:
1239
+ if (c === '?') {
1240
+ parser.state = S.PROC_INST_ENDING
1241
+ } else if (is(whitespace, c)) {
1242
+ parser.state = S.PROC_INST_BODY
1243
+ } else {
1244
+ parser.procInstName += c
1245
+ }
1246
+ continue
1247
+
1248
+ case S.PROC_INST_BODY:
1249
+ if (!parser.procInstBody && is(whitespace, c)) {
1250
+ continue
1251
+ } else if (c === '?') {
1252
+ parser.state = S.PROC_INST_ENDING
1253
+ } else {
1254
+ parser.procInstBody += c
1255
+ }
1256
+ continue
1257
+
1258
+ case S.PROC_INST_ENDING:
1259
+ if (c === '>') {
1260
+ emitNode(parser, 'onprocessinginstruction', {
1261
+ name: parser.procInstName,
1262
+ body: parser.procInstBody
1263
+ })
1264
+ parser.procInstName = parser.procInstBody = ''
1265
+ parser.state = S.TEXT
1266
+ } else {
1267
+ parser.procInstBody += '?' + c
1268
+ parser.state = S.PROC_INST_BODY
1269
+ }
1270
+ continue
1271
+
1272
+ case S.OPEN_TAG:
1273
+ if (is(nameBody, c)) {
1274
+ parser.tagName += c
1275
+ } else {
1276
+ newTag(parser)
1277
+ if (c === '>') {
1278
+ openTag(parser)
1279
+ } else if (c === '/') {
1280
+ parser.state = S.OPEN_TAG_SLASH
1281
+ } else {
1282
+ if (not(whitespace, c)) {
1283
+ strictFail(parser, 'Invalid character in tag name')
1284
+ }
1285
+ parser.state = S.ATTRIB
1286
+ }
1287
+ }
1288
+ continue
1289
+
1290
+ case S.OPEN_TAG_SLASH:
1291
+ if (c === '>') {
1292
+ openTag(parser, true)
1293
+ closeTag(parser)
1294
+ } else {
1295
+ strictFail(parser, 'Forward-slash in opening tag not followed by >')
1194
1296
  parser.state = S.ATTRIB
1195
1297
  }
1196
- }
1197
- continue
1298
+ continue
1198
1299
 
1199
- case S.OPEN_TAG_SLASH:
1200
- if (c === ">") {
1201
- openTag(parser, true)
1202
- closeTag(parser)
1203
- } else {
1204
- strictFail(parser, "Forward-slash in opening tag not followed by >")
1205
- parser.state = S.ATTRIB
1206
- }
1207
- continue
1208
-
1209
- case S.ATTRIB:
1210
- // haven't read the attribute name yet.
1211
- if (is(whitespace, c)) continue
1212
- else if (c === ">") openTag(parser)
1213
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1214
- else if (is(nameStart, c)) {
1215
- parser.attribName = c
1216
- parser.attribValue = ""
1217
- parser.state = S.ATTRIB_NAME
1218
- } else strictFail(parser, "Invalid attribute name")
1219
- continue
1220
-
1221
- case S.ATTRIB_NAME:
1222
- if (c === "=") parser.state = S.ATTRIB_VALUE
1223
- else if (c === ">") {
1224
- strictFail(parser, "Attribute without value")
1225
- parser.attribValue = parser.attribName
1226
- attrib(parser)
1227
- openTag(parser)
1228
- }
1229
- else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
1230
- else if (is(nameBody, c)) parser.attribName += c
1231
- else strictFail(parser, "Invalid attribute name")
1232
- continue
1233
-
1234
- case S.ATTRIB_NAME_SAW_WHITE:
1235
- if (c === "=") parser.state = S.ATTRIB_VALUE
1236
- else if (is(whitespace, c)) continue
1237
- else {
1238
- strictFail(parser, "Attribute without value")
1239
- parser.tag.attributes[parser.attribName] = ""
1240
- parser.attribValue = ""
1241
- emitNode(parser, "onattribute",
1242
- { name : parser.attribName, value : "" })
1243
- parser.attribName = ""
1244
- if (c === ">") openTag(parser)
1245
- else if (is(nameStart, c)) {
1300
+ case S.ATTRIB:
1301
+ // haven't read the attribute name yet.
1302
+ if (is(whitespace, c)) {
1303
+ continue
1304
+ } else if (c === '>') {
1305
+ openTag(parser)
1306
+ } else if (c === '/') {
1307
+ parser.state = S.OPEN_TAG_SLASH
1308
+ } else if (is(nameStart, c)) {
1246
1309
  parser.attribName = c
1310
+ parser.attribValue = ''
1247
1311
  parser.state = S.ATTRIB_NAME
1248
1312
  } else {
1249
- strictFail(parser, "Invalid attribute name")
1250
- parser.state = S.ATTRIB
1313
+ strictFail(parser, 'Invalid attribute name')
1251
1314
  }
1252
- }
1253
- continue
1315
+ continue
1254
1316
 
1255
- case S.ATTRIB_VALUE:
1256
- if (is(whitespace, c)) continue
1257
- else if (is(quote, c)) {
1258
- parser.q = c
1259
- parser.state = S.ATTRIB_VALUE_QUOTED
1260
- } else {
1261
- strictFail(parser, "Unquoted attribute value")
1262
- parser.state = S.ATTRIB_VALUE_UNQUOTED
1263
- parser.attribValue = c
1264
- }
1265
- continue
1317
+ case S.ATTRIB_NAME:
1318
+ if (c === '=') {
1319
+ parser.state = S.ATTRIB_VALUE
1320
+ } else if (c === '>') {
1321
+ strictFail(parser, 'Attribute without value')
1322
+ parser.attribValue = parser.attribName
1323
+ attrib(parser)
1324
+ openTag(parser)
1325
+ } else if (is(whitespace, c)) {
1326
+ parser.state = S.ATTRIB_NAME_SAW_WHITE
1327
+ } else if (is(nameBody, c)) {
1328
+ parser.attribName += c
1329
+ } else {
1330
+ strictFail(parser, 'Invalid attribute name')
1331
+ }
1332
+ continue
1266
1333
 
1267
- case S.ATTRIB_VALUE_QUOTED:
1268
- if (c !== parser.q) {
1269
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
1270
- else parser.attribValue += c
1334
+ case S.ATTRIB_NAME_SAW_WHITE:
1335
+ if (c === '=') {
1336
+ parser.state = S.ATTRIB_VALUE
1337
+ } else if (is(whitespace, c)) {
1338
+ continue
1339
+ } else {
1340
+ strictFail(parser, 'Attribute without value')
1341
+ parser.tag.attributes[parser.attribName] = ''
1342
+ parser.attribValue = ''
1343
+ emitNode(parser, 'onattribute', {
1344
+ name: parser.attribName,
1345
+ value: ''
1346
+ })
1347
+ parser.attribName = ''
1348
+ if (c === '>') {
1349
+ openTag(parser)
1350
+ } else if (is(nameStart, c)) {
1351
+ parser.attribName = c
1352
+ parser.state = S.ATTRIB_NAME
1353
+ } else {
1354
+ strictFail(parser, 'Invalid attribute name')
1355
+ parser.state = S.ATTRIB
1356
+ }
1357
+ }
1271
1358
  continue
1272
- }
1273
- attrib(parser)
1274
- parser.q = ""
1275
- parser.state = S.ATTRIB_VALUE_CLOSED
1276
- continue
1277
-
1278
- case S.ATTRIB_VALUE_CLOSED:
1279
- if (is(whitespace, c)) {
1280
- parser.state = S.ATTRIB
1281
- } else if (c === ">") openTag(parser)
1282
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1283
- else if (is(nameStart, c)) {
1284
- strictFail(parser, "No whitespace between attributes")
1285
- parser.attribName = c
1286
- parser.attribValue = ""
1287
- parser.state = S.ATTRIB_NAME
1288
- } else strictFail(parser, "Invalid attribute name")
1289
- continue
1290
-
1291
- case S.ATTRIB_VALUE_UNQUOTED:
1292
- if (not(attribEnd,c)) {
1293
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
1294
- else parser.attribValue += c
1359
+
1360
+ case S.ATTRIB_VALUE:
1361
+ if (is(whitespace, c)) {
1362
+ continue
1363
+ } else if (is(quote, c)) {
1364
+ parser.q = c
1365
+ parser.state = S.ATTRIB_VALUE_QUOTED
1366
+ } else {
1367
+ strictFail(parser, 'Unquoted attribute value')
1368
+ parser.state = S.ATTRIB_VALUE_UNQUOTED
1369
+ parser.attribValue = c
1370
+ }
1295
1371
  continue
1296
- }
1297
- attrib(parser)
1298
- if (c === ">") openTag(parser)
1299
- else parser.state = S.ATTRIB
1300
- continue
1301
-
1302
- case S.CLOSE_TAG:
1303
- if (!parser.tagName) {
1304
- if (is(whitespace, c)) continue
1305
- else if (not(nameStart, c)) {
1306
- if (parser.script) {
1307
- parser.script += "</" + c
1308
- parser.state = S.SCRIPT
1372
+
1373
+ case S.ATTRIB_VALUE_QUOTED:
1374
+ if (c !== parser.q) {
1375
+ if (c === '&') {
1376
+ parser.state = S.ATTRIB_VALUE_ENTITY_Q
1309
1377
  } else {
1310
- strictFail(parser, "Invalid tagname in closing tag.")
1378
+ parser.attribValue += c
1311
1379
  }
1312
- } else parser.tagName = c
1313
- }
1314
- else if (c === ">") closeTag(parser)
1315
- else if (is(nameBody, c)) parser.tagName += c
1316
- else if (parser.script) {
1317
- parser.script += "</" + parser.tagName
1318
- parser.tagName = ""
1319
- parser.state = S.SCRIPT
1320
- } else {
1321
- if (not(whitespace, c)) strictFail(parser,
1322
- "Invalid tagname in closing tag")
1323
- parser.state = S.CLOSE_TAG_SAW_WHITE
1324
- }
1325
- continue
1326
-
1327
- case S.CLOSE_TAG_SAW_WHITE:
1328
- if (is(whitespace, c)) continue
1329
- if (c === ">") closeTag(parser)
1330
- else strictFail(parser, "Invalid characters in closing tag")
1331
- continue
1332
-
1333
- case S.TEXT_ENTITY:
1334
- case S.ATTRIB_VALUE_ENTITY_Q:
1335
- case S.ATTRIB_VALUE_ENTITY_U:
1336
- switch(parser.state) {
1337
- case S.TEXT_ENTITY:
1338
- var returnState = S.TEXT, buffer = "textNode"
1339
- break
1340
-
1341
- case S.ATTRIB_VALUE_ENTITY_Q:
1342
- var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
1343
- break
1344
-
1345
- case S.ATTRIB_VALUE_ENTITY_U:
1346
- var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
1347
- break
1348
- }
1349
- if (c === ";") {
1350
- parser[buffer] += parseEntity(parser)
1351
- parser.entity = ""
1352
- parser.state = returnState
1353
- }
1354
- else if (is(entity, c)) parser.entity += c
1355
- else {
1356
- strictFail(parser, "Invalid character entity")
1357
- parser[buffer] += "&" + parser.entity + c
1358
- parser.entity = ""
1359
- parser.state = returnState
1360
- }
1361
- continue
1380
+ continue
1381
+ }
1382
+ attrib(parser)
1383
+ parser.q = ''
1384
+ parser.state = S.ATTRIB_VALUE_CLOSED
1385
+ continue
1386
+
1387
+ case S.ATTRIB_VALUE_CLOSED:
1388
+ if (is(whitespace, c)) {
1389
+ parser.state = S.ATTRIB
1390
+ } else if (c === '>') {
1391
+ openTag(parser)
1392
+ } else if (c === '/') {
1393
+ parser.state = S.OPEN_TAG_SLASH
1394
+ } else if (is(nameStart, c)) {
1395
+ strictFail(parser, 'No whitespace between attributes')
1396
+ parser.attribName = c
1397
+ parser.attribValue = ''
1398
+ parser.state = S.ATTRIB_NAME
1399
+ } else {
1400
+ strictFail(parser, 'Invalid attribute name')
1401
+ }
1402
+ continue
1362
1403
 
1363
- default:
1364
- throw new Error(parser, "Unknown state: " + parser.state)
1404
+ case S.ATTRIB_VALUE_UNQUOTED:
1405
+ if (not(attribEnd, c)) {
1406
+ if (c === '&') {
1407
+ parser.state = S.ATTRIB_VALUE_ENTITY_U
1408
+ } else {
1409
+ parser.attribValue += c
1410
+ }
1411
+ continue
1412
+ }
1413
+ attrib(parser)
1414
+ if (c === '>') {
1415
+ openTag(parser)
1416
+ } else {
1417
+ parser.state = S.ATTRIB
1418
+ }
1419
+ continue
1420
+
1421
+ case S.CLOSE_TAG:
1422
+ if (!parser.tagName) {
1423
+ if (is(whitespace, c)) {
1424
+ continue
1425
+ } else if (not(nameStart, c)) {
1426
+ if (parser.script) {
1427
+ parser.script += '</' + c
1428
+ parser.state = S.SCRIPT
1429
+ } else {
1430
+ strictFail(parser, 'Invalid tagname in closing tag.')
1431
+ }
1432
+ } else {
1433
+ parser.tagName = c
1434
+ }
1435
+ } else if (c === '>') {
1436
+ closeTag(parser)
1437
+ } else if (is(nameBody, c)) {
1438
+ parser.tagName += c
1439
+ } else if (parser.script) {
1440
+ parser.script += '</' + parser.tagName
1441
+ parser.tagName = ''
1442
+ parser.state = S.SCRIPT
1443
+ } else {
1444
+ if (not(whitespace, c)) {
1445
+ strictFail(parser, 'Invalid tagname in closing tag')
1446
+ }
1447
+ parser.state = S.CLOSE_TAG_SAW_WHITE
1448
+ }
1449
+ continue
1450
+
1451
+ case S.CLOSE_TAG_SAW_WHITE:
1452
+ if (is(whitespace, c)) {
1453
+ continue
1454
+ }
1455
+ if (c === '>') {
1456
+ closeTag(parser)
1457
+ } else {
1458
+ strictFail(parser, 'Invalid characters in closing tag')
1459
+ }
1460
+ continue
1461
+
1462
+ case S.TEXT_ENTITY:
1463
+ case S.ATTRIB_VALUE_ENTITY_Q:
1464
+ case S.ATTRIB_VALUE_ENTITY_U:
1465
+ var returnState
1466
+ var buffer
1467
+ switch (parser.state) {
1468
+ case S.TEXT_ENTITY:
1469
+ returnState = S.TEXT
1470
+ buffer = 'textNode'
1471
+ break
1472
+
1473
+ case S.ATTRIB_VALUE_ENTITY_Q:
1474
+ returnState = S.ATTRIB_VALUE_QUOTED
1475
+ buffer = 'attribValue'
1476
+ break
1477
+
1478
+ case S.ATTRIB_VALUE_ENTITY_U:
1479
+ returnState = S.ATTRIB_VALUE_UNQUOTED
1480
+ buffer = 'attribValue'
1481
+ break
1482
+ }
1483
+
1484
+ if (c === ';') {
1485
+ parser[buffer] += parseEntity(parser)
1486
+ parser.entity = ''
1487
+ parser.state = returnState
1488
+ } else if (is(parser.entity.length ? entityBody : entityStart, c)) {
1489
+ parser.entity += c
1490
+ } else {
1491
+ strictFail(parser, 'Invalid character in entity name')
1492
+ parser[buffer] += '&' + parser.entity + c
1493
+ parser.entity = ''
1494
+ parser.state = returnState
1495
+ }
1496
+
1497
+ continue
1498
+
1499
+ default:
1500
+ throw new Error(parser, 'Unknown state: ' + parser.state)
1501
+ }
1502
+ } // while
1503
+
1504
+ if (parser.position >= parser.bufferCheckPosition) {
1505
+ checkBufferLength(parser)
1365
1506
  }
1366
- } // while
1367
- // cdata blocks can get very big under normal conditions. emit and move on.
1368
- // if (parser.state === S.CDATA && parser.cdata) {
1369
- // emitNode(parser, "oncdata", parser.cdata)
1370
- // parser.cdata = ""
1371
- // }
1372
- if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
1373
- return parser
1374
- }
1375
-
1376
- /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
1377
- if (!String.fromCodePoint) {
1378
- (function() {
1379
- var stringFromCharCode = String.fromCharCode;
1380
- var floor = Math.floor;
1381
- var fromCodePoint = function() {
1382
- var MAX_SIZE = 0x4000;
1383
- var codeUnits = [];
1384
- var highSurrogate;
1385
- var lowSurrogate;
1386
- var index = -1;
1387
- var length = arguments.length;
1388
- if (!length) {
1389
- return '';
1390
- }
1391
- var result = '';
1392
- while (++index < length) {
1393
- var codePoint = Number(arguments[index]);
1394
- if (
1395
- !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
1396
- codePoint < 0 || // not a valid Unicode code point
1397
- codePoint > 0x10FFFF || // not a valid Unicode code point
1398
- floor(codePoint) != codePoint // not an integer
1399
- ) {
1400
- throw RangeError('Invalid code point: ' + codePoint);
1401
- }
1402
- if (codePoint <= 0xFFFF) { // BMP code point
1403
- codeUnits.push(codePoint);
1404
- } else { // Astral code point; split in surrogate halves
1405
- // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
1406
- codePoint -= 0x10000;
1407
- highSurrogate = (codePoint >> 10) + 0xD800;
1408
- lowSurrogate = (codePoint % 0x400) + 0xDC00;
1409
- codeUnits.push(highSurrogate, lowSurrogate);
1410
- }
1411
- if (index + 1 == length || codeUnits.length > MAX_SIZE) {
1412
- result += stringFromCharCode.apply(null, codeUnits);
1413
- codeUnits.length = 0;
1414
- }
1415
- }
1416
- return result;
1417
- };
1418
- if (Object.defineProperty) {
1419
- Object.defineProperty(String, 'fromCodePoint', {
1420
- 'value': fromCodePoint,
1421
- 'configurable': true,
1422
- 'writable': true
1423
- });
1424
- } else {
1425
- String.fromCodePoint = fromCodePoint;
1426
- }
1427
- }());
1428
- }
1507
+ return parser
1508
+ }
1429
1509
 
1430
- })(typeof exports === "undefined" ? sax = {} : exports);
1510
+ /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
1511
+ if (!String.fromCodePoint) {
1512
+ (function () {
1513
+ var stringFromCharCode = String.fromCharCode
1514
+ var floor = Math.floor
1515
+ var fromCodePoint = function () {
1516
+ var MAX_SIZE = 0x4000
1517
+ var codeUnits = []
1518
+ var highSurrogate
1519
+ var lowSurrogate
1520
+ var index = -1
1521
+ var length = arguments.length
1522
+ if (!length) {
1523
+ return ''
1524
+ }
1525
+ var result = ''
1526
+ while (++index < length) {
1527
+ var codePoint = Number(arguments[index])
1528
+ if (
1529
+ !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
1530
+ codePoint < 0 || // not a valid Unicode code point
1531
+ codePoint > 0x10FFFF || // not a valid Unicode code point
1532
+ floor(codePoint) !== codePoint // not an integer
1533
+ ) {
1534
+ throw RangeError('Invalid code point: ' + codePoint)
1535
+ }
1536
+ if (codePoint <= 0xFFFF) { // BMP code point
1537
+ codeUnits.push(codePoint)
1538
+ } else { // Astral code point; split in surrogate halves
1539
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
1540
+ codePoint -= 0x10000
1541
+ highSurrogate = (codePoint >> 10) + 0xD800
1542
+ lowSurrogate = (codePoint % 0x400) + 0xDC00
1543
+ codeUnits.push(highSurrogate, lowSurrogate)
1544
+ }
1545
+ if (index + 1 === length || codeUnits.length > MAX_SIZE) {
1546
+ result += stringFromCharCode.apply(null, codeUnits)
1547
+ codeUnits.length = 0
1548
+ }
1549
+ }
1550
+ return result
1551
+ }
1552
+ if (Object.defineProperty) {
1553
+ Object.defineProperty(String, 'fromCodePoint', {
1554
+ value: fromCodePoint,
1555
+ configurable: true,
1556
+ writable: true
1557
+ })
1558
+ } else {
1559
+ String.fromCodePoint = fromCodePoint
1560
+ }
1561
+ }())
1562
+ }
1563
+ })(typeof exports === 'undefined' ? this.sax = {} : exports)