sax 1.1.3 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/README.md +5 -0
  2. package/lib/sax.js +1462 -1316
  3. package/package.json +5 -3
package/lib/sax.js CHANGED
@@ -1,1430 +1,1576 @@
1
- // wrapper for non-node envs
2
- ;(function (sax) {
3
-
4
- sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
5
- sax.SAXParser = SAXParser
6
- sax.SAXStream = SAXStream
7
- sax.createStream = createStream
8
-
9
- // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
10
- // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
11
- // since that's the earliest that a buffer overrun could occur. This way, checks are
12
- // as rare as required, but as often as necessary to ensure never crossing this bound.
13
- // Furthermore, buffers are only tested at most once per write(), so passing a very
14
- // large string into write() might have undesirable effects, but this is manageable by
15
- // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
16
- // edge case, result in creating at most one complete copy of the string passed in.
17
- // Set to Infinity to have unlimited buffers.
18
- sax.MAX_BUFFER_LENGTH = 64 * 1024
19
-
20
- var buffers = [
21
- "comment", "sgmlDecl", "textNode", "tagName", "doctype",
22
- "procInstName", "procInstBody", "entity", "attribName",
23
- "attribValue", "cdata", "script"
24
- ]
25
-
26
- sax.EVENTS = // for discoverability.
27
- [ "text"
28
- , "processinginstruction"
29
- , "sgmldeclaration"
30
- , "doctype"
31
- , "comment"
32
- , "attribute"
33
- , "opentag"
34
- , "closetag"
35
- , "opencdata"
36
- , "cdata"
37
- , "closecdata"
38
- , "error"
39
- , "end"
40
- , "ready"
41
- , "script"
42
- , "opennamespace"
43
- , "closenamespace"
1
+ ;(function (sax) { // wrapper for non-node envs
2
+ sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
3
+ sax.SAXParser = SAXParser
4
+ sax.SAXStream = SAXStream
5
+ sax.createStream = createStream
6
+
7
+ // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
8
+ // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
9
+ // since that's the earliest that a buffer overrun could occur. This way, checks are
10
+ // as rare as required, but as often as necessary to ensure never crossing this bound.
11
+ // Furthermore, buffers are only tested at most once per write(), so passing a very
12
+ // large string into write() might have undesirable effects, but this is manageable by
13
+ // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
14
+ // edge case, result in creating at most one complete copy of the string passed in.
15
+ // Set to Infinity to have unlimited buffers.
16
+ sax.MAX_BUFFER_LENGTH = 64 * 1024
17
+
18
+ var buffers = [
19
+ 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
20
+ 'procInstName', 'procInstBody', 'entity', 'attribName',
21
+ 'attribValue', 'cdata', 'script'
44
22
  ]
45
23
 
46
- function SAXParser (strict, opt) {
47
- if (!(this instanceof SAXParser)) return new SAXParser(strict, opt)
48
-
49
- var parser = this
50
- clearBuffers(parser)
51
- parser.q = parser.c = ""
52
- parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
53
- parser.opt = opt || {}
54
- parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
55
- parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase"
56
- parser.tags = []
57
- parser.closed = parser.closedRoot = parser.sawRoot = false
58
- parser.tag = parser.error = null
59
- parser.strict = !!strict
60
- parser.noscript = !!(strict || parser.opt.noscript)
61
- parser.state = S.BEGIN
62
- parser.strictEntities = parser.opt.strictEntities
63
- parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
64
- parser.attribList = []
65
-
66
- // namespaces form a prototype chain.
67
- // it always points at the current tag,
68
- // which protos to its parent tag.
69
- if (parser.opt.xmlns) parser.ns = Object.create(rootNS)
70
-
71
- // mostly just for error reporting
72
- parser.trackPosition = parser.opt.position !== false
73
- if (parser.trackPosition) {
74
- parser.position = parser.line = parser.column = 0
75
- }
76
- emit(parser, "onready")
77
- }
78
-
79
- if (!Object.create) Object.create = function (o) {
80
- function f () { this.__proto__ = o }
81
- f.prototype = o
82
- return new f
83
- }
84
-
85
- if (!Object.getPrototypeOf) Object.getPrototypeOf = function (o) {
86
- return o.__proto__
87
- }
88
-
89
- if (!Object.keys) Object.keys = function (o) {
90
- var a = []
91
- for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
92
- return a
93
- }
94
-
95
- function checkBufferLength (parser) {
96
- var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
97
- , maxActual = 0
98
- for (var i = 0, l = buffers.length; i < l; i ++) {
99
- var len = parser[buffers[i]].length
100
- if (len > maxAllowed) {
101
- // Text/cdata nodes can get big, and since they're buffered,
102
- // we can get here under normal conditions.
103
- // Avoid issues by emitting the text node now,
104
- // so at least it won't get any bigger.
105
- switch (buffers[i]) {
106
- case "textNode":
107
- closeText(parser)
108
- break
24
+ sax.EVENTS = [
25
+ 'text',
26
+ 'processinginstruction',
27
+ 'sgmldeclaration',
28
+ 'doctype',
29
+ 'comment',
30
+ 'opentagstart',
31
+ 'attribute',
32
+ 'opentag',
33
+ 'closetag',
34
+ 'opencdata',
35
+ 'cdata',
36
+ 'closecdata',
37
+ 'error',
38
+ 'end',
39
+ 'ready',
40
+ 'script',
41
+ 'opennamespace',
42
+ 'closenamespace'
43
+ ]
109
44
 
110
- case "cdata":
111
- emitNode(parser, "oncdata", parser.cdata)
112
- parser.cdata = ""
113
- break
45
+ function SAXParser (strict, opt) {
46
+ if (!(this instanceof SAXParser)) {
47
+ return new SAXParser(strict, opt)
48
+ }
114
49
 
115
- case "script":
116
- emitNode(parser, "onscript", parser.script)
117
- parser.script = ""
118
- break
50
+ var parser = this
51
+ clearBuffers(parser)
52
+ parser.q = parser.c = ''
53
+ parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
54
+ parser.opt = opt || {}
55
+ parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
56
+ parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
57
+ parser.tags = []
58
+ parser.closed = parser.closedRoot = parser.sawRoot = false
59
+ parser.tag = parser.error = null
60
+ parser.strict = !!strict
61
+ parser.noscript = !!(strict || parser.opt.noscript)
62
+ parser.state = S.BEGIN
63
+ parser.strictEntities = parser.opt.strictEntities
64
+ parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
65
+ parser.attribList = []
66
+
67
+ // namespaces form a prototype chain.
68
+ // it always points at the current tag,
69
+ // which protos to its parent tag.
70
+ if (parser.opt.xmlns) {
71
+ parser.ns = Object.create(rootNS)
72
+ }
119
73
 
120
- default:
121
- error(parser, "Max buffer length exceeded: "+buffers[i])
122
- }
74
+ // mostly just for error reporting
75
+ parser.trackPosition = parser.opt.position !== false
76
+ if (parser.trackPosition) {
77
+ parser.position = parser.line = parser.column = 0
123
78
  }
124
- maxActual = Math.max(maxActual, len)
125
- }
126
- // schedule the next check for the earliest possible buffer overrun.
127
- parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
128
- + parser.position
129
- }
130
-
131
- function clearBuffers (parser) {
132
- for (var i = 0, l = buffers.length; i < l; i ++) {
133
- parser[buffers[i]] = ""
79
+ emit(parser, 'onready')
134
80
  }
135
- }
136
81
 
137
- function flushBuffers (parser) {
138
- closeText(parser)
139
- if (parser.cdata !== "") {
140
- emitNode(parser, "oncdata", parser.cdata)
141
- parser.cdata = ""
82
+ if (!Object.create) {
83
+ Object.create = function (o) {
84
+ function F () {}
85
+ F.prototype = o
86
+ var newf = new F()
87
+ return newf
88
+ }
142
89
  }
143
- if (parser.script !== "") {
144
- emitNode(parser, "onscript", parser.script)
145
- parser.script = ""
90
+
91
+ if (!Object.keys) {
92
+ Object.keys = function (o) {
93
+ var a = []
94
+ for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
95
+ return a
96
+ }
146
97
  }
147
- }
148
-
149
- SAXParser.prototype =
150
- { end: function () { end(this) }
151
- , write: write
152
- , resume: function () { this.error = null; return this }
153
- , close: function () { return this.write(null) }
154
- , flush: function () { flushBuffers(this) }
98
+
99
+ function checkBufferLength (parser) {
100
+ var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
101
+ var maxActual = 0
102
+ for (var i = 0, l = buffers.length; i < l; i++) {
103
+ var len = parser[buffers[i]].length
104
+ if (len > maxAllowed) {
105
+ // Text/cdata nodes can get big, and since they're buffered,
106
+ // we can get here under normal conditions.
107
+ // Avoid issues by emitting the text node now,
108
+ // so at least it won't get any bigger.
109
+ switch (buffers[i]) {
110
+ case 'textNode':
111
+ closeText(parser)
112
+ break
113
+
114
+ case 'cdata':
115
+ emitNode(parser, 'oncdata', parser.cdata)
116
+ parser.cdata = ''
117
+ break
118
+
119
+ case 'script':
120
+ emitNode(parser, 'onscript', parser.script)
121
+ parser.script = ''
122
+ break
123
+
124
+ default:
125
+ error(parser, 'Max buffer length exceeded: ' + buffers[i])
126
+ }
127
+ }
128
+ maxActual = Math.max(maxActual, len)
129
+ }
130
+ // schedule the next check for the earliest possible buffer overrun.
131
+ var m = sax.MAX_BUFFER_LENGTH - maxActual
132
+ parser.bufferCheckPosition = m + parser.position
155
133
  }
156
134
 
157
- try {
158
- var Stream = require("stream").Stream
159
- } catch (ex) {
160
- var Stream = function () {}
161
- }
135
+ function clearBuffers (parser) {
136
+ for (var i = 0, l = buffers.length; i < l; i++) {
137
+ parser[buffers[i]] = ''
138
+ }
139
+ }
162
140
 
141
+ function flushBuffers (parser) {
142
+ closeText(parser)
143
+ if (parser.cdata !== '') {
144
+ emitNode(parser, 'oncdata', parser.cdata)
145
+ parser.cdata = ''
146
+ }
147
+ if (parser.script !== '') {
148
+ emitNode(parser, 'onscript', parser.script)
149
+ parser.script = ''
150
+ }
151
+ }
163
152
 
164
- var streamWraps = sax.EVENTS.filter(function (ev) {
165
- return ev !== "error" && ev !== "end"
166
- })
153
+ SAXParser.prototype = {
154
+ end: function () { end(this) },
155
+ write: write,
156
+ resume: function () { this.error = null; return this },
157
+ close: function () { return this.write(null) },
158
+ flush: function () { flushBuffers(this) }
159
+ }
167
160
 
168
- function createStream (strict, opt) {
169
- return new SAXStream(strict, opt)
170
- }
161
+ var Stream
162
+ try {
163
+ Stream = require('stream').Stream
164
+ } catch (ex) {
165
+ Stream = function () {}
166
+ }
171
167
 
172
- function SAXStream (strict, opt) {
173
- if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)
168
+ var streamWraps = sax.EVENTS.filter(function (ev) {
169
+ return ev !== 'error' && ev !== 'end'
170
+ })
174
171
 
175
- Stream.apply(this)
172
+ function createStream (strict, opt) {
173
+ return new SAXStream(strict, opt)
174
+ }
176
175
 
177
- this._parser = new SAXParser(strict, opt)
178
- this.writable = true
179
- this.readable = true
176
+ function SAXStream (strict, opt) {
177
+ if (!(this instanceof SAXStream)) {
178
+ return new SAXStream(strict, opt)
179
+ }
180
180
 
181
+ Stream.apply(this)
181
182
 
182
- var me = this
183
+ this._parser = new SAXParser(strict, opt)
184
+ this.writable = true
185
+ this.readable = true
183
186
 
184
- this._parser.onend = function () {
185
- me.emit("end")
186
- }
187
+ var me = this
187
188
 
188
- this._parser.onerror = function (er) {
189
- me.emit("error", er)
189
+ this._parser.onend = function () {
190
+ me.emit('end')
191
+ }
190
192
 
191
- // if didn't throw, then means error was handled.
192
- // go ahead and clear error, so we can write again.
193
- me._parser.error = null
194
- }
193
+ this._parser.onerror = function (er) {
194
+ me.emit('error', er)
195
195
 
196
- this._decoder = null;
196
+ // if didn't throw, then means error was handled.
197
+ // go ahead and clear error, so we can write again.
198
+ me._parser.error = null
199
+ }
197
200
 
198
- streamWraps.forEach(function (ev) {
199
- Object.defineProperty(me, "on" + ev, {
200
- get: function () { return me._parser["on" + ev] },
201
- set: function (h) {
202
- if (!h) {
203
- me.removeAllListeners(ev)
204
- return me._parser["on"+ev] = h
205
- }
206
- me.on(ev, h)
207
- },
208
- enumerable: true,
209
- configurable: false
201
+ this._decoder = null
202
+
203
+ streamWraps.forEach(function (ev) {
204
+ Object.defineProperty(me, 'on' + ev, {
205
+ get: function () {
206
+ return me._parser['on' + ev]
207
+ },
208
+ set: function (h) {
209
+ if (!h) {
210
+ me.removeAllListeners(ev)
211
+ me._parser['on' + ev] = h
212
+ return h
213
+ }
214
+ me.on(ev, h)
215
+ },
216
+ enumerable: true,
217
+ configurable: false
218
+ })
210
219
  })
211
- })
212
- }
220
+ }
213
221
 
214
- SAXStream.prototype = Object.create(Stream.prototype,
215
- { constructor: { value: SAXStream } })
222
+ SAXStream.prototype = Object.create(Stream.prototype, {
223
+ constructor: {
224
+ value: SAXStream
225
+ }
226
+ })
216
227
 
217
- SAXStream.prototype.write = function (data) {
218
- if (typeof Buffer === 'function' &&
228
+ SAXStream.prototype.write = function (data) {
229
+ if (typeof Buffer === 'function' &&
219
230
  typeof Buffer.isBuffer === 'function' &&
220
231
  Buffer.isBuffer(data)) {
221
- if (!this._decoder) {
222
- var SD = require('string_decoder').StringDecoder
223
- this._decoder = new SD('utf8')
232
+ if (!this._decoder) {
233
+ var SD = require('string_decoder').StringDecoder
234
+ this._decoder = new SD('utf8')
235
+ }
236
+ data = this._decoder.write(data)
224
237
  }
225
- data = this._decoder.write(data);
238
+
239
+ this._parser.write(data.toString())
240
+ this.emit('data', data)
241
+ return true
226
242
  }
227
243
 
228
- this._parser.write(data.toString())
229
- this.emit("data", data)
230
- return true
231
- }
232
-
233
- SAXStream.prototype.end = function (chunk) {
234
- if (chunk && chunk.length) this.write(chunk)
235
- this._parser.end()
236
- return true
237
- }
238
-
239
- SAXStream.prototype.on = function (ev, handler) {
240
- var me = this
241
- if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
242
- me._parser["on"+ev] = function () {
243
- var args = arguments.length === 1 ? [arguments[0]]
244
- : Array.apply(null, arguments)
245
- args.splice(0, 0, ev)
246
- me.emit.apply(me, args)
244
+ SAXStream.prototype.end = function (chunk) {
245
+ if (chunk && chunk.length) {
246
+ this.write(chunk)
247
247
  }
248
+ this._parser.end()
249
+ return true
248
250
  }
249
251
 
250
- return Stream.prototype.on.call(me, ev, handler)
251
- }
252
+ SAXStream.prototype.on = function (ev, handler) {
253
+ var me = this
254
+ if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) {
255
+ me._parser['on' + ev] = function () {
256
+ var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)
257
+ args.splice(0, 0, ev)
258
+ me.emit.apply(me, args)
259
+ }
260
+ }
252
261
 
262
+ return Stream.prototype.on.call(me, ev, handler)
263
+ }
253
264
 
265
+ // character classes and tokens
266
+ var whitespace = '\r\n\t '
254
267
 
255
- // character classes and tokens
256
- var whitespace = "\r\n\t "
257
268
  // this really needs to be replaced with character classes.
258
269
  // XML allows all manner of ridiculous numbers and digits.
259
- , number = "0124356789"
260
- , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
270
+ var number = '0124356789'
271
+ var letter = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
272
+
261
273
  // (Letter | "_" | ":")
262
- , quote = "'\""
263
- , entity = number+letter+"#"
264
- , attribEnd = whitespace + ">"
265
- , CDATA = "[CDATA["
266
- , DOCTYPE = "DOCTYPE"
267
- , XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
268
- , XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
269
- , rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
270
-
271
- // turn all the string character sets into character class objects.
272
- whitespace = charClass(whitespace)
273
- number = charClass(number)
274
- letter = charClass(letter)
275
-
276
- // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
277
- // This implementation works on strings, a single character at a time
278
- // as such, it cannot ever support astral-plane characters (10000-EFFFF)
279
- // without a significant breaking change to either this parser, or the
280
- // JavaScript language. Implementation of an emoji-capable xml parser
281
- // is left as an exercise for the reader.
282
- var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
283
-
284
- var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
285
-
286
- quote = charClass(quote)
287
- entity = charClass(entity)
288
- attribEnd = charClass(attribEnd)
289
-
290
- function charClass (str) {
291
- return str.split("").reduce(function (s, c) {
292
- s[c] = true
293
- return s
294
- }, {})
295
- }
296
-
297
- function isRegExp (c) {
298
- return Object.prototype.toString.call(c) === '[object RegExp]'
299
- }
300
-
301
- function is (charclass, c) {
302
- return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
303
- }
304
-
305
- function not (charclass, c) {
306
- return !is(charclass, c)
307
- }
308
-
309
- var S = 0
310
- sax.STATE =
311
- { BEGIN : S++ // leading byte order mark or whitespace
312
- , BEGIN_WHITESPACE : S++ // leading whitespace
313
- , TEXT : S++ // general stuff
314
- , TEXT_ENTITY : S++ // &amp and such.
315
- , OPEN_WAKA : S++ // <
316
- , SGML_DECL : S++ // <!BLARG
317
- , SGML_DECL_QUOTED : S++ // <!BLARG foo "bar
318
- , DOCTYPE : S++ // <!DOCTYPE
319
- , DOCTYPE_QUOTED : S++ // <!DOCTYPE "//blah
320
- , DOCTYPE_DTD : S++ // <!DOCTYPE "//blah" [ ...
321
- , DOCTYPE_DTD_QUOTED : S++ // <!DOCTYPE "//blah" [ "foo
322
- , COMMENT_STARTING : S++ // <!-
323
- , COMMENT : S++ // <!--
324
- , COMMENT_ENDING : S++ // <!-- blah -
325
- , COMMENT_ENDED : S++ // <!-- blah --
326
- , CDATA : S++ // <![CDATA[ something
327
- , CDATA_ENDING : S++ // ]
328
- , CDATA_ENDING_2 : S++ // ]]
329
- , PROC_INST : S++ // <?hi
330
- , PROC_INST_BODY : S++ // <?hi there
331
- , PROC_INST_ENDING : S++ // <?hi "there" ?
332
- , OPEN_TAG : S++ // <strong
333
- , OPEN_TAG_SLASH : S++ // <strong /
334
- , ATTRIB : S++ // <a
335
- , ATTRIB_NAME : S++ // <a foo
336
- , ATTRIB_NAME_SAW_WHITE : S++ // <a foo _
337
- , ATTRIB_VALUE : S++ // <a foo=
338
- , ATTRIB_VALUE_QUOTED : S++ // <a foo="bar
339
- , ATTRIB_VALUE_CLOSED : S++ // <a foo="bar"
340
- , ATTRIB_VALUE_UNQUOTED : S++ // <a foo=bar
341
- , ATTRIB_VALUE_ENTITY_Q : S++ // <foo bar="&quot;"
342
- , ATTRIB_VALUE_ENTITY_U : S++ // <foo bar=&quot;
343
- , CLOSE_TAG : S++ // </a
344
- , CLOSE_TAG_SAW_WHITE : S++ // </a >
345
- , SCRIPT : S++ // <script> ...
346
- , SCRIPT_ENDING : S++ // <script> ... <
347
- }
348
-
349
- sax.XML_ENTITIES =
350
- { "amp" : "&"
351
- , "gt" : ">"
352
- , "lt" : "<"
353
- , "quot" : "\""
354
- , "apos" : "'"
355
- }
356
-
357
- sax.ENTITIES =
358
- { "amp" : "&"
359
- , "gt" : ">"
360
- , "lt" : "<"
361
- , "quot" : "\""
362
- , "apos" : "'"
363
- , "AElig" : 198
364
- , "Aacute" : 193
365
- , "Acirc" : 194
366
- , "Agrave" : 192
367
- , "Aring" : 197
368
- , "Atilde" : 195
369
- , "Auml" : 196
370
- , "Ccedil" : 199
371
- , "ETH" : 208
372
- , "Eacute" : 201
373
- , "Ecirc" : 202
374
- , "Egrave" : 200
375
- , "Euml" : 203
376
- , "Iacute" : 205
377
- , "Icirc" : 206
378
- , "Igrave" : 204
379
- , "Iuml" : 207
380
- , "Ntilde" : 209
381
- , "Oacute" : 211
382
- , "Ocirc" : 212
383
- , "Ograve" : 210
384
- , "Oslash" : 216
385
- , "Otilde" : 213
386
- , "Ouml" : 214
387
- , "THORN" : 222
388
- , "Uacute" : 218
389
- , "Ucirc" : 219
390
- , "Ugrave" : 217
391
- , "Uuml" : 220
392
- , "Yacute" : 221
393
- , "aacute" : 225
394
- , "acirc" : 226
395
- , "aelig" : 230
396
- , "agrave" : 224
397
- , "aring" : 229
398
- , "atilde" : 227
399
- , "auml" : 228
400
- , "ccedil" : 231
401
- , "eacute" : 233
402
- , "ecirc" : 234
403
- , "egrave" : 232
404
- , "eth" : 240
405
- , "euml" : 235
406
- , "iacute" : 237
407
- , "icirc" : 238
408
- , "igrave" : 236
409
- , "iuml" : 239
410
- , "ntilde" : 241
411
- , "oacute" : 243
412
- , "ocirc" : 244
413
- , "ograve" : 242
414
- , "oslash" : 248
415
- , "otilde" : 245
416
- , "ouml" : 246
417
- , "szlig" : 223
418
- , "thorn" : 254
419
- , "uacute" : 250
420
- , "ucirc" : 251
421
- , "ugrave" : 249
422
- , "uuml" : 252
423
- , "yacute" : 253
424
- , "yuml" : 255
425
- , "copy" : 169
426
- , "reg" : 174
427
- , "nbsp" : 160
428
- , "iexcl" : 161
429
- , "cent" : 162
430
- , "pound" : 163
431
- , "curren" : 164
432
- , "yen" : 165
433
- , "brvbar" : 166
434
- , "sect" : 167
435
- , "uml" : 168
436
- , "ordf" : 170
437
- , "laquo" : 171
438
- , "not" : 172
439
- , "shy" : 173
440
- , "macr" : 175
441
- , "deg" : 176
442
- , "plusmn" : 177
443
- , "sup1" : 185
444
- , "sup2" : 178
445
- , "sup3" : 179
446
- , "acute" : 180
447
- , "micro" : 181
448
- , "para" : 182
449
- , "middot" : 183
450
- , "cedil" : 184
451
- , "ordm" : 186
452
- , "raquo" : 187
453
- , "frac14" : 188
454
- , "frac12" : 189
455
- , "frac34" : 190
456
- , "iquest" : 191
457
- , "times" : 215
458
- , "divide" : 247
459
- , "OElig" : 338
460
- , "oelig" : 339
461
- , "Scaron" : 352
462
- , "scaron" : 353
463
- , "Yuml" : 376
464
- , "fnof" : 402
465
- , "circ" : 710
466
- , "tilde" : 732
467
- , "Alpha" : 913
468
- , "Beta" : 914
469
- , "Gamma" : 915
470
- , "Delta" : 916
471
- , "Epsilon" : 917
472
- , "Zeta" : 918
473
- , "Eta" : 919
474
- , "Theta" : 920
475
- , "Iota" : 921
476
- , "Kappa" : 922
477
- , "Lambda" : 923
478
- , "Mu" : 924
479
- , "Nu" : 925
480
- , "Xi" : 926
481
- , "Omicron" : 927
482
- , "Pi" : 928
483
- , "Rho" : 929
484
- , "Sigma" : 931
485
- , "Tau" : 932
486
- , "Upsilon" : 933
487
- , "Phi" : 934
488
- , "Chi" : 935
489
- , "Psi" : 936
490
- , "Omega" : 937
491
- , "alpha" : 945
492
- , "beta" : 946
493
- , "gamma" : 947
494
- , "delta" : 948
495
- , "epsilon" : 949
496
- , "zeta" : 950
497
- , "eta" : 951
498
- , "theta" : 952
499
- , "iota" : 953
500
- , "kappa" : 954
501
- , "lambda" : 955
502
- , "mu" : 956
503
- , "nu" : 957
504
- , "xi" : 958
505
- , "omicron" : 959
506
- , "pi" : 960
507
- , "rho" : 961
508
- , "sigmaf" : 962
509
- , "sigma" : 963
510
- , "tau" : 964
511
- , "upsilon" : 965
512
- , "phi" : 966
513
- , "chi" : 967
514
- , "psi" : 968
515
- , "omega" : 969
516
- , "thetasym" : 977
517
- , "upsih" : 978
518
- , "piv" : 982
519
- , "ensp" : 8194
520
- , "emsp" : 8195
521
- , "thinsp" : 8201
522
- , "zwnj" : 8204
523
- , "zwj" : 8205
524
- , "lrm" : 8206
525
- , "rlm" : 8207
526
- , "ndash" : 8211
527
- , "mdash" : 8212
528
- , "lsquo" : 8216
529
- , "rsquo" : 8217
530
- , "sbquo" : 8218
531
- , "ldquo" : 8220
532
- , "rdquo" : 8221
533
- , "bdquo" : 8222
534
- , "dagger" : 8224
535
- , "Dagger" : 8225
536
- , "bull" : 8226
537
- , "hellip" : 8230
538
- , "permil" : 8240
539
- , "prime" : 8242
540
- , "Prime" : 8243
541
- , "lsaquo" : 8249
542
- , "rsaquo" : 8250
543
- , "oline" : 8254
544
- , "frasl" : 8260
545
- , "euro" : 8364
546
- , "image" : 8465
547
- , "weierp" : 8472
548
- , "real" : 8476
549
- , "trade" : 8482
550
- , "alefsym" : 8501
551
- , "larr" : 8592
552
- , "uarr" : 8593
553
- , "rarr" : 8594
554
- , "darr" : 8595
555
- , "harr" : 8596
556
- , "crarr" : 8629
557
- , "lArr" : 8656
558
- , "uArr" : 8657
559
- , "rArr" : 8658
560
- , "dArr" : 8659
561
- , "hArr" : 8660
562
- , "forall" : 8704
563
- , "part" : 8706
564
- , "exist" : 8707
565
- , "empty" : 8709
566
- , "nabla" : 8711
567
- , "isin" : 8712
568
- , "notin" : 8713
569
- , "ni" : 8715
570
- , "prod" : 8719
571
- , "sum" : 8721
572
- , "minus" : 8722
573
- , "lowast" : 8727
574
- , "radic" : 8730
575
- , "prop" : 8733
576
- , "infin" : 8734
577
- , "ang" : 8736
578
- , "and" : 8743
579
- , "or" : 8744
580
- , "cap" : 8745
581
- , "cup" : 8746
582
- , "int" : 8747
583
- , "there4" : 8756
584
- , "sim" : 8764
585
- , "cong" : 8773
586
- , "asymp" : 8776
587
- , "ne" : 8800
588
- , "equiv" : 8801
589
- , "le" : 8804
590
- , "ge" : 8805
591
- , "sub" : 8834
592
- , "sup" : 8835
593
- , "nsub" : 8836
594
- , "sube" : 8838
595
- , "supe" : 8839
596
- , "oplus" : 8853
597
- , "otimes" : 8855
598
- , "perp" : 8869
599
- , "sdot" : 8901
600
- , "lceil" : 8968
601
- , "rceil" : 8969
602
- , "lfloor" : 8970
603
- , "rfloor" : 8971
604
- , "lang" : 9001
605
- , "rang" : 9002
606
- , "loz" : 9674
607
- , "spades" : 9824
608
- , "clubs" : 9827
609
- , "hearts" : 9829
610
- , "diams" : 9830
611
- }
612
-
613
- Object.keys(sax.ENTITIES).forEach(function (key) {
274
+ var quote = '\'"'
275
+ var attribEnd = whitespace + '>'
276
+ var CDATA = '[CDATA['
277
+ var DOCTYPE = 'DOCTYPE'
278
+ var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
279
+ var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
280
+ var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
281
+
282
+ // turn all the string character sets into character class objects.
283
+ whitespace = charClass(whitespace)
284
+ number = charClass(number)
285
+ letter = charClass(letter)
286
+
287
+ // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
288
+ // This implementation works on strings, a single character at a time
289
+ // as such, it cannot ever support astral-plane characters (10000-EFFFF)
290
+ // without a significant breaking change to either this parser, or the
291
+ // JavaScript language. Implementation of an emoji-capable xml parser
292
+ // is left as an exercise for the reader.
293
+ var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
294
+
295
+ var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
296
+
297
+ var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
298
+ var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
299
+
300
+ quote = charClass(quote)
301
+ attribEnd = charClass(attribEnd)
302
+
303
+ function charClass (str) {
304
+ return str.split('').reduce(function (s, c) {
305
+ s[c] = true
306
+ return s
307
+ }, {})
308
+ }
309
+
310
+ function isRegExp (c) {
311
+ return Object.prototype.toString.call(c) === '[object RegExp]'
312
+ }
313
+
314
+ function is (charclass, c) {
315
+ return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
316
+ }
317
+
318
+ function not (charclass, c) {
319
+ return !is(charclass, c)
320
+ }
321
+
322
+ var S = 0
323
+ sax.STATE = {
324
+ BEGIN: S++, // leading byte order mark or whitespace
325
+ BEGIN_WHITESPACE: S++, // leading whitespace
326
+ TEXT: S++, // general stuff
327
+ TEXT_ENTITY: S++, // &amp and such.
328
+ OPEN_WAKA: S++, // <
329
+ SGML_DECL: S++, // <!BLARG
330
+ SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
331
+ DOCTYPE: S++, // <!DOCTYPE
332
+ DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
333
+ DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
334
+ DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
335
+ COMMENT_STARTING: S++, // <!-
336
+ COMMENT: S++, // <!--
337
+ COMMENT_ENDING: S++, // <!-- blah -
338
+ COMMENT_ENDED: S++, // <!-- blah --
339
+ CDATA: S++, // <![CDATA[ something
340
+ CDATA_ENDING: S++, // ]
341
+ CDATA_ENDING_2: S++, // ]]
342
+ PROC_INST: S++, // <?hi
343
+ PROC_INST_BODY: S++, // <?hi there
344
+ PROC_INST_ENDING: S++, // <?hi "there" ?
345
+ OPEN_TAG: S++, // <strong
346
+ OPEN_TAG_SLASH: S++, // <strong /
347
+ ATTRIB: S++, // <a
348
+ ATTRIB_NAME: S++, // <a foo
349
+ ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
350
+ ATTRIB_VALUE: S++, // <a foo=
351
+ ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
352
+ ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
353
+ ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
354
+ ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
355
+ ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
356
+ CLOSE_TAG: S++, // </a
357
+ CLOSE_TAG_SAW_WHITE: S++, // </a >
358
+ SCRIPT: S++, // <script> ...
359
+ SCRIPT_ENDING: S++ // <script> ... <
360
+ }
361
+
362
+ sax.XML_ENTITIES = {
363
+ 'amp': '&',
364
+ 'gt': '>',
365
+ 'lt': '<',
366
+ 'quot': '"',
367
+ 'apos': "'"
368
+ }
369
+
370
+ sax.ENTITIES = {
371
+ 'amp': '&',
372
+ 'gt': '>',
373
+ 'lt': '<',
374
+ 'quot': '"',
375
+ 'apos': "'",
376
+ 'AElig': 198,
377
+ 'Aacute': 193,
378
+ 'Acirc': 194,
379
+ 'Agrave': 192,
380
+ 'Aring': 197,
381
+ 'Atilde': 195,
382
+ 'Auml': 196,
383
+ 'Ccedil': 199,
384
+ 'ETH': 208,
385
+ 'Eacute': 201,
386
+ 'Ecirc': 202,
387
+ 'Egrave': 200,
388
+ 'Euml': 203,
389
+ 'Iacute': 205,
390
+ 'Icirc': 206,
391
+ 'Igrave': 204,
392
+ 'Iuml': 207,
393
+ 'Ntilde': 209,
394
+ 'Oacute': 211,
395
+ 'Ocirc': 212,
396
+ 'Ograve': 210,
397
+ 'Oslash': 216,
398
+ 'Otilde': 213,
399
+ 'Ouml': 214,
400
+ 'THORN': 222,
401
+ 'Uacute': 218,
402
+ 'Ucirc': 219,
403
+ 'Ugrave': 217,
404
+ 'Uuml': 220,
405
+ 'Yacute': 221,
406
+ 'aacute': 225,
407
+ 'acirc': 226,
408
+ 'aelig': 230,
409
+ 'agrave': 224,
410
+ 'aring': 229,
411
+ 'atilde': 227,
412
+ 'auml': 228,
413
+ 'ccedil': 231,
414
+ 'eacute': 233,
415
+ 'ecirc': 234,
416
+ 'egrave': 232,
417
+ 'eth': 240,
418
+ 'euml': 235,
419
+ 'iacute': 237,
420
+ 'icirc': 238,
421
+ 'igrave': 236,
422
+ 'iuml': 239,
423
+ 'ntilde': 241,
424
+ 'oacute': 243,
425
+ 'ocirc': 244,
426
+ 'ograve': 242,
427
+ 'oslash': 248,
428
+ 'otilde': 245,
429
+ 'ouml': 246,
430
+ 'szlig': 223,
431
+ 'thorn': 254,
432
+ 'uacute': 250,
433
+ 'ucirc': 251,
434
+ 'ugrave': 249,
435
+ 'uuml': 252,
436
+ 'yacute': 253,
437
+ 'yuml': 255,
438
+ 'copy': 169,
439
+ 'reg': 174,
440
+ 'nbsp': 160,
441
+ 'iexcl': 161,
442
+ 'cent': 162,
443
+ 'pound': 163,
444
+ 'curren': 164,
445
+ 'yen': 165,
446
+ 'brvbar': 166,
447
+ 'sect': 167,
448
+ 'uml': 168,
449
+ 'ordf': 170,
450
+ 'laquo': 171,
451
+ 'not': 172,
452
+ 'shy': 173,
453
+ 'macr': 175,
454
+ 'deg': 176,
455
+ 'plusmn': 177,
456
+ 'sup1': 185,
457
+ 'sup2': 178,
458
+ 'sup3': 179,
459
+ 'acute': 180,
460
+ 'micro': 181,
461
+ 'para': 182,
462
+ 'middot': 183,
463
+ 'cedil': 184,
464
+ 'ordm': 186,
465
+ 'raquo': 187,
466
+ 'frac14': 188,
467
+ 'frac12': 189,
468
+ 'frac34': 190,
469
+ 'iquest': 191,
470
+ 'times': 215,
471
+ 'divide': 247,
472
+ 'OElig': 338,
473
+ 'oelig': 339,
474
+ 'Scaron': 352,
475
+ 'scaron': 353,
476
+ 'Yuml': 376,
477
+ 'fnof': 402,
478
+ 'circ': 710,
479
+ 'tilde': 732,
480
+ 'Alpha': 913,
481
+ 'Beta': 914,
482
+ 'Gamma': 915,
483
+ 'Delta': 916,
484
+ 'Epsilon': 917,
485
+ 'Zeta': 918,
486
+ 'Eta': 919,
487
+ 'Theta': 920,
488
+ 'Iota': 921,
489
+ 'Kappa': 922,
490
+ 'Lambda': 923,
491
+ 'Mu': 924,
492
+ 'Nu': 925,
493
+ 'Xi': 926,
494
+ 'Omicron': 927,
495
+ 'Pi': 928,
496
+ 'Rho': 929,
497
+ 'Sigma': 931,
498
+ 'Tau': 932,
499
+ 'Upsilon': 933,
500
+ 'Phi': 934,
501
+ 'Chi': 935,
502
+ 'Psi': 936,
503
+ 'Omega': 937,
504
+ 'alpha': 945,
505
+ 'beta': 946,
506
+ 'gamma': 947,
507
+ 'delta': 948,
508
+ 'epsilon': 949,
509
+ 'zeta': 950,
510
+ 'eta': 951,
511
+ 'theta': 952,
512
+ 'iota': 953,
513
+ 'kappa': 954,
514
+ 'lambda': 955,
515
+ 'mu': 956,
516
+ 'nu': 957,
517
+ 'xi': 958,
518
+ 'omicron': 959,
519
+ 'pi': 960,
520
+ 'rho': 961,
521
+ 'sigmaf': 962,
522
+ 'sigma': 963,
523
+ 'tau': 964,
524
+ 'upsilon': 965,
525
+ 'phi': 966,
526
+ 'chi': 967,
527
+ 'psi': 968,
528
+ 'omega': 969,
529
+ 'thetasym': 977,
530
+ 'upsih': 978,
531
+ 'piv': 982,
532
+ 'ensp': 8194,
533
+ 'emsp': 8195,
534
+ 'thinsp': 8201,
535
+ 'zwnj': 8204,
536
+ 'zwj': 8205,
537
+ 'lrm': 8206,
538
+ 'rlm': 8207,
539
+ 'ndash': 8211,
540
+ 'mdash': 8212,
541
+ 'lsquo': 8216,
542
+ 'rsquo': 8217,
543
+ 'sbquo': 8218,
544
+ 'ldquo': 8220,
545
+ 'rdquo': 8221,
546
+ 'bdquo': 8222,
547
+ 'dagger': 8224,
548
+ 'Dagger': 8225,
549
+ 'bull': 8226,
550
+ 'hellip': 8230,
551
+ 'permil': 8240,
552
+ 'prime': 8242,
553
+ 'Prime': 8243,
554
+ 'lsaquo': 8249,
555
+ 'rsaquo': 8250,
556
+ 'oline': 8254,
557
+ 'frasl': 8260,
558
+ 'euro': 8364,
559
+ 'image': 8465,
560
+ 'weierp': 8472,
561
+ 'real': 8476,
562
+ 'trade': 8482,
563
+ 'alefsym': 8501,
564
+ 'larr': 8592,
565
+ 'uarr': 8593,
566
+ 'rarr': 8594,
567
+ 'darr': 8595,
568
+ 'harr': 8596,
569
+ 'crarr': 8629,
570
+ 'lArr': 8656,
571
+ 'uArr': 8657,
572
+ 'rArr': 8658,
573
+ 'dArr': 8659,
574
+ 'hArr': 8660,
575
+ 'forall': 8704,
576
+ 'part': 8706,
577
+ 'exist': 8707,
578
+ 'empty': 8709,
579
+ 'nabla': 8711,
580
+ 'isin': 8712,
581
+ 'notin': 8713,
582
+ 'ni': 8715,
583
+ 'prod': 8719,
584
+ 'sum': 8721,
585
+ 'minus': 8722,
586
+ 'lowast': 8727,
587
+ 'radic': 8730,
588
+ 'prop': 8733,
589
+ 'infin': 8734,
590
+ 'ang': 8736,
591
+ 'and': 8743,
592
+ 'or': 8744,
593
+ 'cap': 8745,
594
+ 'cup': 8746,
595
+ 'int': 8747,
596
+ 'there4': 8756,
597
+ 'sim': 8764,
598
+ 'cong': 8773,
599
+ 'asymp': 8776,
600
+ 'ne': 8800,
601
+ 'equiv': 8801,
602
+ 'le': 8804,
603
+ 'ge': 8805,
604
+ 'sub': 8834,
605
+ 'sup': 8835,
606
+ 'nsub': 8836,
607
+ 'sube': 8838,
608
+ 'supe': 8839,
609
+ 'oplus': 8853,
610
+ 'otimes': 8855,
611
+ 'perp': 8869,
612
+ 'sdot': 8901,
613
+ 'lceil': 8968,
614
+ 'rceil': 8969,
615
+ 'lfloor': 8970,
616
+ 'rfloor': 8971,
617
+ 'lang': 9001,
618
+ 'rang': 9002,
619
+ 'loz': 9674,
620
+ 'spades': 9824,
621
+ 'clubs': 9827,
622
+ 'hearts': 9829,
623
+ 'diams': 9830
624
+ }
625
+
626
// Entity table entries may be written as numeric character codes; replace
// each numeric entry with the one-character string it stands for so that
// every value in sax.ENTITIES ends up being the replacement text itself.
Object.keys(sax.ENTITIES).forEach(function (key) {
  var e = sax.ENTITIES[key]
  if (typeof e === 'number') {
    e = String.fromCharCode(e)
  }
  sax.ENTITIES[key] = e
})

// Add the reverse mapping (state value -> state name) to sax.STATE.
for (var s in sax.STATE) {
  sax.STATE[sax.STATE[s]] = s
}

// shorthand
S = sax.STATE
638
+
639
// Invoke the handler stored on `parser` under the property name `event`,
// passing it `data`. Does nothing when that property is falsy.
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data)
  }
}
699
642
 
700
- return { prefix: prefix, local: local }
701
- }
643
// Emit a node event. Any text still buffered in parser.textNode is closed
// out first (via closeText) so that it is reported before this node.
function emitNode (parser, nodeType, data) {
  var hasPendingText = parser.textNode
  if (hasPendingText) {
    closeText(parser)
  }
  emit(parser, nodeType, data)
}
702
647
 
703
- function attrib (parser) {
704
- if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]()
648
// Flush the buffered text node: apply the text options to it, emit it as
// an 'ontext' event when anything is left, then reset the buffer to ''.
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode)
  // keep the processed text on the parser while the handler runs
  parser.textNode = text
  if (text) {
    emit(parser, 'ontext', text)
  }
  parser.textNode = ''
}
705
653
 
706
- if (parser.attribList.indexOf(parser.attribName) !== -1 ||
707
- parser.tag.attributes.hasOwnProperty(parser.attribName)) {
708
- return parser.attribName = parser.attribValue = ""
654
// Apply the text-related options to `text` and return the result:
//   opt.trim      - strip leading/trailing whitespace
//   opt.normalize - collapse every run of whitespace into a single space
// Trimming is applied before normalizing.
function textopts (opt, text) {
  var result = text
  if (opt.trim) {
    result = result.trim()
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, ' ')
  }
  return result
}
710
659
 
711
- if (parser.opt.xmlns) {
712
- var qn = qname(parser.attribName, true)
713
- , prefix = qn.prefix
714
- , local = qn.local
715
-
716
- if (prefix === "xmlns") {
717
- // namespace binding attribute; push the binding into scope
718
- if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
719
- strictFail( parser
720
- , "xml: prefix must be bound to " + XML_NAMESPACE + "\n"
721
- + "Actual: " + parser.attribValue )
722
- } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
723
- strictFail( parser
724
- , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n"
725
- + "Actual: " + parser.attribValue )
726
- } else {
727
- var tag = parser.tag
728
- , parent = parser.tags[parser.tags.length - 1] || parser
729
- if (tag.ns === parent.ns) {
730
- tag.ns = Object.create(parent.ns)
731
- }
732
- tag.ns[local] = parser.attribValue
733
- }
660
// Report a parse error.
// Flushes buffered text, builds an Error from the message `er` (with the
// line / column / current character appended when parser.trackPosition is
// set), stores it on parser.error, emits it as 'onerror' and returns the
// parser.
function error (parser, er) {
  closeText(parser)

  var message = er
  if (parser.trackPosition) {
    message += '\nLine: ' + parser.line +
      '\nColumn: ' + parser.column +
      '\nChar: ' + parser.c
  }

  var failure = new Error(message)
  parser.error = failure
  emit(parser, 'onerror', failure)
  return parser
}
735
672
 
736
- // defer onattribute events until all attributes have been seen
737
- // so any new bindings can take effect; preserve attribute order
738
- // so deferred events can be emitted in document order
739
- parser.attribList.push([parser.attribName, parser.attribValue])
740
- } else {
741
- // in non-xmlns mode, we can emit the event right away
742
- parser.tag.attributes[parser.attribName] = parser.attribValue
743
- emitNode( parser
744
- , "onattribute"
745
- , { name: parser.attribName
746
- , value: parser.attribValue } )
673
// Finish parsing.
// Reports an unclosed root tag (strict mode only, via strictFail) and an
// 'Unexpected end' error when the parser is not in BEGIN, BEGIN_WHITESPACE
// or TEXT state. Then flushes buffered text, marks the parser closed, emits
// 'onend' and re-runs the SAXParser constructor on the same object with its
// current strict/opt settings. Returns the parser.
function end (parser) {
  if (parser.sawRoot && !parser.closedRoot) {
    strictFail(parser, 'Unclosed root tag')
  }

  var state = parser.state
  var endedCleanly = state === S.BEGIN ||
    state === S.BEGIN_WHITESPACE ||
    state === S.TEXT
  if (!endedCleanly) {
    error(parser, 'Unexpected end')
  }

  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
748
687
 
749
- parser.attribName = parser.attribValue = ""
750
- }
688
// Report `message` as an error, but only when the parser is in strict mode.
// Throws if `parser` is not a SAXParser instance, which indicates a
// programming mistake at the call site rather than bad input.
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser
  if (!isParser) {
    throw new Error('bad call to strictFail')
  }
  if (!parser.strict) {
    return
  }
  error(parser, message)
}
696
+
697
// Start a new tag object for the tag name collected so far.
// In non-strict mode the name is first case-folded with the configured
// parser.looseCase method. The fresh tag is stored on parser.tag, the
// pending attribute list is cleared and 'onopentagstart' is emitted.
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]()
  }

  var tag = { name: parser.tagName, attributes: {} }
  parser.tag = tag

  if (parser.opt.xmlns) {
    // Start from the namespace object of the enclosing tag (or of the
    // parser itself at the root). It will be overridden if the tag
    // contains an xmlns="foo" or xmlns:foo="bar" attribute.
    var parent = parser.tags[parser.tags.length - 1] || parser
    tag.ns = parent.ns
  }

  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', tag)
}
756
709
 
757
- // add namespace info to tag
758
- var qn = qname(parser.tagName)
759
- tag.prefix = qn.prefix
760
- tag.local = qn.local
761
- tag.uri = tag.ns[qn.prefix] || ""
710
// Split a possibly-prefixed name into { prefix, local }.
//   'a:b'   -> { prefix: 'a', local: 'b' }
//   'b'     -> { prefix: '',  local: 'b' }
// When `attribute` is truthy, the bare name 'xmlns' is treated as the
// namespace-declaration prefix with an empty local part
// (<x xmlns="http://foo">).
// Only the first two colon-separated parts of the name are used.
function qname (name, attribute) {
  if (attribute && name === 'xmlns') {
    return { prefix: 'xmlns', local: '' }
  }
  if (name.indexOf(':') < 0) {
    return { prefix: '', local: name }
  }
  var parts = name.split(':')
  return { prefix: parts[0], local: parts[1] }
}
724
+
725
// Finish the attribute currently held in parser.attribName / attribValue.
//
// - In non-strict mode the name is case-folded with parser.looseCase.
// - A name already seen on this tag is dropped (the first occurrence wins).
// - In xmlns mode, xmlns / xmlns:* attributes update the tag's namespace
//   bindings, and the attribute is queued on parser.attribList so that
//   'onattribute' can be emitted later, once all bindings are known.
// - Otherwise the attribute is stored on the tag and emitted right away.
//
// Always leaves parser.attribName and parser.attribValue reset to ''.
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }

  // attribList holds [name, value] pairs, so compare against the name slot
  // (a plain indexOf on the list could never match a string). The
  // hasOwnProperty call goes through Object.prototype so that an attribute
  // literally named "hasOwnProperty" cannot shadow the method.
  var attribName = parser.attribName
  var isDuplicate =
    parser.attribList.some(function (pair) { return pair[0] === attribName }) ||
    Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName)
  if (isDuplicate) {
    parser.attribName = parser.attribValue = ''
    return
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local

    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        if (tag.ns === parent.ns) {
          // first binding on this tag: give it its own namespace object
          // that inherits from the parent's instead of mutating the parent
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }

  parser.attribName = parser.attribValue = ''
}
808
776
 
809
- parser.tag.isSelfClosing = !!selfClosing
777
// Complete the open tag held in parser.tag and announce it.
//
// In xmlns mode this first resolves the tag's prefix / local / uri, emits
// 'onopennamespace' for bindings declared on this tag, and emits the
// 'onattribute' events that attrib() deferred onto parser.attribList.
//
// Then, in every mode: records isSelfClosing, pushes the tag on
// parser.tags, emits 'onopentag', and (unless self-closing) switches to
// SCRIPT or TEXT state and clears parser.tag / parser.tagName.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag

    // add namespace info to tag
    var tagQName = qname(parser.tagName)
    tag.prefix = tagQName.prefix
    tag.local = tagQName.local
    tag.uri = tag.ns[tagQName.prefix] || ''

    if (tag.prefix && !tag.uri) {
      strictFail(parser, 'Unbound namespace prefix: ' +
        JSON.stringify(parser.tagName))
      // fall back to the prefix itself as the uri
      tag.uri = tagQName.prefix
    }

    // emit namespace binding events when this tag has its own ns object
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (boundPrefix) {
        emitNode(parser, 'onopennamespace', {
          prefix: boundPrefix,
          uri: tag.ns[boundPrefix]
        })
      })
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var idx = 0, count = parser.attribList.length; idx < count; idx++) {
      var pair = parser.attribList[idx]
      var attrQName = qname(pair[0], true)
      var attrPrefix = attrQName.prefix
      var attribute = {
        name: pair[0],
        value: pair[1],
        prefix: attrPrefix,
        local: attrQName.local,
        uri: attrPrefix === '' ? '' : (tag.ns[attrPrefix] || '')
      }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (attrPrefix && attrPrefix !== 'xmlns' && !attribute.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(attrPrefix))
        attribute.uri = attrPrefix
      }
      parser.tag.attributes[attribute.name] = attribute
      emitNode(parser, 'onattribute', attribute)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = Boolean(selfClosing)

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)

  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    var entersScript = !parser.noscript &&
      parser.tagName.toLowerCase() === 'script'
    parser.state = entersScript ? S.SCRIPT : S.TEXT
    parser.tag = null
    parser.tagName = ''
  }

  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}
836
855
 
837
- if (parser.script) {
838
- if (parser.tagName !== "script") {
839
- parser.script += "</" + parser.tagName + ">"
840
- parser.tagName = ""
841
- parser.state = S.SCRIPT
856
// Handle a completed close tag whose name is in parser.tagName.
//
// - An empty name ("</>") is reported in strict mode and kept as text.
// - While script content is buffered, any close tag other than script is
//   appended to the script text; the script close tag emits 'onscript'.
//   The script name is matched case-insensitively, mirroring the
//   case-insensitive check openTag uses to enter script mode.
// - Otherwise the stack of open tags is searched from the innermost tag
//   outward for a matching name. Every tag from the top of the stack down
//   to the match is popped and reported with 'onclosetag' (plus
//   'onclosenamespace' for bindings it introduced, in xmlns mode).
// - A close tag with no matching open tag is reported in strict mode and
//   kept as text.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    if (parser.tagName.toLowerCase() !== 'script') {
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // reported for every skipped tag; only has an effect in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }

  // didn't find it. we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName

  // pop and report every tag from the top of the stack down to index t
  var s = parser.tags.length
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}
869
- parser.tagName = tagName
870
- var s = parser.tags.length
871
- while (s --> t) {
872
- var tag = parser.tag = parser.tags.pop()
873
- parser.tagName = parser.tag.name
874
- emitNode(parser, "onclosetag", parser.tagName)
875
926
 
876
- var x = {}
877
- for (var i in tag.ns) x[i] = tag.ns[i]
927
// Resolve the entity name held in parser.entity to its replacement text.
//
// Lookup order: exact name in parser.ENTITIES, lower-cased name in
// parser.ENTITIES, then numeric character references (#NNN decimal,
// #xHHH hexadecimal). Anything else is reported through strictFail and
// returned unchanged as the literal text '&name;'.
//
// A numeric reference is accepted only when it round-trips exactly through
// parseInt (ignoring leading zeros) and denotes a code point in the range
// 0..0x10FFFF; String.fromCodePoint would throw a RangeError for NaN,
// negative or larger values, so those are rejected here instead.
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''

  if (parser.ENTITIES[entity]) {
    return parser.ENTITIES[entity]
  }
  if (parser.ENTITIES[entityLC]) {
    return parser.ENTITIES[entityLC]
  }
  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  entity = entity.replace(/^0+/, '')
  // isNaN also covers the case where num was never assigned (undefined)
  if (isNaN(num) ||
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10FFFF) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }

  return String.fromCodePoint(num)
}
915
- entity = entity.replace(/^0+/, "")
916
- if (numStr.toLowerCase() !== entity) {
917
- strictFail(parser, "Invalid character entity")
918
- return "&"+parser.entity + ";"
959
+
960
// Handle one character seen before the first tag.
//   '<'             - move to OPEN_WAKA and remember where the tag started
//   whitespace      - ignored
//   anything else   - reported in strict mode, then treated as the first
//                     character of a text node
function beginWhiteSpace (parser, c) {
  if (c === '<') {
    parser.state = S.OPEN_WAKA
    parser.startTagPosition = parser.position
    return
  }

  if (!not(whitespace, c)) {
    return
  }

  // have to process this as a text node.
  // weird, but happens.
  strictFail(parser, 'Non-whitespace before first tag.')
  parser.textNode = c
  parser.state = S.TEXT
}
920
972
 
921
- return String.fromCodePoint(num)
922
- }
923
-
924
- function write (chunk) {
925
- var parser = this
926
- if (this.error) throw this.error
927
- if (parser.closed) return error(parser,
928
- "Cannot write after close. Assign an onready handler.")
929
- if (chunk === null) return end(parser)
930
- var i = 0, c = ""
931
- while (parser.c = c = chunk.charAt(i++)) {
932
- if (parser.trackPosition) {
933
- parser.position ++
934
- if (c === "\n") {
935
- parser.line ++
936
- parser.column = 0
937
- } else parser.column ++
973
// Return the character of `chunk` at index `i`, or '' when `i` is at or
// beyond the end of the chunk.
function charAt (chunk, i) {
  return i < chunk.length ? chunk.charAt(i) : ''
}
940
980
 
941
- case S.BEGIN:
942
- parser.state = S.BEGIN_WHITESPACE
943
- if (c === "\uFEFF") {
944
- continue;
945
- }
946
- // no continue - fall through
947
-
948
- case S.BEGIN_WHITESPACE:
949
- if (c === "<") {
950
- parser.state = S.OPEN_WAKA
951
- parser.startTagPosition = parser.position
952
- } else if (not(whitespace,c)) {
953
- // have to process this as a text node.
954
- // weird, but happens.
955
- strictFail(parser, "Non-whitespace before first tag.")
956
- parser.textNode = c
957
- parser.state = S.TEXT
981
+ function write (chunk) {
982
+ var parser = this
983
+ if (this.error) {
984
+ throw this.error
985
+ }
986
+ if (parser.closed) {
987
+ return error(parser,
988
+ 'Cannot write after close. Assign an onready handler.')
989
+ }
990
+ if (chunk === null) {
991
+ return end(parser)
992
+ }
993
+ if (typeof chunk === 'object') {
994
+ chunk = chunk.toString()
995
+ }
996
+ var i = 0
997
+ var c = ''
998
+ while (true) {
999
+ c = charAt(chunk, i++)
1000
+ parser.c = c
1001
+ if (!c) {
1002
+ break
1003
+ }
1004
+ if (parser.trackPosition) {
1005
+ parser.position++
1006
+ if (c === '\n') {
1007
+ parser.line++
1008
+ parser.column = 0
1009
+ } else {
1010
+ parser.column++
958
1011
  }
959
- continue
960
-
961
- case S.TEXT:
962
- if (parser.sawRoot && !parser.closedRoot) {
963
- var starti = i-1
964
- while (c && c!=="<" && c!=="&") {
965
- c = chunk.charAt(i++)
966
- if (c && parser.trackPosition) {
967
- parser.position ++
968
- if (c === "\n") {
969
- parser.line ++
970
- parser.column = 0
971
- } else parser.column ++
1012
+ }
1013
+ switch (parser.state) {
1014
+ case S.BEGIN:
1015
+ parser.state = S.BEGIN_WHITESPACE
1016
+ if (c === '\uFEFF') {
1017
+ continue
1018
+ }
1019
+ beginWhiteSpace(parser, c)
1020
+ continue
1021
+
1022
+ case S.BEGIN_WHITESPACE:
1023
+ beginWhiteSpace(parser, c)
1024
+ continue
1025
+
1026
+ case S.TEXT:
1027
+ if (parser.sawRoot && !parser.closedRoot) {
1028
+ var starti = i - 1
1029
+ while (c && c !== '<' && c !== '&') {
1030
+ c = charAt(chunk, i++)
1031
+ if (c && parser.trackPosition) {
1032
+ parser.position++
1033
+ if (c === '\n') {
1034
+ parser.line++
1035
+ parser.column = 0
1036
+ } else {
1037
+ parser.column++
1038
+ }
1039
+ }
972
1040
  }
1041
+ parser.textNode += chunk.substring(starti, i - 1)
973
1042
  }
974
- parser.textNode += chunk.substring(starti, i-1)
975
- }
976
- if (c === "<" && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
977
- parser.state = S.OPEN_WAKA
978
- parser.startTagPosition = parser.position
979
- } else {
980
- if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
981
- strictFail(parser, "Text data outside of root node.")
982
- if (c === "&") parser.state = S.TEXT_ENTITY
983
- else parser.textNode += c
984
- }
985
- continue
986
-
987
- case S.SCRIPT:
988
- // only non-strict
989
- if (c === "<") {
990
- parser.state = S.SCRIPT_ENDING
991
- } else parser.script += c
992
- continue
993
-
994
- case S.SCRIPT_ENDING:
995
- if (c === "/") {
996
- parser.state = S.CLOSE_TAG
997
- } else {
998
- parser.script += "<" + c
999
- parser.state = S.SCRIPT
1000
- }
1001
- continue
1002
-
1003
- case S.OPEN_WAKA:
1004
- // either a /, ?, !, or text is coming next.
1005
- if (c === "!") {
1006
- parser.state = S.SGML_DECL
1007
- parser.sgmlDecl = ""
1008
- } else if (is(whitespace, c)) {
1009
- // wait for it...
1010
- } else if (is(nameStart,c)) {
1011
- parser.state = S.OPEN_TAG
1012
- parser.tagName = c
1013
- } else if (c === "/") {
1014
- parser.state = S.CLOSE_TAG
1015
- parser.tagName = ""
1016
- } else if (c === "?") {
1017
- parser.state = S.PROC_INST
1018
- parser.procInstName = parser.procInstBody = ""
1019
- } else {
1020
- strictFail(parser, "Unencoded <")
1021
- // if there was some whitespace, then add that in.
1022
- if (parser.startTagPosition + 1 < parser.position) {
1023
- var pad = parser.position - parser.startTagPosition
1024
- c = new Array(pad).join(" ") + c
1043
+ if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
1044
+ parser.state = S.OPEN_WAKA
1045
+ parser.startTagPosition = parser.position
1046
+ } else {
1047
+ if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
1048
+ strictFail(parser, 'Text data outside of root node.')
1049
+ }
1050
+ if (c === '&') {
1051
+ parser.state = S.TEXT_ENTITY
1052
+ } else {
1053
+ parser.textNode += c
1054
+ }
1055
+ }
1056
+ continue
1057
+
1058
+ case S.SCRIPT:
1059
+ // only non-strict
1060
+ if (c === '<') {
1061
+ parser.state = S.SCRIPT_ENDING
1062
+ } else {
1063
+ parser.script += c
1064
+ }
1065
+ continue
1066
+
1067
+ case S.SCRIPT_ENDING:
1068
+ if (c === '/') {
1069
+ parser.state = S.CLOSE_TAG
1070
+ } else {
1071
+ parser.script += '<' + c
1072
+ parser.state = S.SCRIPT
1073
+ }
1074
+ continue
1075
+
1076
+ case S.OPEN_WAKA:
1077
+ // either a /, ?, !, or text is coming next.
1078
+ if (c === '!') {
1079
+ parser.state = S.SGML_DECL
1080
+ parser.sgmlDecl = ''
1081
+ } else if (is(whitespace, c)) {
1082
+ // wait for it...
1083
+ } else if (is(nameStart, c)) {
1084
+ parser.state = S.OPEN_TAG
1085
+ parser.tagName = c
1086
+ } else if (c === '/') {
1087
+ parser.state = S.CLOSE_TAG
1088
+ parser.tagName = ''
1089
+ } else if (c === '?') {
1090
+ parser.state = S.PROC_INST
1091
+ parser.procInstName = parser.procInstBody = ''
1092
+ } else {
1093
+ strictFail(parser, 'Unencoded <')
1094
+ // if there was some whitespace, then add that in.
1095
+ if (parser.startTagPosition + 1 < parser.position) {
1096
+ var pad = parser.position - parser.startTagPosition
1097
+ c = new Array(pad).join(' ') + c
1098
+ }
1099
+ parser.textNode += '<' + c
1100
+ parser.state = S.TEXT
1101
+ }
1102
+ continue
1103
+
1104
+ case S.SGML_DECL:
1105
+ if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
1106
+ emitNode(parser, 'onopencdata')
1107
+ parser.state = S.CDATA
1108
+ parser.sgmlDecl = ''
1109
+ parser.cdata = ''
1110
+ } else if (parser.sgmlDecl + c === '--') {
1111
+ parser.state = S.COMMENT
1112
+ parser.comment = ''
1113
+ parser.sgmlDecl = ''
1114
+ } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
1115
+ parser.state = S.DOCTYPE
1116
+ if (parser.doctype || parser.sawRoot) {
1117
+ strictFail(parser,
1118
+ 'Inappropriately located doctype declaration')
1119
+ }
1120
+ parser.doctype = ''
1121
+ parser.sgmlDecl = ''
1122
+ } else if (c === '>') {
1123
+ emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
1124
+ parser.sgmlDecl = ''
1125
+ parser.state = S.TEXT
1126
+ } else if (is(quote, c)) {
1127
+ parser.state = S.SGML_DECL_QUOTED
1128
+ parser.sgmlDecl += c
1129
+ } else {
1130
+ parser.sgmlDecl += c
1131
+ }
1132
+ continue
1133
+
1134
+ case S.SGML_DECL_QUOTED:
1135
+ if (c === parser.q) {
1136
+ parser.state = S.SGML_DECL
1137
+ parser.q = ''
1025
1138
  }
1026
- parser.textNode += "<" + c
1027
- parser.state = S.TEXT
1028
- }
1029
- continue
1030
-
1031
- case S.SGML_DECL:
1032
- if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
1033
- emitNode(parser, "onopencdata")
1034
- parser.state = S.CDATA
1035
- parser.sgmlDecl = ""
1036
- parser.cdata = ""
1037
- } else if (parser.sgmlDecl+c === "--") {
1038
- parser.state = S.COMMENT
1039
- parser.comment = ""
1040
- parser.sgmlDecl = ""
1041
- } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
1042
- parser.state = S.DOCTYPE
1043
- if (parser.doctype || parser.sawRoot) strictFail(parser,
1044
- "Inappropriately located doctype declaration")
1045
- parser.doctype = ""
1046
- parser.sgmlDecl = ""
1047
- } else if (c === ">") {
1048
- emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
1049
- parser.sgmlDecl = ""
1050
- parser.state = S.TEXT
1051
- } else if (is(quote, c)) {
1052
- parser.state = S.SGML_DECL_QUOTED
1053
1139
  parser.sgmlDecl += c
1054
- } else parser.sgmlDecl += c
1055
- continue
1140
+ continue
1056
1141
 
1057
- case S.SGML_DECL_QUOTED:
1058
- if (c === parser.q) {
1059
- parser.state = S.SGML_DECL
1060
- parser.q = ""
1061
- }
1062
- parser.sgmlDecl += c
1063
- continue
1064
-
1065
- case S.DOCTYPE:
1066
- if (c === ">") {
1067
- parser.state = S.TEXT
1068
- emitNode(parser, "ondoctype", parser.doctype)
1069
- parser.doctype = true // just remember that we saw it.
1070
- } else {
1142
+ case S.DOCTYPE:
1143
+ if (c === '>') {
1144
+ parser.state = S.TEXT
1145
+ emitNode(parser, 'ondoctype', parser.doctype)
1146
+ parser.doctype = true // just remember that we saw it.
1147
+ } else {
1148
+ parser.doctype += c
1149
+ if (c === '[') {
1150
+ parser.state = S.DOCTYPE_DTD
1151
+ } else if (is(quote, c)) {
1152
+ parser.state = S.DOCTYPE_QUOTED
1153
+ parser.q = c
1154
+ }
1155
+ }
1156
+ continue
1157
+
1158
+ case S.DOCTYPE_QUOTED:
1071
1159
  parser.doctype += c
1072
- if (c === "[") parser.state = S.DOCTYPE_DTD
1073
- else if (is(quote, c)) {
1074
- parser.state = S.DOCTYPE_QUOTED
1160
+ if (c === parser.q) {
1161
+ parser.q = ''
1162
+ parser.state = S.DOCTYPE
1163
+ }
1164
+ continue
1165
+
1166
+ case S.DOCTYPE_DTD:
1167
+ parser.doctype += c
1168
+ if (c === ']') {
1169
+ parser.state = S.DOCTYPE
1170
+ } else if (is(quote, c)) {
1171
+ parser.state = S.DOCTYPE_DTD_QUOTED
1075
1172
  parser.q = c
1076
1173
  }
1077
- }
1078
- continue
1174
+ continue
1079
1175
 
1080
- case S.DOCTYPE_QUOTED:
1081
- parser.doctype += c
1082
- if (c === parser.q) {
1083
- parser.q = ""
1084
- parser.state = S.DOCTYPE
1085
- }
1086
- continue
1087
-
1088
- case S.DOCTYPE_DTD:
1089
- parser.doctype += c
1090
- if (c === "]") parser.state = S.DOCTYPE
1091
- else if (is(quote,c)) {
1092
- parser.state = S.DOCTYPE_DTD_QUOTED
1093
- parser.q = c
1094
- }
1095
- continue
1176
+ case S.DOCTYPE_DTD_QUOTED:
1177
+ parser.doctype += c
1178
+ if (c === parser.q) {
1179
+ parser.state = S.DOCTYPE_DTD
1180
+ parser.q = ''
1181
+ }
1182
+ continue
1096
1183
 
1097
- case S.DOCTYPE_DTD_QUOTED:
1098
- parser.doctype += c
1099
- if (c === parser.q) {
1100
- parser.state = S.DOCTYPE_DTD
1101
- parser.q = ""
1102
- }
1103
- continue
1104
-
1105
- case S.COMMENT:
1106
- if (c === "-") parser.state = S.COMMENT_ENDING
1107
- else parser.comment += c
1108
- continue
1109
-
1110
- case S.COMMENT_ENDING:
1111
- if (c === "-") {
1112
- parser.state = S.COMMENT_ENDED
1113
- parser.comment = textopts(parser.opt, parser.comment)
1114
- if (parser.comment) emitNode(parser, "oncomment", parser.comment)
1115
- parser.comment = ""
1116
- } else {
1117
- parser.comment += "-" + c
1118
- parser.state = S.COMMENT
1119
- }
1120
- continue
1121
-
1122
- case S.COMMENT_ENDED:
1123
- if (c !== ">") {
1124
- strictFail(parser, "Malformed comment")
1125
- // allow <!-- blah -- bloo --> in non-strict mode,
1126
- // which is a comment of " blah -- bloo "
1127
- parser.comment += "--" + c
1128
- parser.state = S.COMMENT
1129
- } else parser.state = S.TEXT
1130
- continue
1131
-
1132
- case S.CDATA:
1133
- if (c === "]") parser.state = S.CDATA_ENDING
1134
- else parser.cdata += c
1135
- continue
1136
-
1137
- case S.CDATA_ENDING:
1138
- if (c === "]") parser.state = S.CDATA_ENDING_2
1139
- else {
1140
- parser.cdata += "]" + c
1141
- parser.state = S.CDATA
1142
- }
1143
- continue
1144
-
1145
- case S.CDATA_ENDING_2:
1146
- if (c === ">") {
1147
- if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
1148
- emitNode(parser, "onclosecdata")
1149
- parser.cdata = ""
1150
- parser.state = S.TEXT
1151
- } else if (c === "]") {
1152
- parser.cdata += "]"
1153
- } else {
1154
- parser.cdata += "]]" + c
1155
- parser.state = S.CDATA
1156
- }
1157
- continue
1158
-
1159
- case S.PROC_INST:
1160
- if (c === "?") parser.state = S.PROC_INST_ENDING
1161
- else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
1162
- else parser.procInstName += c
1163
- continue
1164
-
1165
- case S.PROC_INST_BODY:
1166
- if (!parser.procInstBody && is(whitespace, c)) continue
1167
- else if (c === "?") parser.state = S.PROC_INST_ENDING
1168
- else parser.procInstBody += c
1169
- continue
1170
-
1171
- case S.PROC_INST_ENDING:
1172
- if (c === ">") {
1173
- emitNode(parser, "onprocessinginstruction", {
1174
- name : parser.procInstName,
1175
- body : parser.procInstBody
1176
- })
1177
- parser.procInstName = parser.procInstBody = ""
1178
- parser.state = S.TEXT
1179
- } else {
1180
- parser.procInstBody += "?" + c
1181
- parser.state = S.PROC_INST_BODY
1182
- }
1183
- continue
1184
-
1185
- case S.OPEN_TAG:
1186
- if (is(nameBody, c)) parser.tagName += c
1187
- else {
1188
- newTag(parser)
1189
- if (c === ">") openTag(parser)
1190
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1191
- else {
1192
- if (not(whitespace, c)) strictFail(
1193
- parser, "Invalid character in tag name")
1184
+ case S.COMMENT:
1185
+ if (c === '-') {
1186
+ parser.state = S.COMMENT_ENDING
1187
+ } else {
1188
+ parser.comment += c
1189
+ }
1190
+ continue
1191
+
1192
+ case S.COMMENT_ENDING:
1193
+ if (c === '-') {
1194
+ parser.state = S.COMMENT_ENDED
1195
+ parser.comment = textopts(parser.opt, parser.comment)
1196
+ if (parser.comment) {
1197
+ emitNode(parser, 'oncomment', parser.comment)
1198
+ }
1199
+ parser.comment = ''
1200
+ } else {
1201
+ parser.comment += '-' + c
1202
+ parser.state = S.COMMENT
1203
+ }
1204
+ continue
1205
+
1206
+ case S.COMMENT_ENDED:
1207
+ if (c !== '>') {
1208
+ strictFail(parser, 'Malformed comment')
1209
+ // allow <!-- blah -- bloo --> in non-strict mode,
1210
+ // which is a comment of " blah -- bloo "
1211
+ parser.comment += '--' + c
1212
+ parser.state = S.COMMENT
1213
+ } else {
1214
+ parser.state = S.TEXT
1215
+ }
1216
+ continue
1217
+
1218
+ case S.CDATA:
1219
+ if (c === ']') {
1220
+ parser.state = S.CDATA_ENDING
1221
+ } else {
1222
+ parser.cdata += c
1223
+ }
1224
+ continue
1225
+
1226
+ case S.CDATA_ENDING:
1227
+ if (c === ']') {
1228
+ parser.state = S.CDATA_ENDING_2
1229
+ } else {
1230
+ parser.cdata += ']' + c
1231
+ parser.state = S.CDATA
1232
+ }
1233
+ continue
1234
+
1235
+ case S.CDATA_ENDING_2:
1236
+ if (c === '>') {
1237
+ if (parser.cdata) {
1238
+ emitNode(parser, 'oncdata', parser.cdata)
1239
+ }
1240
+ emitNode(parser, 'onclosecdata')
1241
+ parser.cdata = ''
1242
+ parser.state = S.TEXT
1243
+ } else if (c === ']') {
1244
+ parser.cdata += ']'
1245
+ } else {
1246
+ parser.cdata += ']]' + c
1247
+ parser.state = S.CDATA
1248
+ }
1249
+ continue
1250
+
1251
+ case S.PROC_INST:
1252
+ if (c === '?') {
1253
+ parser.state = S.PROC_INST_ENDING
1254
+ } else if (is(whitespace, c)) {
1255
+ parser.state = S.PROC_INST_BODY
1256
+ } else {
1257
+ parser.procInstName += c
1258
+ }
1259
+ continue
1260
+
1261
+ case S.PROC_INST_BODY:
1262
+ if (!parser.procInstBody && is(whitespace, c)) {
1263
+ continue
1264
+ } else if (c === '?') {
1265
+ parser.state = S.PROC_INST_ENDING
1266
+ } else {
1267
+ parser.procInstBody += c
1268
+ }
1269
+ continue
1270
+
1271
+ case S.PROC_INST_ENDING:
1272
+ if (c === '>') {
1273
+ emitNode(parser, 'onprocessinginstruction', {
1274
+ name: parser.procInstName,
1275
+ body: parser.procInstBody
1276
+ })
1277
+ parser.procInstName = parser.procInstBody = ''
1278
+ parser.state = S.TEXT
1279
+ } else {
1280
+ parser.procInstBody += '?' + c
1281
+ parser.state = S.PROC_INST_BODY
1282
+ }
1283
+ continue
1284
+
1285
+ case S.OPEN_TAG:
1286
+ if (is(nameBody, c)) {
1287
+ parser.tagName += c
1288
+ } else {
1289
+ newTag(parser)
1290
+ if (c === '>') {
1291
+ openTag(parser)
1292
+ } else if (c === '/') {
1293
+ parser.state = S.OPEN_TAG_SLASH
1294
+ } else {
1295
+ if (not(whitespace, c)) {
1296
+ strictFail(parser, 'Invalid character in tag name')
1297
+ }
1298
+ parser.state = S.ATTRIB
1299
+ }
1300
+ }
1301
+ continue
1302
+
1303
+ case S.OPEN_TAG_SLASH:
1304
+ if (c === '>') {
1305
+ openTag(parser, true)
1306
+ closeTag(parser)
1307
+ } else {
1308
+ strictFail(parser, 'Forward-slash in opening tag not followed by >')
1194
1309
  parser.state = S.ATTRIB
1195
1310
  }
1196
- }
1197
- continue
1311
+ continue
1198
1312
 
1199
- case S.OPEN_TAG_SLASH:
1200
- if (c === ">") {
1201
- openTag(parser, true)
1202
- closeTag(parser)
1203
- } else {
1204
- strictFail(parser, "Forward-slash in opening tag not followed by >")
1205
- parser.state = S.ATTRIB
1206
- }
1207
- continue
1208
-
1209
- case S.ATTRIB:
1210
- // haven't read the attribute name yet.
1211
- if (is(whitespace, c)) continue
1212
- else if (c === ">") openTag(parser)
1213
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1214
- else if (is(nameStart, c)) {
1215
- parser.attribName = c
1216
- parser.attribValue = ""
1217
- parser.state = S.ATTRIB_NAME
1218
- } else strictFail(parser, "Invalid attribute name")
1219
- continue
1220
-
1221
- case S.ATTRIB_NAME:
1222
- if (c === "=") parser.state = S.ATTRIB_VALUE
1223
- else if (c === ">") {
1224
- strictFail(parser, "Attribute without value")
1225
- parser.attribValue = parser.attribName
1226
- attrib(parser)
1227
- openTag(parser)
1228
- }
1229
- else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
1230
- else if (is(nameBody, c)) parser.attribName += c
1231
- else strictFail(parser, "Invalid attribute name")
1232
- continue
1233
-
1234
- case S.ATTRIB_NAME_SAW_WHITE:
1235
- if (c === "=") parser.state = S.ATTRIB_VALUE
1236
- else if (is(whitespace, c)) continue
1237
- else {
1238
- strictFail(parser, "Attribute without value")
1239
- parser.tag.attributes[parser.attribName] = ""
1240
- parser.attribValue = ""
1241
- emitNode(parser, "onattribute",
1242
- { name : parser.attribName, value : "" })
1243
- parser.attribName = ""
1244
- if (c === ">") openTag(parser)
1245
- else if (is(nameStart, c)) {
1313
+ case S.ATTRIB:
1314
+ // haven't read the attribute name yet.
1315
+ if (is(whitespace, c)) {
1316
+ continue
1317
+ } else if (c === '>') {
1318
+ openTag(parser)
1319
+ } else if (c === '/') {
1320
+ parser.state = S.OPEN_TAG_SLASH
1321
+ } else if (is(nameStart, c)) {
1246
1322
  parser.attribName = c
1323
+ parser.attribValue = ''
1247
1324
  parser.state = S.ATTRIB_NAME
1248
1325
  } else {
1249
- strictFail(parser, "Invalid attribute name")
1250
- parser.state = S.ATTRIB
1326
+ strictFail(parser, 'Invalid attribute name')
1251
1327
  }
1252
- }
1253
- continue
1328
+ continue
1254
1329
 
1255
- case S.ATTRIB_VALUE:
1256
- if (is(whitespace, c)) continue
1257
- else if (is(quote, c)) {
1258
- parser.q = c
1259
- parser.state = S.ATTRIB_VALUE_QUOTED
1260
- } else {
1261
- strictFail(parser, "Unquoted attribute value")
1262
- parser.state = S.ATTRIB_VALUE_UNQUOTED
1263
- parser.attribValue = c
1264
- }
1265
- continue
1330
+ case S.ATTRIB_NAME:
1331
+ if (c === '=') {
1332
+ parser.state = S.ATTRIB_VALUE
1333
+ } else if (c === '>') {
1334
+ strictFail(parser, 'Attribute without value')
1335
+ parser.attribValue = parser.attribName
1336
+ attrib(parser)
1337
+ openTag(parser)
1338
+ } else if (is(whitespace, c)) {
1339
+ parser.state = S.ATTRIB_NAME_SAW_WHITE
1340
+ } else if (is(nameBody, c)) {
1341
+ parser.attribName += c
1342
+ } else {
1343
+ strictFail(parser, 'Invalid attribute name')
1344
+ }
1345
+ continue
1266
1346
 
1267
- case S.ATTRIB_VALUE_QUOTED:
1268
- if (c !== parser.q) {
1269
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
1270
- else parser.attribValue += c
1347
+ case S.ATTRIB_NAME_SAW_WHITE:
1348
+ if (c === '=') {
1349
+ parser.state = S.ATTRIB_VALUE
1350
+ } else if (is(whitespace, c)) {
1351
+ continue
1352
+ } else {
1353
+ strictFail(parser, 'Attribute without value')
1354
+ parser.tag.attributes[parser.attribName] = ''
1355
+ parser.attribValue = ''
1356
+ emitNode(parser, 'onattribute', {
1357
+ name: parser.attribName,
1358
+ value: ''
1359
+ })
1360
+ parser.attribName = ''
1361
+ if (c === '>') {
1362
+ openTag(parser)
1363
+ } else if (is(nameStart, c)) {
1364
+ parser.attribName = c
1365
+ parser.state = S.ATTRIB_NAME
1366
+ } else {
1367
+ strictFail(parser, 'Invalid attribute name')
1368
+ parser.state = S.ATTRIB
1369
+ }
1370
+ }
1271
1371
  continue
1272
- }
1273
- attrib(parser)
1274
- parser.q = ""
1275
- parser.state = S.ATTRIB_VALUE_CLOSED
1276
- continue
1277
-
1278
- case S.ATTRIB_VALUE_CLOSED:
1279
- if (is(whitespace, c)) {
1280
- parser.state = S.ATTRIB
1281
- } else if (c === ">") openTag(parser)
1282
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
1283
- else if (is(nameStart, c)) {
1284
- strictFail(parser, "No whitespace between attributes")
1285
- parser.attribName = c
1286
- parser.attribValue = ""
1287
- parser.state = S.ATTRIB_NAME
1288
- } else strictFail(parser, "Invalid attribute name")
1289
- continue
1290
-
1291
- case S.ATTRIB_VALUE_UNQUOTED:
1292
- if (not(attribEnd,c)) {
1293
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
1294
- else parser.attribValue += c
1372
+
1373
+ case S.ATTRIB_VALUE:
1374
+ if (is(whitespace, c)) {
1375
+ continue
1376
+ } else if (is(quote, c)) {
1377
+ parser.q = c
1378
+ parser.state = S.ATTRIB_VALUE_QUOTED
1379
+ } else {
1380
+ strictFail(parser, 'Unquoted attribute value')
1381
+ parser.state = S.ATTRIB_VALUE_UNQUOTED
1382
+ parser.attribValue = c
1383
+ }
1295
1384
  continue
1296
- }
1297
- attrib(parser)
1298
- if (c === ">") openTag(parser)
1299
- else parser.state = S.ATTRIB
1300
- continue
1301
-
1302
- case S.CLOSE_TAG:
1303
- if (!parser.tagName) {
1304
- if (is(whitespace, c)) continue
1305
- else if (not(nameStart, c)) {
1306
- if (parser.script) {
1307
- parser.script += "</" + c
1308
- parser.state = S.SCRIPT
1385
+
1386
+ case S.ATTRIB_VALUE_QUOTED:
1387
+ if (c !== parser.q) {
1388
+ if (c === '&') {
1389
+ parser.state = S.ATTRIB_VALUE_ENTITY_Q
1309
1390
  } else {
1310
- strictFail(parser, "Invalid tagname in closing tag.")
1391
+ parser.attribValue += c
1311
1392
  }
1312
- } else parser.tagName = c
1313
- }
1314
- else if (c === ">") closeTag(parser)
1315
- else if (is(nameBody, c)) parser.tagName += c
1316
- else if (parser.script) {
1317
- parser.script += "</" + parser.tagName
1318
- parser.tagName = ""
1319
- parser.state = S.SCRIPT
1320
- } else {
1321
- if (not(whitespace, c)) strictFail(parser,
1322
- "Invalid tagname in closing tag")
1323
- parser.state = S.CLOSE_TAG_SAW_WHITE
1324
- }
1325
- continue
1326
-
1327
- case S.CLOSE_TAG_SAW_WHITE:
1328
- if (is(whitespace, c)) continue
1329
- if (c === ">") closeTag(parser)
1330
- else strictFail(parser, "Invalid characters in closing tag")
1331
- continue
1332
-
1333
- case S.TEXT_ENTITY:
1334
- case S.ATTRIB_VALUE_ENTITY_Q:
1335
- case S.ATTRIB_VALUE_ENTITY_U:
1336
- switch(parser.state) {
1337
- case S.TEXT_ENTITY:
1338
- var returnState = S.TEXT, buffer = "textNode"
1339
- break
1340
-
1341
- case S.ATTRIB_VALUE_ENTITY_Q:
1342
- var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
1343
- break
1344
-
1345
- case S.ATTRIB_VALUE_ENTITY_U:
1346
- var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
1347
- break
1348
- }
1349
- if (c === ";") {
1350
- parser[buffer] += parseEntity(parser)
1351
- parser.entity = ""
1352
- parser.state = returnState
1353
- }
1354
- else if (is(entity, c)) parser.entity += c
1355
- else {
1356
- strictFail(parser, "Invalid character entity")
1357
- parser[buffer] += "&" + parser.entity + c
1358
- parser.entity = ""
1359
- parser.state = returnState
1360
- }
1361
- continue
1393
+ continue
1394
+ }
1395
+ attrib(parser)
1396
+ parser.q = ''
1397
+ parser.state = S.ATTRIB_VALUE_CLOSED
1398
+ continue
1399
+
1400
+ case S.ATTRIB_VALUE_CLOSED:
1401
+ if (is(whitespace, c)) {
1402
+ parser.state = S.ATTRIB
1403
+ } else if (c === '>') {
1404
+ openTag(parser)
1405
+ } else if (c === '/') {
1406
+ parser.state = S.OPEN_TAG_SLASH
1407
+ } else if (is(nameStart, c)) {
1408
+ strictFail(parser, 'No whitespace between attributes')
1409
+ parser.attribName = c
1410
+ parser.attribValue = ''
1411
+ parser.state = S.ATTRIB_NAME
1412
+ } else {
1413
+ strictFail(parser, 'Invalid attribute name')
1414
+ }
1415
+ continue
1362
1416
 
1363
- default:
1364
- throw new Error(parser, "Unknown state: " + parser.state)
1417
+ case S.ATTRIB_VALUE_UNQUOTED:
1418
+ if (not(attribEnd, c)) {
1419
+ if (c === '&') {
1420
+ parser.state = S.ATTRIB_VALUE_ENTITY_U
1421
+ } else {
1422
+ parser.attribValue += c
1423
+ }
1424
+ continue
1425
+ }
1426
+ attrib(parser)
1427
+ if (c === '>') {
1428
+ openTag(parser)
1429
+ } else {
1430
+ parser.state = S.ATTRIB
1431
+ }
1432
+ continue
1433
+
1434
+ case S.CLOSE_TAG:
1435
+ if (!parser.tagName) {
1436
+ if (is(whitespace, c)) {
1437
+ continue
1438
+ } else if (not(nameStart, c)) {
1439
+ if (parser.script) {
1440
+ parser.script += '</' + c
1441
+ parser.state = S.SCRIPT
1442
+ } else {
1443
+ strictFail(parser, 'Invalid tagname in closing tag.')
1444
+ }
1445
+ } else {
1446
+ parser.tagName = c
1447
+ }
1448
+ } else if (c === '>') {
1449
+ closeTag(parser)
1450
+ } else if (is(nameBody, c)) {
1451
+ parser.tagName += c
1452
+ } else if (parser.script) {
1453
+ parser.script += '</' + parser.tagName
1454
+ parser.tagName = ''
1455
+ parser.state = S.SCRIPT
1456
+ } else {
1457
+ if (not(whitespace, c)) {
1458
+ strictFail(parser, 'Invalid tagname in closing tag')
1459
+ }
1460
+ parser.state = S.CLOSE_TAG_SAW_WHITE
1461
+ }
1462
+ continue
1463
+
1464
+ case S.CLOSE_TAG_SAW_WHITE:
1465
+ if (is(whitespace, c)) {
1466
+ continue
1467
+ }
1468
+ if (c === '>') {
1469
+ closeTag(parser)
1470
+ } else {
1471
+ strictFail(parser, 'Invalid characters in closing tag')
1472
+ }
1473
+ continue
1474
+
1475
+ case S.TEXT_ENTITY:
1476
+ case S.ATTRIB_VALUE_ENTITY_Q:
1477
+ case S.ATTRIB_VALUE_ENTITY_U:
1478
+ var returnState
1479
+ var buffer
1480
+ switch (parser.state) {
1481
+ case S.TEXT_ENTITY:
1482
+ returnState = S.TEXT
1483
+ buffer = 'textNode'
1484
+ break
1485
+
1486
+ case S.ATTRIB_VALUE_ENTITY_Q:
1487
+ returnState = S.ATTRIB_VALUE_QUOTED
1488
+ buffer = 'attribValue'
1489
+ break
1490
+
1491
+ case S.ATTRIB_VALUE_ENTITY_U:
1492
+ returnState = S.ATTRIB_VALUE_UNQUOTED
1493
+ buffer = 'attribValue'
1494
+ break
1495
+ }
1496
+
1497
+ if (c === ';') {
1498
+ parser[buffer] += parseEntity(parser)
1499
+ parser.entity = ''
1500
+ parser.state = returnState
1501
+ } else if (is(parser.entity.length ? entityBody : entityStart, c)) {
1502
+ parser.entity += c
1503
+ } else {
1504
+ strictFail(parser, 'Invalid character in entity name')
1505
+ parser[buffer] += '&' + parser.entity + c
1506
+ parser.entity = ''
1507
+ parser.state = returnState
1508
+ }
1509
+
1510
+ continue
1511
+
1512
+ default:
1513
+ throw new Error(parser, 'Unknown state: ' + parser.state)
1514
+ }
1515
+ } // while
1516
+
1517
+ if (parser.position >= parser.bufferCheckPosition) {
1518
+ checkBufferLength(parser)
1365
1519
  }
1366
- } // while
1367
- // cdata blocks can get very big under normal conditions. emit and move on.
1368
- // if (parser.state === S.CDATA && parser.cdata) {
1369
- // emitNode(parser, "oncdata", parser.cdata)
1370
- // parser.cdata = ""
1371
- // }
1372
- if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
1373
- return parser
1374
- }
1375
-
1376
- /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
1377
- if (!String.fromCodePoint) {
1378
- (function() {
1379
- var stringFromCharCode = String.fromCharCode;
1380
- var floor = Math.floor;
1381
- var fromCodePoint = function() {
1382
- var MAX_SIZE = 0x4000;
1383
- var codeUnits = [];
1384
- var highSurrogate;
1385
- var lowSurrogate;
1386
- var index = -1;
1387
- var length = arguments.length;
1388
- if (!length) {
1389
- return '';
1390
- }
1391
- var result = '';
1392
- while (++index < length) {
1393
- var codePoint = Number(arguments[index]);
1394
- if (
1395
- !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
1396
- codePoint < 0 || // not a valid Unicode code point
1397
- codePoint > 0x10FFFF || // not a valid Unicode code point
1398
- floor(codePoint) != codePoint // not an integer
1399
- ) {
1400
- throw RangeError('Invalid code point: ' + codePoint);
1401
- }
1402
- if (codePoint <= 0xFFFF) { // BMP code point
1403
- codeUnits.push(codePoint);
1404
- } else { // Astral code point; split in surrogate halves
1405
- // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
1406
- codePoint -= 0x10000;
1407
- highSurrogate = (codePoint >> 10) + 0xD800;
1408
- lowSurrogate = (codePoint % 0x400) + 0xDC00;
1409
- codeUnits.push(highSurrogate, lowSurrogate);
1410
- }
1411
- if (index + 1 == length || codeUnits.length > MAX_SIZE) {
1412
- result += stringFromCharCode.apply(null, codeUnits);
1413
- codeUnits.length = 0;
1414
- }
1415
- }
1416
- return result;
1417
- };
1418
- if (Object.defineProperty) {
1419
- Object.defineProperty(String, 'fromCodePoint', {
1420
- 'value': fromCodePoint,
1421
- 'configurable': true,
1422
- 'writable': true
1423
- });
1424
- } else {
1425
- String.fromCodePoint = fromCodePoint;
1426
- }
1427
- }());
1428
- }
1520
+ return parser
1521
+ }
1429
1522
 
1430
- })(typeof exports === "undefined" ? sax = {} : exports);
1523
+ /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
1524
+ if (!String.fromCodePoint) {
1525
+ (function () {
1526
+ var stringFromCharCode = String.fromCharCode
1527
+ var floor = Math.floor
1528
+ var fromCodePoint = function () {
1529
+ var MAX_SIZE = 0x4000
1530
+ var codeUnits = []
1531
+ var highSurrogate
1532
+ var lowSurrogate
1533
+ var index = -1
1534
+ var length = arguments.length
1535
+ if (!length) {
1536
+ return ''
1537
+ }
1538
+ var result = ''
1539
+ while (++index < length) {
1540
+ var codePoint = Number(arguments[index])
1541
+ if (
1542
+ !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
1543
+ codePoint < 0 || // not a valid Unicode code point
1544
+ codePoint > 0x10FFFF || // not a valid Unicode code point
1545
+ floor(codePoint) !== codePoint // not an integer
1546
+ ) {
1547
+ throw RangeError('Invalid code point: ' + codePoint)
1548
+ }
1549
+ if (codePoint <= 0xFFFF) { // BMP code point
1550
+ codeUnits.push(codePoint)
1551
+ } else { // Astral code point; split in surrogate halves
1552
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
1553
+ codePoint -= 0x10000
1554
+ highSurrogate = (codePoint >> 10) + 0xD800
1555
+ lowSurrogate = (codePoint % 0x400) + 0xDC00
1556
+ codeUnits.push(highSurrogate, lowSurrogate)
1557
+ }
1558
+ if (index + 1 === length || codeUnits.length > MAX_SIZE) {
1559
+ result += stringFromCharCode.apply(null, codeUnits)
1560
+ codeUnits.length = 0
1561
+ }
1562
+ }
1563
+ return result
1564
+ }
1565
+ if (Object.defineProperty) {
1566
+ Object.defineProperty(String, 'fromCodePoint', {
1567
+ value: fromCodePoint,
1568
+ configurable: true,
1569
+ writable: true
1570
+ })
1571
+ } else {
1572
+ String.fromCodePoint = fromCodePoint
1573
+ }
1574
+ }())
1575
+ }
1576
+ })(typeof exports === 'undefined' ? this.sax = {} : exports)