sax 0.3.4 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AUTHORS +1 -0
- package/README.md +8 -5
- package/examples/example.js +7 -7
- package/lib/sax.js +51 -18
- package/package.json +14 -9
- package/test/case.js +47 -0
- package/test/index.js +11 -3
- package/test/unquoted.js +3 -3
- package/test/xmlns-strict.js +2 -2
- package/test/xmlns-xml-default-ns.js +30 -0
package/AUTHORS
CHANGED
package/README.md
CHANGED
|
@@ -98,9 +98,10 @@ Settings supported:
|
|
|
98
98
|
* `trim` - Boolean. Whether or not to trim text and comment nodes.
|
|
99
99
|
* `normalize` - Boolean. If true, then turn any whitespace into a single
|
|
100
100
|
space.
|
|
101
|
-
* `
|
|
102
|
-
rather than uppercasing them.
|
|
101
|
+
* `lowercase` - Boolean. If true, then lowercase tag names and attribute names
|
|
102
|
+
in loose mode, rather than uppercasing them.
|
|
103
103
|
* `xmlns` - Boolean. If true, then namespaces are supported.
|
|
104
|
+
* `position` - Boolean. If false, then don't track line/col/position.
|
|
104
105
|
|
|
105
106
|
## Methods
|
|
106
107
|
|
|
@@ -164,7 +165,7 @@ might go away at some point. SAX isn't intended to be used to parse SGML,
|
|
|
164
165
|
after all.
|
|
165
166
|
|
|
166
167
|
`opentag` - An opening tag. Argument: object with `name` and `attributes`.
|
|
167
|
-
In non-strict mode, tag names are uppercased, unless the `lowercasetags`
|
|
168
|
+
In non-strict mode, tag names are uppercased, unless the `lowercase`
|
|
168
169
|
option is set. If the `xmlns` option is set, then it will contain
|
|
169
170
|
namespace binding information on the `ns` member, and will have a
|
|
170
171
|
`local`, `prefix`, and `uri` member.
|
|
@@ -174,8 +175,10 @@ parent closes. In strict mode, well-formedness is enforced. Note that
|
|
|
174
175
|
self-closing tags will have `closeTag` emitted immediately after `openTag`.
|
|
175
176
|
Argument: tag name.
|
|
176
177
|
|
|
177
|
-
`attribute` - An attribute node. Argument: object with `name` and `value
|
|
178
|
-
|
|
178
|
+
`attribute` - An attribute node. Argument: object with `name` and `value`.
|
|
179
|
+
In non-strict mode, attribute names are uppercased, unless the `lowercase`
|
|
180
|
+
option is set. If the `xmlns` option is set, it will also contain namespace
|
|
181
|
+
information.
|
|
179
182
|
|
|
180
183
|
`comment` - A comment node. Argument: the string of the comment.
|
|
181
184
|
|
package/examples/example.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
|
|
2
2
|
var fs = require("fs"),
|
|
3
|
-
|
|
3
|
+
util = require('util'),
|
|
4
4
|
path = require("path"),
|
|
5
5
|
xml = fs.cat(path.join(__dirname, "test.xml")),
|
|
6
6
|
sax = require("../lib/sax"),
|
|
7
7
|
strict = sax.parser(true),
|
|
8
8
|
loose = sax.parser(false, {trim:true}),
|
|
9
9
|
inspector = function (ev) { return function (data) {
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
// for (var i in data)
|
|
13
|
-
//
|
|
10
|
+
// util.error("");
|
|
11
|
+
// util.error(ev+": "+util.inspect(data));
|
|
12
|
+
// for (var i in data) util.error(i+ " "+util.inspect(data[i]));
|
|
13
|
+
// util.error(this.line+":"+this.column);
|
|
14
14
|
}};
|
|
15
15
|
|
|
16
16
|
xml.addCallback(function (xml) {
|
|
@@ -20,8 +20,8 @@ xml.addCallback(function (xml) {
|
|
|
20
20
|
loose["on"+ev] = inspector(ev);
|
|
21
21
|
});
|
|
22
22
|
loose.onend = function () {
|
|
23
|
-
//
|
|
24
|
-
//
|
|
23
|
+
// util.error("end");
|
|
24
|
+
// util.error(util.inspect(loose));
|
|
25
25
|
};
|
|
26
26
|
|
|
27
27
|
// do this one char at a time to verify that it works.
|
package/lib/sax.js
CHANGED
|
@@ -51,7 +51,8 @@ function SAXParser (strict, opt) {
|
|
|
51
51
|
parser.q = parser.c = ""
|
|
52
52
|
parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
|
|
53
53
|
parser.opt = opt || {}
|
|
54
|
-
parser.
|
|
54
|
+
parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags;
|
|
55
|
+
parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase"
|
|
55
56
|
parser.tags = []
|
|
56
57
|
parser.closed = parser.closedRoot = parser.sawRoot = false
|
|
57
58
|
parser.tag = parser.error = null
|
|
@@ -67,7 +68,10 @@ function SAXParser (strict, opt) {
|
|
|
67
68
|
if (parser.opt.xmlns) parser.ns = Object.create(rootNS)
|
|
68
69
|
|
|
69
70
|
// mostly just for error reporting
|
|
70
|
-
parser.
|
|
71
|
+
parser.trackPosition = parser.opt.position !== false
|
|
72
|
+
if (parser.trackPosition) {
|
|
73
|
+
parser.position = parser.line = parser.column = 0
|
|
74
|
+
}
|
|
71
75
|
emit(parser, "onready")
|
|
72
76
|
}
|
|
73
77
|
|
|
@@ -134,7 +138,6 @@ SAXParser.prototype =
|
|
|
134
138
|
, write: write
|
|
135
139
|
, resume: function () { this.error = null; return this }
|
|
136
140
|
, close: function () { return this.write(null) }
|
|
137
|
-
, end: function () { return this.write(null) }
|
|
138
141
|
}
|
|
139
142
|
|
|
140
143
|
try {
|
|
@@ -234,14 +237,37 @@ var whitespace = "\r\n\t "
|
|
|
234
237
|
, nameBody = nameStart+number+"-."
|
|
235
238
|
, quote = "'\""
|
|
236
239
|
, entity = number+letter+"#"
|
|
240
|
+
, attribEnd = whitespace + ">"
|
|
237
241
|
, CDATA = "[CDATA["
|
|
238
242
|
, DOCTYPE = "DOCTYPE"
|
|
239
243
|
, XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
240
244
|
, XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
|
|
241
245
|
, rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
|
|
242
246
|
|
|
243
|
-
|
|
244
|
-
|
|
247
|
+
// turn all the string character sets into character class objects.
|
|
248
|
+
whitespace = charClass(whitespace)
|
|
249
|
+
number = charClass(number)
|
|
250
|
+
letter = charClass(letter)
|
|
251
|
+
nameStart = charClass(nameStart)
|
|
252
|
+
nameBody = charClass(nameBody)
|
|
253
|
+
quote = charClass(quote)
|
|
254
|
+
entity = charClass(entity)
|
|
255
|
+
attribEnd = charClass(attribEnd)
|
|
256
|
+
|
|
257
|
+
function charClass (str) {
|
|
258
|
+
return str.split("").reduce(function (s, c) {
|
|
259
|
+
s[c] = true
|
|
260
|
+
return s
|
|
261
|
+
}, {})
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function is (charclass, c) {
|
|
265
|
+
return charclass[c]
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
function not (charclass, c) {
|
|
269
|
+
return !charclass[c]
|
|
270
|
+
}
|
|
245
271
|
|
|
246
272
|
var S = 0
|
|
247
273
|
sax.STATE =
|
|
@@ -318,9 +344,11 @@ function textopts (opt, text) {
|
|
|
318
344
|
|
|
319
345
|
function error (parser, er) {
|
|
320
346
|
closeText(parser)
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
347
|
+
if (parser.trackPosition) {
|
|
348
|
+
er += "\nLine: "+parser.line+
|
|
349
|
+
"\nColumn: "+parser.column+
|
|
350
|
+
"\nChar: "+parser.c
|
|
351
|
+
}
|
|
324
352
|
er = new Error(er)
|
|
325
353
|
parser.error = er
|
|
326
354
|
emit(parser, "onerror", er)
|
|
@@ -342,7 +370,7 @@ function strictFail (parser, message) {
|
|
|
342
370
|
}
|
|
343
371
|
|
|
344
372
|
function newTag (parser) {
|
|
345
|
-
if (!parser.strict) parser.tagName = parser.tagName[parser.
|
|
373
|
+
if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
|
|
346
374
|
var parent = parser.tags[parser.tags.length - 1] || parser
|
|
347
375
|
, tag = parser.tag = { name : parser.tagName, attributes : {} }
|
|
348
376
|
|
|
@@ -367,6 +395,7 @@ function qname (name) {
|
|
|
367
395
|
}
|
|
368
396
|
|
|
369
397
|
function attrib (parser) {
|
|
398
|
+
if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]()
|
|
370
399
|
if (parser.opt.xmlns) {
|
|
371
400
|
var qn = qname(parser.attribName)
|
|
372
401
|
, prefix = qn.prefix
|
|
@@ -434,6 +463,8 @@ function openTag (parser, selfClosing) {
|
|
|
434
463
|
}
|
|
435
464
|
|
|
436
465
|
// handle deferred onattribute events
|
|
466
|
+
// Note: do not apply default ns to attributes:
|
|
467
|
+
// http://www.w3.org/TR/REC-xml-names/#defaulting
|
|
437
468
|
for (var i = 0, l = parser.attribList.length; i < l; i ++) {
|
|
438
469
|
var nv = parser.attribList[i]
|
|
439
470
|
var name = nv[0]
|
|
@@ -441,7 +472,7 @@ function openTag (parser, selfClosing) {
|
|
|
441
472
|
, qualName = qname(name)
|
|
442
473
|
, prefix = qualName.prefix
|
|
443
474
|
, local = qualName.local
|
|
444
|
-
, uri = tag.ns[prefix] || ""
|
|
475
|
+
, uri = prefix == "" ? "" : (tag.ns[prefix] || "")
|
|
445
476
|
, a = { name: name
|
|
446
477
|
, value: value
|
|
447
478
|
, prefix: prefix
|
|
@@ -491,7 +522,7 @@ function closeTag (parser) {
|
|
|
491
522
|
// <a><b></c></b></a> will close everything, otherwise.
|
|
492
523
|
var t = parser.tags.length
|
|
493
524
|
var tagName = parser.tagName
|
|
494
|
-
if (!parser.strict) tagName = tagName[parser.
|
|
525
|
+
if (!parser.strict) tagName = tagName[parser.looseCase]()
|
|
495
526
|
var closeTo = tagName
|
|
496
527
|
while (t --) {
|
|
497
528
|
var close = parser.tags[t]
|
|
@@ -565,11 +596,13 @@ function write (chunk) {
|
|
|
565
596
|
if (chunk === null) return end(parser)
|
|
566
597
|
var i = 0, c = ""
|
|
567
598
|
while (parser.c = c = chunk.charAt(i++)) {
|
|
568
|
-
parser.
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
599
|
+
if (parser.trackPosition) {
|
|
600
|
+
parser.position ++
|
|
601
|
+
if (c === "\n") {
|
|
602
|
+
parser.line ++
|
|
603
|
+
parser.column = 0
|
|
604
|
+
} else parser.column ++
|
|
605
|
+
}
|
|
573
606
|
switch (parser.state) {
|
|
574
607
|
|
|
575
608
|
case S.BEGIN:
|
|
@@ -588,7 +621,7 @@ function write (chunk) {
|
|
|
588
621
|
var starti = i-1
|
|
589
622
|
while (c && c!=="<" && c!=="&") {
|
|
590
623
|
c = chunk.charAt(i++)
|
|
591
|
-
if (c) {
|
|
624
|
+
if (c && parser.trackPosition) {
|
|
592
625
|
parser.position ++
|
|
593
626
|
if (c === "\n") {
|
|
594
627
|
parser.line ++
|
|
@@ -905,7 +938,7 @@ function write (chunk) {
|
|
|
905
938
|
continue
|
|
906
939
|
|
|
907
940
|
case S.ATTRIB_VALUE_UNQUOTED:
|
|
908
|
-
if (not(
|
|
941
|
+
if (not(attribEnd,c)) {
|
|
909
942
|
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
|
|
910
943
|
else parser.attribValue += c
|
|
911
944
|
continue
|
package/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
|
-
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "sax",
|
|
3
|
+
"description": "An evented streaming XML parser in JavaScript",
|
|
4
|
+
"author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me/)",
|
|
5
|
+
"version": "0.4.2",
|
|
6
|
+
"main": "lib/sax.js",
|
|
7
|
+
"license": {
|
|
8
|
+
"type": "MIT",
|
|
9
|
+
"url": "https://raw.github.com/isaacs/sax-js/master/LICENSE"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"test": "node test/index.js"
|
|
13
|
+
},
|
|
14
|
+
"repository": "git://github.com/isaacs/sax-js.git"
|
|
10
15
|
}
|
package/test/case.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
// default to uppercase
|
|
2
|
+
require(__dirname).test
|
|
3
|
+
( { xml :
|
|
4
|
+
"<span class=\"test\" hello=\"world\"></span>"
|
|
5
|
+
, expect :
|
|
6
|
+
[ [ "attribute", { name: "CLASS", value: "test" } ]
|
|
7
|
+
, [ "attribute", { name: "HELLO", value: "world" } ]
|
|
8
|
+
, [ "opentag", { name: "SPAN",
|
|
9
|
+
attributes: { CLASS: "test", HELLO: "world" } } ]
|
|
10
|
+
, [ "closetag", "SPAN" ]
|
|
11
|
+
]
|
|
12
|
+
, strict : false
|
|
13
|
+
, opt : {}
|
|
14
|
+
}
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
// lowercase option : lowercase tag/attribute names
|
|
18
|
+
require(__dirname).test
|
|
19
|
+
( { xml :
|
|
20
|
+
"<span class=\"test\" hello=\"world\"></span>"
|
|
21
|
+
, expect :
|
|
22
|
+
[ [ "attribute", { name: "class", value: "test" } ]
|
|
23
|
+
, [ "attribute", { name: "hello", value: "world" } ]
|
|
24
|
+
, [ "opentag", { name: "span",
|
|
25
|
+
attributes: { class: "test", hello: "world" } } ]
|
|
26
|
+
, [ "closetag", "span" ]
|
|
27
|
+
]
|
|
28
|
+
, strict : false
|
|
29
|
+
, opt : {lowercase:true}
|
|
30
|
+
}
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
// backward compatibility with old lowercasetags opt
|
|
34
|
+
require(__dirname).test
|
|
35
|
+
( { xml :
|
|
36
|
+
"<span class=\"test\" hello=\"world\"></span>"
|
|
37
|
+
, expect :
|
|
38
|
+
[ [ "attribute", { name: "class", value: "test" } ]
|
|
39
|
+
, [ "attribute", { name: "hello", value: "world" } ]
|
|
40
|
+
, [ "opentag", { name: "span",
|
|
41
|
+
attributes: { class: "test", hello: "world" } } ]
|
|
42
|
+
, [ "closetag", "span" ]
|
|
43
|
+
]
|
|
44
|
+
, strict : false
|
|
45
|
+
, opt : {lowercasetags:true}
|
|
46
|
+
}
|
|
47
|
+
)
|
package/test/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
var globalsBefore = JSON.stringify(Object.keys(global))
|
|
2
|
+
, util = require("util")
|
|
3
3
|
, assert = require("assert")
|
|
4
4
|
, fs = require("fs")
|
|
5
5
|
, path = require("path")
|
|
@@ -47,7 +47,7 @@ exports.test = function test (options) {
|
|
|
47
47
|
|
|
48
48
|
if (module === require.main) {
|
|
49
49
|
var running = true
|
|
50
|
-
|
|
50
|
+
, failures = 0
|
|
51
51
|
|
|
52
52
|
function fail (file, er) {
|
|
53
53
|
util.error("Failed: "+file)
|
|
@@ -66,6 +66,14 @@ if (module === require.main) {
|
|
|
66
66
|
// run this test.
|
|
67
67
|
try {
|
|
68
68
|
require(path.resolve(__dirname, file))
|
|
69
|
+
var globalsAfter = JSON.stringify(Object.keys(global))
|
|
70
|
+
if (globalsAfter !== globalsBefore) {
|
|
71
|
+
var er = new Error("new globals introduced\n"+
|
|
72
|
+
"expected: "+globalsBefore+"\n"+
|
|
73
|
+
"actual: "+globalsAfter)
|
|
74
|
+
globalsBefore = globalsAfter
|
|
75
|
+
throw er
|
|
76
|
+
}
|
|
69
77
|
console.log("ok " + (++i) + " - " + file)
|
|
70
78
|
} catch (er) {
|
|
71
79
|
console.log("not ok "+ (++i) + " - " + file)
|
package/test/unquoted.js
CHANGED
|
@@ -4,10 +4,10 @@ require(__dirname).test
|
|
|
4
4
|
( { xml :
|
|
5
5
|
"<span class=test hello=world></span>"
|
|
6
6
|
, expect :
|
|
7
|
-
[ [ "attribute", { name: "
|
|
8
|
-
, [ "attribute", { name: "
|
|
7
|
+
[ [ "attribute", { name: "CLASS", value: "test" } ]
|
|
8
|
+
, [ "attribute", { name: "HELLO", value: "world" } ]
|
|
9
9
|
, [ "opentag", { name: "SPAN",
|
|
10
|
-
attributes: {
|
|
10
|
+
attributes: { CLASS: "test", HELLO: "world" } } ]
|
|
11
11
|
, [ "closetag", "SPAN" ]
|
|
12
12
|
]
|
|
13
13
|
, strict : false
|
package/test/xmlns-strict.js
CHANGED
|
@@ -29,9 +29,9 @@ require(__dirname).test
|
|
|
29
29
|
attributes: { "xmlns": { name: "xmlns", value: "uri:default", prefix: "xmlns", local: "", uri: "http://www.w3.org/2000/xmlns/" } },
|
|
30
30
|
ns: { "": "uri:default" } } ]
|
|
31
31
|
|
|
32
|
-
, [ "attribute", { name: "attr", value: "normal", prefix: "", local: "attr", uri: "
|
|
32
|
+
, [ "attribute", { name: "attr", value: "normal", prefix: "", local: "attr", uri: "" } ]
|
|
33
33
|
, [ "opentag", { name: "plain", prefix: "", local: "plain", uri: "uri:default", ns: { '': 'uri:default' },
|
|
34
|
-
attributes: { "attr": { name: "attr", value: "normal", prefix: "", local: "attr", uri: "
|
|
34
|
+
attributes: { "attr": { name: "attr", value: "normal", prefix: "", local: "attr", uri: "" } } } ]
|
|
35
35
|
, [ "closetag", "plain" ]
|
|
36
36
|
|
|
37
37
|
, [ "closetag", "ns1" ]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
var xmlns_attr =
|
|
2
|
+
{
|
|
3
|
+
name: "xmlns", value: "http://foo", prefix: "xmlns",
|
|
4
|
+
local: "", uri : "http://www.w3.org/2000/xmlns/"
|
|
5
|
+
};
|
|
6
|
+
|
|
7
|
+
var attr_attr =
|
|
8
|
+
{
|
|
9
|
+
name: "attr", value: "bar", prefix: "",
|
|
10
|
+
local : "attr", uri : ""
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
require(__dirname).test
|
|
15
|
+
( { xml :
|
|
16
|
+
"<elm xmlns='http://foo' attr='bar'/>"
|
|
17
|
+
, expect :
|
|
18
|
+
[ [ "opennamespace", { prefix: "", uri: "http://foo" } ]
|
|
19
|
+
, [ "attribute", xmlns_attr ]
|
|
20
|
+
, [ "attribute", attr_attr ]
|
|
21
|
+
, [ "opentag", { name: "elm", prefix: "", local: "elm", uri : "http://foo",
|
|
22
|
+
ns : { "" : "http://foo" },
|
|
23
|
+
attributes: { xmlns: xmlns_attr, attr: attr_attr } } ]
|
|
24
|
+
, [ "closetag", "elm" ]
|
|
25
|
+
, [ "closenamespace", { prefix: "", uri: "http://foo"} ]
|
|
26
|
+
]
|
|
27
|
+
, strict : true
|
|
28
|
+
, opt : {xmlns: true}
|
|
29
|
+
}
|
|
30
|
+
)
|