entities 0.2.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compile.js +64 -0
- package/index.js +85 -63
- package/package.json +25 -20
- package/readme.md +1 -5
- package/test/test.js +136 -61
package/compile.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Entity levels in inheritance order: every level also contains all entities
// of the levels listed before it.
var modes = ["XML", "HTML4", "HTML5"];

// Build, for each level, the regular expressions and lookup tables that the
// decoder/encoder need, and publish them as module.exports[<level name>].
// The accumulator of the reduce is the (already merged) table of the previous
// level, or null for the first level.
modes.reduce(function(inheritedTable, mode, index){
	var table = require("./entities/" + mode.toLowerCase() + ".json");

	if(inheritedTable !== null){
		// Copy the previous level into this level's table (in place).
		// Inherited entries win over same-named entries of this level.
		var inheritedNames = Object.keys(inheritedTable);
		for(var k = 0; k < inheritedNames.length; k++){
			table[inheritedNames[k]] = inheritedTable[inheritedNames[k]];
		}
	}

	var inverseTable = getInverse(table);
	var isFirstLevel = index === 0;

	module.exports[mode] = {
		strict: getStrictReplacer(table),
		//there is no non-strict mode for XML
		normal: isFirstLevel ? null : getReplacer(table),
		inverse: getInverseReplacer(inverseTable),
		inverseObj: inverseTable,
		obj: table
	};

	return table;
}, null);
|
|
25
|
+
|
|
26
|
+
/**
 * Comparator for Array.prototype.sort that orders values descending using
 * the `<` operator (for strings: descending by UTF-16 code units).
 *
 * @param {*} a - left operand
 * @param {*} b - right operand
 * @returns {number} 1 if a sorts after b, -1 if it sorts before, 0 if equal
 */
function sortDesc(a, b){
	// A comparator must report equality, otherwise compare(a, a) !== 0 and
	// the resulting order is implementation-defined.
	if(a === b) return 0;
	return a < b ? 1 : -1;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Builds the global regular expression used for non-strict decoding.
 *
 * @param {Object} obj - entity table; its keys are the entity names
 * @returns {RegExp} matches "&" followed by any key of obj, or by a
 *   hexadecimal / decimal character reference with an optional ";"
 */
function getReplacer(obj){
	// Descending order puts a longer name ("amp;") before its prefix ("amp"),
	// so the alternation tries the longer one first.
	var named = Object.keys(obj).sort(sortDesc).join("|");

	// also match hex and char codes
	var numeric = "#[xX][\\da-fA-F]+;?|#\\d+;?";

	return new RegExp("&(?:" + named + "|" + numeric + ")", "g");
}
|
|
39
|
+
|
|
40
|
+
/**
 * Builds the global regular expression used for strict decoding, where every
 * reference must be terminated by a semicolon.
 *
 * @param {Object} obj - entity table; only keys ending in ";" are used
 * @returns {RegExp} matches "&", then an entity name or a hexadecimal /
 *   decimal character reference, then a mandatory ";"
 */
function getStrictReplacer(obj){
	var sorted = Object.keys(obj).sort(sortDesc);
	var names = [];

	for(var i = 0; i < sorted.length; i++){
		var key = sorted[i];
		if(key.charAt(key.length - 1) === ";"){
			//remove trailing semicolon, it is appended once to the whole group
			names.push(key.slice(0, -1));
		}
	}

	// also match hex and char codes
	var numeric = "#[xX][\\da-fA-F]+|#\\d+";

	return new RegExp("&(?:" + names.join("|") + "|" + numeric + ");", "g");
}
|
|
51
|
+
|
|
52
|
+
/**
 * Inverts an entity table (name -> value) into a value -> name table.
 *
 * A name without trailing ";" is skipped when the same name with ";" maps to
 * the same value, so the semicolon-terminated spelling is the one recorded.
 * When several names map to one value, the name that comes last in
 * Object.keys order is kept.
 *
 * @param {Object} obj - entity table
 * @returns {Object} map from value to entity name
 */
function getInverse(obj){
	var inverse = {};
	var names = Object.keys(obj);

	for(var i = 0; i < names.length; i++){
		var name = names[i];
		var endsWithSemicolon = name.charAt(name.length - 1) === ";";

		//prefer identifiers with a semicolon
		if(endsWithSemicolon || obj[name + ";"] !== obj[name]){
			inverse[obj[name]] = name;
		}
	}

	return inverse;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Builds the global regular expression that finds every text sequence which
 * has a named entity, i.e. every key of the inverse table.
 *
 * @param {Object} inverse - map from literal text to entity name
 * @returns {RegExp} alternation of all keys, matched literally
 */
function getInverseReplacer(inverse){
	var patterns = Object.keys(inverse).sort().map(function(symbol){
		// Escape only regex metacharacters. Blindly prefixing a backslash
		// would turn a key such as "fj" into "\fj" (form feed + "j").
		return symbol.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
	});

	if(patterns.length === 0){
		// Nothing to encode: return a pattern that can never match instead
		// of an empty pattern, which would match at every position.
		return /(?!)/g;
	}

	return new RegExp(patterns.join("|"), "g");
}
|
package/index.js
CHANGED
|
@@ -1,77 +1,99 @@
|
|
|
1
|
-
var
|
|
2
|
-
|
|
1
|
+
var compiled = require("./compile.js"),
|
|
2
|
+
modes = ["XML", "HTML4", "HTML5"];
|
|
3
3
|
|
|
4
|
-
var
|
|
5
|
-
var obj =
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
4
|
+
// For every entity level, build the strict decoder, the normal decoder and
// the encoder from the precompiled tables, publish them on `exports` as
// decode<Level>Strict / decode<Level> / encode<Level>, and collect them so
// the generic entry points can select them by numeric level.
var levels = modes.map(function(mode, index){
	var tables = compiled[mode];

	var strictDecoder = genReplaceFunc(tables.strict, getStrictReplacer(tables.obj));

	var normalDecoder;
	if(index === 0){
		//there is no non-strict mode for XML
		normalDecoder = strictDecoder;
	} else {
		normalDecoder = genReplaceFunc(tables.normal, getReplacer(tables.obj));
	}

	var encoder = getInverse(tables.inverseObj, tables.inverse);

	exports["decode" + mode + "Strict"] = strictDecoder;
	exports["decode" + mode] = normalDecoder;
	exports["encode" + mode] = encoder;

	return {
		strict: strictDecoder,
		normal: normalDecoder,
		inverse: encoder
	};
});
|
|
21
|
+
|
|
22
|
+
var decode = levels.map(function(l){ return l.normal; }),
|
|
23
|
+
decodeStrict = levels.map(function(l){ return l.strict; }),
|
|
24
|
+
encode = levels.map(function(l){ return l.inverse; });
|
|
25
|
+
|
|
26
|
+
exports.decode = function(data, level){
|
|
27
|
+
if(!(level >= 0 && level < 3)) level = 0;
|
|
28
|
+
return decode[level](data);
|
|
29
|
+
};
|
|
30
|
+
exports.decodeStrict = function(data, level){
|
|
31
|
+
if(!(level >= 0 && level < 3)) level = 0;
|
|
32
|
+
return decodeStrict[level](data);
|
|
33
|
+
};
|
|
34
|
+
exports.encode = function(data, level){
|
|
35
|
+
if(!(level >= 0 && level < 3)) level = 0;
|
|
36
|
+
return encode[level](data);
|
|
27
37
|
};
|
|
28
38
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
39
|
+
/**
 * Creates the String.prototype.replace callback for non-strict decoding.
 *
 * @param {Object} obj - entity table (name -> replacement text)
 * @returns {function(string): string} callback that receives the matched
 *   reference including its leading "&" and returns the replacement
 */
function getReplacer(obj){
	return function normalReplacer(name){
		// Named reference: look up everything after the "&".
		if(name.charAt(1) !== "#"){
			return obj[name.substr(1)];
		}

		// Numeric reference: "&#x..." / "&#X..." is hexadecimal, "&#..." is
		// decimal. parseInt stops at a trailing ";" on its own.
		var isHex = name.charAt(2).toLowerCase() === "x";
		var codePoint = isHex ? parseInt(name.substr(3), 16) : parseInt(name.substr(2), 10);

		return codePointToSymbol(codePoint);
	};
}
|
|
40
50
|
|
|
41
|
-
|
|
51
|
+
/**
 * Converts a Unicode code point into the corresponding JavaScript string.
 *
 * String.fromCharCode only keeps the low 16 bits of its argument, so code
 * points above U+FFFF have to be emitted as a UTF-16 surrogate pair.
 *
 * @param {number} entity - code point parsed from a numeric reference
 * @returns {string} one UTF-16 code unit for BMP code points, a surrogate
 *   pair for astral code points, U+FFFD for values above U+10FFFF
 */
function codePointToSymbol(entity){
	if(entity > 0x10FFFF){
		// Outside the Unicode range: use the replacement character.
		return "\uFFFD";
	}

	if(entity > 0xFFFF){
		var offset = entity - 0x10000;
		return String.fromCharCode(
			0xD800 + (offset >>> 10),   // high surrogate: upper 10 bits
			0xDC00 + (offset & 0x3FF)   // low surrogate: lower 10 bits
		);
	}

	return String.fromCharCode(entity);
}
|
|
42
54
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
};
|
|
55
|
+
/**
 * Creates the String.prototype.replace callback for strict decoding.
 *
 * Numeric references go through codePointToSymbol, the same helper the
 * non-strict replacer uses, so both decoders treat code points identically.
 *
 * @param {Object} obj - entity table (name -> replacement text)
 * @returns {function(string): string} callback that receives the matched
 *   reference including its leading "&" and returns the replacement
 */
function getStrictReplacer(obj){
	return function strictReplacer(name){
		if(name.charAt(1) === "#"){
			if(name.charAt(2).toLowerCase() === "x"){
				return codePointToSymbol(parseInt(name.substr(3), 16));
			}
			return codePointToSymbol(parseInt(name.substr(2), 10));
		}
		return obj[name.substr(1)];
	};
}
|
|
66
|
+
|
|
67
|
+
// Any single UTF-16 code unit outside the ASCII range U+0000..U+007F.
var re_nonASCII = /[^\0-\x7F]/g;

// A high surrogate immediately followed by a low surrogate, i.e. one
// character outside the Basic Multilingual Plane.
var re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
|
|
69
|
+
|
|
70
|
+
/**
 * Replace callback that turns a character into a hexadecimal character
 * reference built from its first UTF-16 code unit. Despite the name it is
 * applied to non-ASCII characters.
 *
 * @param {string} c - matched character
 * @returns {string} reference of the form "&#xHEX;" with upper-case digits
 */
function nonUTF8Replacer(c){
	var hex = c.charCodeAt(0).toString(16).toUpperCase();
	return "&#x" + hex + ";";
}
|
|
53
73
|
|
|
54
|
-
|
|
74
|
+
/**
 * Replace callback that turns a UTF-16 surrogate pair into a hexadecimal
 * character reference for the code point it encodes.
 *
 * @param {string} c - matched text; code unit 0 is read as the high
 *   surrogate, code unit 1 as the low surrogate
 * @returns {string} reference of the form "&#xHEX;" with upper-case digits
 */
function astralReplacer(c){
	// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
	var highOffset = c.charCodeAt(0) - 0xD800;
	var lowOffset = c.charCodeAt(1) - 0xDC00;
	var codePoint = highOffset * 0x400 + lowOffset + 0x10000;

	return "&#x" + codePoint.toString(16).toUpperCase() + ";";
}
|
|
55
81
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
module.exports["decode" + name] = function(data){
|
|
82
|
+
/**
 * Creates an encoder for one entity level.
 *
 * The returned function runs three replacement passes in this order:
 *   1. text that has a named entity  -> "&" + name from `inverse`
 *   2. surrogate pairs               -> hexadecimal reference (astralReplacer)
 *   3. remaining non-ASCII units     -> hexadecimal reference (nonUTF8Replacer)
 *
 * @param {Object} inverse - map from literal text to entity name
 * @param {RegExp} re - global pattern matching the keys of `inverse`
 * @returns {function(string): string} encoder
 */
function getInverse(inverse, re){
	function toNamedEntity(symbol){
		return "&" + inverse[symbol];
	}

	return function(data){
		var named = data.replace(re, toNamedEntity);
		var astral = named.replace(re_astralSymbols, astralReplacer);
		return astral.replace(re_nonASCII, nonUTF8Replacer);
	};
}
|
|
94
|
+
|
|
95
|
+
/**
 * Binds a pattern and a replacement callback into a one-argument function.
 *
 * @param {RegExp} regex - pattern handed to String.prototype.replace
 * @param {function} func - replacement callback handed to replace
 * @returns {function(string): string} function that applies the
 *   replacement to its argument and returns the result
 */
function genReplaceFunc(regex, func){
	return function(data){
		var replaced = data.replace(regex, func);
		return replaced;
	};
}
|
package/package.json
CHANGED
|
@@ -1,22 +1,27 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
2
|
+
"name": "entities",
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "Encode & decode XML/HTML entities with ease",
|
|
5
|
+
"author": "Felix Boehm <me@feedic.com>",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"html",
|
|
8
|
+
"xml",
|
|
9
|
+
"entity",
|
|
10
|
+
"encoding"
|
|
11
|
+
],
|
|
12
|
+
"main": "./index.js",
|
|
13
|
+
"directories": {
|
|
14
|
+
"test": "test"
|
|
15
|
+
},
|
|
16
|
+
"devDependencies": {
|
|
17
|
+
"mocha": "~1.9.0"
|
|
18
|
+
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"test": "mocha"
|
|
21
|
+
},
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "git://github.com/fb55/node-entities.git"
|
|
25
|
+
},
|
|
26
|
+
"license": "BSD-like"
|
|
22
27
|
}
|
package/readme.md
CHANGED
|
@@ -6,13 +6,12 @@ En- & decoder for XML/HTML entities.
|
|
|
6
6
|
* Focussed on ___speed___
|
|
7
7
|
* Supports three levels of entities: __XML__, __HTML4__ & __HTML5__
|
|
8
8
|
* Supports _char code_ entities (e.g. `&#x55;`)
|
|
9
|
-
* Special optimizations for XML: A more restrictive syntax allows faster parsing
|
|
10
9
|
|
|
11
10
|
##How to…
|
|
12
11
|
|
|
13
12
|
###…install `entities`
|
|
14
13
|
|
|
15
|
-
npm
|
|
14
|
+
npm i entities
|
|
16
15
|
|
|
17
16
|
###…use `entities`
|
|
18
17
|
|
|
@@ -26,6 +25,3 @@ require("entities").decode(<str> data[, <int> level]);
|
|
|
26
25
|
The `level` attribute indicates what level of entities should be decoded (0 = XML, 1 = HTML4 and 2 = HTML5). The default is 0 (read: XML).
|
|
27
26
|
|
|
28
27
|
There are also methods to access the level directly. Just append the name of the level to the action and you're ready to go (e.g. `encodeHTML4(data)`, `decodeXML(data)`).
|
|
29
|
-
|
|
30
|
-
##TODO
|
|
31
|
-
* There should be a way to remove tables that aren't used. The HTML5 table is pretty heavy, if it's not needed, it shouldn't be kept in memory.
|
package/test/test.js
CHANGED
|
@@ -1,65 +1,140 @@
|
|
|
1
|
-
var assert = require(
|
|
2
|
-
|
|
1
|
+
var assert = require("assert"),
|
|
2
|
+
path = require("path"),
|
|
3
|
+
entities = require('../');
|
|
3
4
|
|
|
4
|
-
describe("Encode->decode test", function()
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
5
|
+
// Round-trip tests: encode a plain input with every level, compare against
// the expected encoded text, then decode the result back to the input.
describe("Encode->decode test", function(){
	// Expected values follow the encoder: characters with a named entity in
	// the level's table become "&name;", any other non-ASCII character
	// becomes an upper-case hexadecimal reference.
	var testcases = [
		{
			input: "asdf & ÿ ü '",
			xml: "asdf &amp; &#xFF; &#xFC; &apos;",
			html4: "asdf &amp; &yuml; &uuml; &apos;",
			html5: "asdf &amp; &yuml; &uuml; &apos;"
		}, {
			// Already-encoded input: only the ampersand is escaped again.
			input: "&#38;",
			xml: "&amp;#38;",
			html4: "&amp;#38;",
			html5: "&amp;#38;"
		}
	];
	testcases.forEach(function(tc) {
		var encodedXML = entities.encodeXML(tc.input);
		it("should XML encode " + tc.input, function(){
			assert.equal(encodedXML, tc.xml);
		});
		it("should default to XML encode " + tc.input, function(){
			assert.equal(entities.encode(tc.input), tc.xml);
		});
		it("should XML decode " + encodedXML, function(){
			assert.equal(entities.decodeXML(encodedXML), tc.input);
		});
		it("should default to XML decode " + encodedXML, function(){
			assert.equal(entities.decode(encodedXML), tc.input);
		});
		it("should default strict to XML decode " + encodedXML, function(){
			assert.equal(entities.decodeStrict(encodedXML), tc.input);
		});

		var encodedHTML4 = entities.encodeHTML4(tc.input);
		it("should HTML4 encode " + tc.input, function(){
			assert.equal(encodedHTML4, tc.html4);
		});
		it("should HTML4 decode " + encodedHTML4, function(){
			assert.equal(entities.decodeHTML4(encodedHTML4), tc.input);
		});

		var encodedHTML5 = entities.encodeHTML5(tc.input);
		it("should HTML5 encode " + tc.input, function(){
			assert.equal(encodedHTML5, tc.html5);
		});
		it("should HTML5 decode " + encodedHTML5, function(){
			assert.equal(entities.decodeHTML5(encodedHTML5), tc.input);
		});
	});
});
|
|
54
|
+
|
|
55
|
+
// Decoding tests: every input must decode to the same output with the
// XML, HTML4 and HTML5 decoders.
describe("Decode test", function(){
	var testcases = [
		{ input: "&amp;", output: "&" },
		{ input: "&#38;", output: "&" },
		{ input: "&#x26;", output: "&" },
		{ input: "&#X26;", output: "&" },
		{ input: "&#38;", output: "&" },
		{ input: "&#38;", output: "&" },
		{ input: "&#38;", output: "&" },
		{ input: ":", output: ":" },
		{ input: ":", output: ":" },
		{ input: ":", output: ":" },
		{ input: ":", output: ":" }
	];

	// Suffixes of the level-specific decode functions, in registration order.
	var decoderLevels = ["XML", "HTML4", "HTML5"];

	testcases.forEach(function(tc) {
		decoderLevels.forEach(function(level){
			it("should " + level + " decode " + tc.input, function(){
				assert.equal(entities["decode" + level](tc.input), tc.output);
			});
		});
	});
});
|
|
81
|
+
|
|
82
|
+
// File names of the entity tables, in level order (index = numeric level).
var levels = ["xml", "html4", "html5"];

// Table-driven tests: every entity listed in a level's JSON file is checked
// against that level and every later level.
describe("Documents", function(){
	var docs = levels.map(function(n){
		return require(path.join("..", "entities", n));
	});

	docs.forEach(function(doc, i){
		var names = Object.keys(doc);

		// Runs `check(level)` for level i and every level after it.
		function fromThisLevelUp(check){
			for(var l = i; l < levels.length; l++){
				check(l);
			}
		}

		describe("Decode", function(){
			it(levels[i], function(){
				names.forEach(function(e){
					fromThisLevelUp(function(l){
						assert.equal(entities.decode("&" + e, l), doc[e]);
					});
				});
			});
		});

		describe("Decode strict", function(){
			it(levels[i], function(){
				names.forEach(function(e){
					var terminated = e.substr(-1) === ";";

					if(!terminated){
						// Strict mode must leave unterminated names untouched.
						assert.equal(entities.decodeStrict("&" + e, i), "&" + e);
						return;
					}

					fromThisLevelUp(function(l){
						assert.equal(entities.decodeStrict("&" + e, l), doc[e]);
					});
				});
			});
		});

		describe("Encode", function(){
			it(levels[i], function(){
				names.forEach(function(e){
					// Only semicolon-terminated names take part in the round trip.
					if(e.substr(-1) !== ";") return;

					fromThisLevelUp(function(l){
						assert.equal(entities.decode(entities.encode(doc[e], l), l), doc[e]);
					});
				});
			});
		});
	});
});
|
|
39
125
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
{
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
testcases.forEach(function(tc) {
|
|
55
|
-
it('should XML decode '+tc.input, function() {
|
|
56
|
-
assert.equal(entities.decodeXML(tc.input), tc.output);
|
|
57
|
-
});
|
|
58
|
-
it('should HTML4 decode '+tc.input, function() {
|
|
59
|
-
assert.equal(entities.decodeHTML4(tc.input), tc.output);
|
|
60
|
-
});
|
|
61
|
-
it('should HTML5 decode '+tc.input, function() {
|
|
62
|
-
assert.equal(entities.decodeHTML5(tc.input), tc.output);
|
|
63
|
-
});
|
|
64
|
-
});
|
|
126
|
+
// Characters outside the Basic Multilingual Plane, keyed by the upper-case
// hexadecimal code point and stored as their UTF-16 surrogate pair.
var astral = {
	"1D306": "\uD834\uDF06",
	"1D11E": "\uD834\uDD1E"
};

describe("Astral entities", function(){
	Object.keys(astral).forEach(function(hex){
		var symbol = astral[hex];
		var reference = "&#x" + hex + ";";

		// TODO: the decoding direction (entities.decode(reference) yielding
		// the symbol) is not asserted here; that test was disabled.

		it("should encode " + symbol, function(){
			assert.equal(entities.encode(symbol), reference);
		});
	});
});
|