htmlentities 3.0.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{COPYING → COPYING.txt} +0 -0
- data/{CHANGES → History.txt} +15 -0
- data/README.txt +44 -0
- data/lib/htmlentities.rb +118 -119
- data/lib/htmlentities/html4.rb +257 -0
- data/lib/htmlentities/legacy.rb +27 -0
- data/lib/htmlentities/xhtml1.rb +258 -0
- data/test/entities_test.rb +110 -33
- data/test/html4_test.rb +24 -0
- data/test/legacy_test.rb +34 -0
- data/test/roundtrip_test.rb +94 -0
- data/test/string_test.rb +1 -1
- data/test/{all.rb → test_all.rb} +0 -0
- data/test/xhtml1_test.rb +23 -0
- metadata +21 -13
- data/README +0 -23
@@ -0,0 +1,27 @@
|
|
1
|
+
# Legacy compatibility layer: class-level encode/decode helpers that delegate
# to a single HTMLEntities instance created with the 'xhtml1' flavor.
class HTMLEntities
  class << self

    #
    # Legacy compatibility class method allowing direct encoding of XHTML1 entities.
    # See HTMLEntities#encode for description of parameters.
    #
    def encode_entities(*args)
      xhtml1_entities.encode(*args)
    end

    #
    # Legacy compatibility class method allowing direct decoding of XHTML1 entities.
    # See HTMLEntities#decode for description of parameters.
    #
    def decode_entities(*args)
      xhtml1_entities.decode(*args)
    end

  private

    # Builds the 'xhtml1' coder on first use and memoizes it in a
    # class-level instance variable, so both legacy helpers reuse one object.
    def xhtml1_entities
      @xhtml1_entities ||= new('xhtml1')
    end

  end
end
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# Named-entity table for the 'xhtml1' flavor.
#
# Each key is an entity name (without the leading '&' and trailing ';') and
# each value is the corresponding code point as an Integer, e.g.
# 'amp' => 38. Names are case-sensitive ('Dagger' and 'dagger' differ).
#
# MAPPINGS is only created when it is not already defined; presumably this
# lets the other flavor files register their tables in the same hash
# regardless of load order (verify against the sibling flavor files).
class HTMLEntities
  MAPPINGS = {} unless defined? MAPPINGS
  MAPPINGS['xhtml1'] = {
    'Aacute' => 193,
    'aacute' => 225,
    'Acirc' => 194,
    'acirc' => 226,
    'acute' => 180,
    'AElig' => 198,
    'aelig' => 230,
    'Agrave' => 192,
    'agrave' => 224,
    'alefsym' => 8501,
    'Alpha' => 913,
    'alpha' => 945,
    'amp' => 38,
    'and' => 8743,
    'ang' => 8736,
    'apos' => 39,
    'Aring' => 197,
    'aring' => 229,
    'asymp' => 8776,
    'Atilde' => 195,
    'atilde' => 227,
    'Auml' => 196,
    'auml' => 228,
    'bdquo' => 8222,
    'Beta' => 914,
    'beta' => 946,
    'brvbar' => 166,
    'bull' => 8226,
    'cap' => 8745,
    'Ccedil' => 199,
    'ccedil' => 231,
    'cedil' => 184,
    'cent' => 162,
    'Chi' => 935,
    'chi' => 967,
    'circ' => 710,
    'clubs' => 9827,
    'cong' => 8773,
    'copy' => 169,
    'crarr' => 8629,
    'cup' => 8746,
    'curren' => 164,
    'Dagger' => 8225,
    'dagger' => 8224,
    'dArr' => 8659,
    'darr' => 8595,
    'deg' => 176,
    'Delta' => 916,
    'delta' => 948,
    'diams' => 9830,
    'divide' => 247,
    'Eacute' => 201,
    'eacute' => 233,
    'Ecirc' => 202,
    'ecirc' => 234,
    'Egrave' => 200,
    'egrave' => 232,
    'empty' => 8709,
    'emsp' => 8195,
    'ensp' => 8194,
    'Epsilon' => 917,
    'epsilon' => 949,
    'equiv' => 8801,
    'Eta' => 919,
    'eta' => 951,
    'ETH' => 208,
    'eth' => 240,
    'Euml' => 203,
    'euml' => 235,
    'euro' => 8364,
    'exist' => 8707,
    'fnof' => 402,
    'forall' => 8704,
    'frac12' => 189,
    'frac14' => 188,
    'frac34' => 190,
    'frasl' => 8260,
    'Gamma' => 915,
    'gamma' => 947,
    'ge' => 8805,
    'gt' => 62,
    'hArr' => 8660,
    'harr' => 8596,
    'hearts' => 9829,
    'hellip' => 8230,
    'Iacute' => 205,
    'iacute' => 237,
    'Icirc' => 206,
    'icirc' => 238,
    'iexcl' => 161,
    'Igrave' => 204,
    'igrave' => 236,
    'image' => 8465,
    'infin' => 8734,
    'int' => 8747,
    'Iota' => 921,
    'iota' => 953,
    'iquest' => 191,
    'isin' => 8712,
    'Iuml' => 207,
    'iuml' => 239,
    'Kappa' => 922,
    'kappa' => 954,
    'Lambda' => 923,
    'lambda' => 955,
    'lang' => 9001,
    'laquo' => 171,
    'lArr' => 8656,
    'larr' => 8592,
    'lceil' => 8968,
    'ldquo' => 8220,
    'le' => 8804,
    'lfloor' => 8970,
    'lowast' => 8727,
    'loz' => 9674,
    'lrm' => 8206,
    'lsaquo' => 8249,
    'lsquo' => 8216,
    'lt' => 60,
    'macr' => 175,
    'mdash' => 8212,
    'micro' => 181,
    'middot' => 183,
    'minus' => 8722,
    'Mu' => 924,
    'mu' => 956,
    'nabla' => 8711,
    'nbsp' => 160,
    'ndash' => 8211,
    'ne' => 8800,
    'ni' => 8715,
    'not' => 172,
    'notin' => 8713,
    'nsub' => 8836,
    'Ntilde' => 209,
    'ntilde' => 241,
    'Nu' => 925,
    'nu' => 957,
    'Oacute' => 211,
    'oacute' => 243,
    'Ocirc' => 212,
    'ocirc' => 244,
    'OElig' => 338,
    'oelig' => 339,
    'Ograve' => 210,
    'ograve' => 242,
    'oline' => 8254,
    'Omega' => 937,
    'omega' => 969,
    'Omicron' => 927,
    'omicron' => 959,
    'oplus' => 8853,
    'or' => 8744,
    'ordf' => 170,
    'ordm' => 186,
    'Oslash' => 216,
    'oslash' => 248,
    'Otilde' => 213,
    'otilde' => 245,
    'otimes' => 8855,
    'Ouml' => 214,
    'ouml' => 246,
    'para' => 182,
    'part' => 8706,
    'permil' => 8240,
    'perp' => 8869,
    'Phi' => 934,
    'phi' => 966,
    'Pi' => 928,
    'pi' => 960,
    'piv' => 982,
    'plusmn' => 177,
    'pound' => 163,
    'Prime' => 8243,
    'prime' => 8242,
    'prod' => 8719,
    'prop' => 8733,
    'Psi' => 936,
    'psi' => 968,
    'quot' => 34,
    'radic' => 8730,
    'rang' => 9002,
    'raquo' => 187,
    'rArr' => 8658,
    'rarr' => 8594,
    'rceil' => 8969,
    'rdquo' => 8221,
    'real' => 8476,
    'reg' => 174,
    'rfloor' => 8971,
    'Rho' => 929,
    'rho' => 961,
    'rlm' => 8207,
    'rsaquo' => 8250,
    'rsquo' => 8217,
    'sbquo' => 8218,
    'Scaron' => 352,
    'scaron' => 353,
    'sdot' => 8901,
    'sect' => 167,
    'shy' => 173,
    'Sigma' => 931,
    'sigma' => 963,
    'sigmaf' => 962,
    'sim' => 8764,
    'spades' => 9824,
    'sub' => 8834,
    'sube' => 8838,
    'sum' => 8721,
    'sup' => 8835,
    'sup1' => 185,
    'sup2' => 178,
    'sup3' => 179,
    'supe' => 8839,
    'szlig' => 223,
    'Tau' => 932,
    'tau' => 964,
    'there4' => 8756,
    'Theta' => 920,
    'theta' => 952,
    'thetasym' => 977,
    'thinsp' => 8201,
    'THORN' => 222,
    'thorn' => 254,
    'tilde' => 732,
    'times' => 215,
    'trade' => 8482,
    'Uacute' => 218,
    'uacute' => 250,
    'uArr' => 8657,
    'uarr' => 8593,
    'Ucirc' => 219,
    'ucirc' => 251,
    'Ugrave' => 217,
    'ugrave' => 249,
    'uml' => 168,
    'upsih' => 978,
    'Upsilon' => 933,
    'upsilon' => 965,
    'Uuml' => 220,
    'uuml' => 252,
    'weierp' => 8472,
    'Xi' => 926,
    'xi' => 958,
    'Yacute' => 221,
    'yacute' => 253,
    'yen' => 165,
    'Yuml' => 376,
    'yuml' => 255,
    'Zeta' => 918,
    'zeta' => 950,
    'zwj' => 8205,
    'zwnj' => 8204
  }
end
|
data/test/entities_test.rb
CHANGED
@@ -1,74 +1,124 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
2
2
|
require 'htmlentities'
|
3
3
|
require 'test/unit'
|
4
4
|
|
5
5
|
$KCODE = 'u'
|
6
6
|
|
7
|
-
class
|
7
|
+
class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
8
|
+
|
9
|
+
attr_reader :xhtml1_entities, :html4_entities
|
10
|
+
|
11
|
+
def setup
|
12
|
+
@xhtml1_entities = HTMLEntities.new('xhtml1')
|
13
|
+
@html4_entities = HTMLEntities.new('html4')
|
14
|
+
end
|
15
|
+
|
16
|
+
class PseudoString
|
17
|
+
def initialize(string)
|
18
|
+
@string = string
|
19
|
+
end
|
20
|
+
def to_s
|
21
|
+
@string
|
22
|
+
end
|
23
|
+
end
|
8
24
|
|
9
|
-
def
|
25
|
+
def test_should_raise_exception_when_unknown_flavor_specified
|
26
|
+
assert_raises(HTMLEntities::UnknownFlavor) do
|
27
|
+
HTMLEntities.new('foo')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_should_allow_symbol_for_flavor
|
32
|
+
assert_nothing_raised do
|
33
|
+
HTMLEntities.new(:xhtml1)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_should_allow_upper_case_flavor
|
38
|
+
assert_nothing_raised do
|
39
|
+
HTMLEntities.new('XHTML1')
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_should_decode_basic_entities
|
10
44
|
assert_decode('&', '&amp;')
|
11
45
|
assert_decode('<', '&lt;')
|
12
46
|
assert_decode('"', '&quot;')
|
13
47
|
end
|
14
|
-
|
15
|
-
def
|
48
|
+
|
49
|
+
def test_should_encode_basic_entities
|
16
50
|
assert_encode('&amp;', '&', :basic)
|
17
51
|
assert_encode('&quot;', '"')
|
18
52
|
assert_encode('&lt;', '<', :basic)
|
19
53
|
assert_encode('&lt;', '<')
|
20
54
|
end
|
55
|
+
|
56
|
+
def test_should_encode_basic_entities_to_decimal
|
57
|
+
assert_encode('&#38;', '&', :decimal)
|
58
|
+
assert_encode('&#34;', '"', :decimal)
|
59
|
+
assert_encode('&#60;', '<', :decimal)
|
60
|
+
assert_encode('&#62;', '>', :decimal)
|
61
|
+
assert_encode('&#39;', "'", :decimal)
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_should_encode_basic_entities_to_hexadecimal
|
65
|
+
assert_encode('&#x26;', '&', :hexadecimal)
|
66
|
+
assert_encode('&#x22;', '"', :hexadecimal)
|
67
|
+
assert_encode('&#x3c;', '<', :hexadecimal)
|
68
|
+
assert_encode('&#x3e;', '>', :hexadecimal)
|
69
|
+
assert_encode('&#x27;', "'", :hexadecimal)
|
70
|
+
end
|
21
71
|
|
22
|
-
def
|
72
|
+
def test_should_decode_extended_named_entities
|
23
73
|
assert_decode('±', '&plusmn;')
|
24
74
|
assert_decode('ð', '&eth;')
|
25
75
|
assert_decode('Œ', '&OElig;')
|
26
76
|
assert_decode('œ', '&oelig;')
|
27
77
|
end
|
28
|
-
|
29
|
-
def
|
78
|
+
|
79
|
+
def test_should_encode_extended_named_entities
|
30
80
|
assert_encode('&plusmn;', '±', :named)
|
31
81
|
assert_encode('&eth;', 'ð', :named)
|
32
82
|
assert_encode('&OElig;', 'Œ', :named)
|
33
83
|
assert_encode('&oelig;', 'œ', :named)
|
34
84
|
end
|
35
85
|
|
36
|
-
def
|
86
|
+
def test_should_decode_decimal_entities
|
37
87
|
assert_decode('“', '&#8220;')
|
38
88
|
assert_decode('…', '&#8230;')
|
39
89
|
assert_decode(' ', '&#32;')
|
40
90
|
end
|
41
|
-
|
42
|
-
def
|
91
|
+
|
92
|
+
def test_should_encode_decimal_entities
|
43
93
|
assert_encode('&#8220;', '“', :decimal)
|
44
94
|
assert_encode('&#8230;', '…', :decimal)
|
45
95
|
end
|
46
96
|
|
47
|
-
def
|
97
|
+
def test_should_decode_hexadecimal_entities
|
48
98
|
assert_decode('−', '&#x2212;')
|
49
99
|
assert_decode('—', '&#x2014;')
|
50
100
|
assert_decode('`', '&#x0060;')
|
51
101
|
assert_decode('`', '&#x60;')
|
52
102
|
end
|
53
|
-
|
54
|
-
def
|
103
|
+
|
104
|
+
def test_should_encode_hexadecimal_entities
|
55
105
|
assert_encode('&#x2212;', '−', :hexadecimal)
|
56
106
|
assert_encode('&#x2014;', '—', :hexadecimal)
|
57
107
|
end
|
58
108
|
|
59
|
-
def
|
109
|
+
def test_should_decode_text_with_mix_of_entities
|
60
110
|
# Just a random headline - I needed something with accented letters.
|
61
111
|
assert_decode(
|
62
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
112
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
63
113
|
'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
|
64
114
|
)
|
65
|
-
assert_decode(
|
115
|
+
assert_decode(
|
66
116
|
'"bientôt" & 文字',
|
67
117
|
'&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
|
68
118
|
)
|
69
119
|
end
|
70
120
|
|
71
|
-
def
|
121
|
+
def test_should_encode_text_using_mix_of_entities
|
72
122
|
assert_encode(
|
73
123
|
'&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
|
74
124
|
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
@@ -78,8 +128,8 @@ class TestHTMLEntities < Test::Unit::TestCase
|
|
78
128
|
'"bientôt" & 文字', :basic, :named, :decimal
|
79
129
|
)
|
80
130
|
end
|
81
|
-
|
82
|
-
def
|
131
|
+
|
132
|
+
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
83
133
|
assert_encode(
|
84
134
|
'&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
|
85
135
|
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
@@ -90,40 +140,67 @@ class TestHTMLEntities < Test::Unit::TestCase
|
|
90
140
|
)
|
91
141
|
end
|
92
142
|
|
93
|
-
def
|
143
|
+
def test_should_detect_illegal_encoding_command
|
94
144
|
assert_raise(HTMLEntities::InstructionError) {
|
95
145
|
HTMLEntities.encode_entities('foo', :bar, :baz)
|
96
146
|
}
|
97
147
|
end
|
98
148
|
|
99
|
-
def
|
149
|
+
def test_should_decode_empty_string
|
100
150
|
assert_decode('', '')
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_should_skip_unknown_entity
|
101
154
|
assert_decode('&bogus;', '&bogus;')
|
155
|
+
end
|
156
|
+
|
157
|
+
def test_should_decode_double_encoded_entity_once
|
102
158
|
assert_decode('&amp;', '&amp;amp;')
|
103
159
|
end
|
104
160
|
|
105
|
-
def
|
161
|
+
def test_should_not_encode_normal_ASCII
|
106
162
|
assert_encode('`', '`')
|
107
163
|
assert_encode(' ', ' ')
|
108
|
-
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_should_double_encode_existing_entity
|
109
167
|
assert_encode('&amp;amp;', '&amp;')
|
110
168
|
end
|
111
169
|
|
112
170
|
# Faults found and patched by Moonwolf
|
113
|
-
def
|
114
|
-
|
115
|
-
|
171
|
+
def test_should_decode_full_hexadecimal_range
|
172
|
+
(0..127).each do |codepoint|
|
173
|
+
assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Reported by Dallas DeVries and Johan Duflost
|
178
|
+
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
|
179
|
+
assert_decode([178].pack('U'), '&sup2;')
|
180
|
+
assert_decode([8226].pack('U'), '&bull;')
|
181
|
+
assert_decode([948].pack('U'), '&delta;')
|
182
|
+
end
|
183
|
+
|
184
|
+
def test_should_ducktype_parameter_to_string_before_encoding
|
185
|
+
pseudo_string = PseudoString.new('foo')
|
186
|
+
assert_decode('foo', pseudo_string)
|
187
|
+
end
|
188
|
+
|
189
|
+
def test_should_ducktype_parameter_to_string_before_decoding
|
190
|
+
pseudo_string = PseudoString.new('foo')
|
191
|
+
assert_encode('foo', pseudo_string)
|
116
192
|
end
|
117
|
-
|
118
|
-
private
|
119
193
|
|
120
194
|
def assert_decode(expected, input)
|
121
|
-
|
195
|
+
[xhtml1_entities, html4_entities].each do |coder|
|
196
|
+
assert_equal(expected, coder.decode(input))
|
197
|
+
end
|
122
198
|
end
|
123
|
-
|
199
|
+
|
124
200
|
def assert_encode(expected, input, *args)
|
125
|
-
|
201
|
+
[xhtml1_entities, html4_entities].each do |coder|
|
202
|
+
assert_equal(expected, coder.encode(input, *args))
|
203
|
+
end
|
126
204
|
end
|
127
205
|
|
128
206
|
end
|
129
|
-
|