htmlentities 3.0.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{COPYING → COPYING.txt} +0 -0
- data/{CHANGES → History.txt} +15 -0
- data/README.txt +44 -0
- data/lib/htmlentities.rb +118 -119
- data/lib/htmlentities/html4.rb +257 -0
- data/lib/htmlentities/legacy.rb +27 -0
- data/lib/htmlentities/xhtml1.rb +258 -0
- data/test/entities_test.rb +110 -33
- data/test/html4_test.rb +24 -0
- data/test/legacy_test.rb +34 -0
- data/test/roundtrip_test.rb +94 -0
- data/test/string_test.rb +1 -1
- data/test/{all.rb → test_all.rb} +0 -0
- data/test/xhtml1_test.rb +23 -0
- metadata +21 -13
- data/README +0 -23
@@ -0,0 +1,27 @@
|
|
1
|
+
class HTMLEntities
  #
  # Legacy class-level shortcut for encoding with the XHTML1 entity set.
  # All arguments are forwarded unchanged to HTMLEntities#encode; see that
  # method for their meaning.
  #
  def self.encode_entities(*args)
    xhtml1_entities.encode(*args)
  end

  #
  # Legacy class-level shortcut for decoding with the XHTML1 entity set.
  # All arguments are forwarded unchanged to HTMLEntities#decode; see that
  # method for their meaning.
  #
  def self.decode_entities(*args)
    xhtml1_entities.decode(*args)
  end

  #
  # The coder backing both shortcuts: built with the 'xhtml1' flavor on first
  # use and then kept in a class-level instance variable for later calls.
  #
  def self.xhtml1_entities
    @xhtml1_entities ||= new('xhtml1')
  end
  private_class_method :xhtml1_entities
end
|
@@ -0,0 +1,258 @@
|
|
1
|
+
class HTMLEntities
  # Shared registry of entity tables, keyed by flavor name. Only created here
  # if no other file has defined it yet.
  MAPPINGS = {} unless defined?(MAPPINGS)

  # Entity table for the 'xhtml1' flavor: entity name (without the leading
  # '&' and trailing ';') mapped to an integer character code.
  MAPPINGS['xhtml1'] = {
    'Aacute'   => 193,
    'aacute'   => 225,
    'Acirc'    => 194,
    'acirc'    => 226,
    'acute'    => 180,
    'AElig'    => 198,
    'aelig'    => 230,
    'Agrave'   => 192,
    'agrave'   => 224,
    'alefsym'  => 8501,
    'Alpha'    => 913,
    'alpha'    => 945,
    'amp'      => 38,
    'and'      => 8743,
    'ang'      => 8736,
    'apos'     => 39,
    'Aring'    => 197,
    'aring'    => 229,
    'asymp'    => 8776,
    'Atilde'   => 195,
    'atilde'   => 227,
    'Auml'     => 196,
    'auml'     => 228,
    'bdquo'    => 8222,
    'Beta'     => 914,
    'beta'     => 946,
    'brvbar'   => 166,
    'bull'     => 8226,
    'cap'      => 8745,
    'Ccedil'   => 199,
    'ccedil'   => 231,
    'cedil'    => 184,
    'cent'     => 162,
    'Chi'      => 935,
    'chi'      => 967,
    'circ'     => 710,
    'clubs'    => 9827,
    'cong'     => 8773,
    'copy'     => 169,
    'crarr'    => 8629,
    'cup'      => 8746,
    'curren'   => 164,
    'Dagger'   => 8225,
    'dagger'   => 8224,
    'dArr'     => 8659,
    'darr'     => 8595,
    'deg'      => 176,
    'Delta'    => 916,
    'delta'    => 948,
    'diams'    => 9830,
    'divide'   => 247,
    'Eacute'   => 201,
    'eacute'   => 233,
    'Ecirc'    => 202,
    'ecirc'    => 234,
    'Egrave'   => 200,
    'egrave'   => 232,
    'empty'    => 8709,
    'emsp'     => 8195,
    'ensp'     => 8194,
    'Epsilon'  => 917,
    'epsilon'  => 949,
    'equiv'    => 8801,
    'Eta'      => 919,
    'eta'      => 951,
    'ETH'      => 208,
    'eth'      => 240,
    'Euml'     => 203,
    'euml'     => 235,
    'euro'     => 8364,
    'exist'    => 8707,
    'fnof'     => 402,
    'forall'   => 8704,
    'frac12'   => 189,
    'frac14'   => 188,
    'frac34'   => 190,
    'frasl'    => 8260,
    'Gamma'    => 915,
    'gamma'    => 947,
    'ge'       => 8805,
    'gt'       => 62,
    'hArr'     => 8660,
    'harr'     => 8596,
    'hearts'   => 9829,
    'hellip'   => 8230,
    'Iacute'   => 205,
    'iacute'   => 237,
    'Icirc'    => 206,
    'icirc'    => 238,
    'iexcl'    => 161,
    'Igrave'   => 204,
    'igrave'   => 236,
    'image'    => 8465,
    'infin'    => 8734,
    'int'      => 8747,
    'Iota'     => 921,
    'iota'     => 953,
    'iquest'   => 191,
    'isin'     => 8712,
    'Iuml'     => 207,
    'iuml'     => 239,
    'Kappa'    => 922,
    'kappa'    => 954,
    'Lambda'   => 923,
    'lambda'   => 955,
    'lang'     => 9001,
    'laquo'    => 171,
    'lArr'     => 8656,
    'larr'     => 8592,
    'lceil'    => 8968,
    'ldquo'    => 8220,
    'le'       => 8804,
    'lfloor'   => 8970,
    'lowast'   => 8727,
    'loz'      => 9674,
    'lrm'      => 8206,
    'lsaquo'   => 8249,
    'lsquo'    => 8216,
    'lt'       => 60,
    'macr'     => 175,
    'mdash'    => 8212,
    'micro'    => 181,
    'middot'   => 183,
    'minus'    => 8722,
    'Mu'       => 924,
    'mu'       => 956,
    'nabla'    => 8711,
    'nbsp'     => 160,
    'ndash'    => 8211,
    'ne'       => 8800,
    'ni'       => 8715,
    'not'      => 172,
    'notin'    => 8713,
    'nsub'     => 8836,
    'Ntilde'   => 209,
    'ntilde'   => 241,
    'Nu'       => 925,
    'nu'       => 957,
    'Oacute'   => 211,
    'oacute'   => 243,
    'Ocirc'    => 212,
    'ocirc'    => 244,
    'OElig'    => 338,
    'oelig'    => 339,
    'Ograve'   => 210,
    'ograve'   => 242,
    'oline'    => 8254,
    'Omega'    => 937,
    'omega'    => 969,
    'Omicron'  => 927,
    'omicron'  => 959,
    'oplus'    => 8853,
    'or'       => 8744,
    'ordf'     => 170,
    'ordm'     => 186,
    'Oslash'   => 216,
    'oslash'   => 248,
    'Otilde'   => 213,
    'otilde'   => 245,
    'otimes'   => 8855,
    'Ouml'     => 214,
    'ouml'     => 246,
    'para'     => 182,
    'part'     => 8706,
    'permil'   => 8240,
    'perp'     => 8869,
    'Phi'      => 934,
    'phi'      => 966,
    'Pi'       => 928,
    'pi'       => 960,
    'piv'      => 982,
    'plusmn'   => 177,
    'pound'    => 163,
    'Prime'    => 8243,
    'prime'    => 8242,
    'prod'     => 8719,
    'prop'     => 8733,
    'Psi'      => 936,
    'psi'      => 968,
    'quot'     => 34,
    'radic'    => 8730,
    'rang'     => 9002,
    'raquo'    => 187,
    'rArr'     => 8658,
    'rarr'     => 8594,
    'rceil'    => 8969,
    'rdquo'    => 8221,
    'real'     => 8476,
    'reg'      => 174,
    'rfloor'   => 8971,
    'Rho'      => 929,
    'rho'      => 961,
    'rlm'      => 8207,
    'rsaquo'   => 8250,
    'rsquo'    => 8217,
    'sbquo'    => 8218,
    'Scaron'   => 352,
    'scaron'   => 353,
    'sdot'     => 8901,
    'sect'     => 167,
    'shy'      => 173,
    'Sigma'    => 931,
    'sigma'    => 963,
    'sigmaf'   => 962,
    'sim'      => 8764,
    'spades'   => 9824,
    'sub'      => 8834,
    'sube'     => 8838,
    'sum'      => 8721,
    'sup'      => 8835,
    'sup1'     => 185,
    'sup2'     => 178,
    'sup3'     => 179,
    'supe'     => 8839,
    'szlig'    => 223,
    'Tau'      => 932,
    'tau'      => 964,
    'there4'   => 8756,
    'Theta'    => 920,
    'theta'    => 952,
    'thetasym' => 977,
    'thinsp'   => 8201,
    'THORN'    => 222,
    'thorn'    => 254,
    'tilde'    => 732,
    'times'    => 215,
    'trade'    => 8482,
    'Uacute'   => 218,
    'uacute'   => 250,
    'uArr'     => 8657,
    'uarr'     => 8593,
    'Ucirc'    => 219,
    'ucirc'    => 251,
    'Ugrave'   => 217,
    'ugrave'   => 249,
    'uml'      => 168,
    'upsih'    => 978,
    'Upsilon'  => 933,
    'upsilon'  => 965,
    'Uuml'     => 220,
    'uuml'     => 252,
    'weierp'   => 8472,
    'Xi'       => 926,
    'xi'       => 958,
    'Yacute'   => 221,
    'yacute'   => 253,
    'yen'      => 165,
    'Yuml'     => 376,
    'yuml'     => 255,
    'Zeta'     => 918,
    'zeta'     => 950,
    'zwj'      => 8205,
    'zwnj'     => 8204
  }
end
|
data/test/entities_test.rb
CHANGED
@@ -1,74 +1,124 @@
|
|
1
|
-
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
2
2
|
require 'htmlentities'
|
3
3
|
require 'test/unit'
|
4
4
|
|
5
5
|
$KCODE = 'u'
|
6
6
|
|
7
|
-
class
|
7
|
+
class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
8
|
+
|
9
|
+
attr_reader :xhtml1_entities, :html4_entities
|
10
|
+
|
11
|
+
# Builds a fresh coder for each of the two flavors exercised by this suite
# and stores them in the instance variables behind the attr_readers.
def setup
  @xhtml1_entities, @html4_entities = %w[xhtml1 html4].map { |flavor| HTMLEntities.new(flavor) }
end
|
15
|
+
|
16
|
+
# Test double that wraps a value and exposes it only through #to_s, so the
# suite can pass something that is not itself a String to the coders.
class PseudoString
  # string: the value later handed back by #to_s.
  def initialize(string)
    @string = string
  end

  # Returns the wrapped value exactly as it was given to the constructor.
  def to_s
    @string
  end
end
|
8
24
|
|
9
|
-
def
|
25
|
+
# Constructing a coder with a flavor name that is not registered must raise
# HTMLEntities::UnknownFlavor.
def test_should_raise_exception_when_unknown_flavor_specified
  assert_raises(HTMLEntities::UnknownFlavor) { HTMLEntities.new('foo') }
end
|
30
|
+
|
31
|
+
# The flavor may be given as a Symbol instead of a String.
def test_should_allow_symbol_for_flavor
  assert_nothing_raised { HTMLEntities.new(:xhtml1) }
end
|
36
|
+
|
37
|
+
# The flavor name may be given in upper case.
def test_should_allow_upper_case_flavor
  assert_nothing_raised { HTMLEntities.new('XHTML1') }
end
|
42
|
+
|
43
|
+
# Decoding the basic markup entities yields the bare characters.
# NOTE(review): the entity inputs were lost when this listing was extracted
# (both arguments appeared as the same bare character); they are restored
# here as the named forms amp/lt/quot — confirm against the upstream file.
def test_should_decode_basic_entities
  assert_decode('&', '&amp;')
  assert_decode('<', '&lt;')
  assert_decode('"', '&quot;')
end
|
14
|
-
|
15
|
-
def
|
48
|
+
|
49
|
+
# Encoding the basic markup characters yields their named entities, both
# with an explicit :basic instruction and with no instruction at all.
# NOTE(review): the expected entities were decoded to bare characters when
# this listing was extracted; restored from the entity names amp/quot/lt —
# confirm against the upstream file.
def test_should_encode_basic_entities
  assert_encode('&amp;', '&', :basic)
  assert_encode('&quot;', '"')
  assert_encode('&lt;', '<', :basic)
  assert_encode('&lt;', '<')
end
|
55
|
+
|
56
|
+
# With the :decimal instruction the basic markup characters are encoded as
# decimal numeric references.
# NOTE(review): the expected references were decoded to bare characters when
# this listing was extracted (which also left an unparseable ''' literal);
# restored from each character's code point — confirm against upstream.
def test_should_encode_basic_entities_to_decimal
  assert_encode('&#38;', '&', :decimal)
  assert_encode('&#34;', '"', :decimal)
  assert_encode('&#60;', '<', :decimal)
  assert_encode('&#62;', '>', :decimal)
  assert_encode('&#39;', "'", :decimal)
end
|
63
|
+
|
64
|
+
# With the :hexadecimal instruction the basic markup characters are encoded
# as hexadecimal numeric references.
# NOTE(review): the expected references were decoded to bare characters when
# this listing was extracted (which also left an unparseable ''' literal);
# restored from each character's code point. Lower-case hex digits are
# assumed — confirm against the encoder and the upstream file.
def test_should_encode_basic_entities_to_hexadecimal
  assert_encode('&#x26;', '&', :hexadecimal)
  assert_encode('&#x22;', '"', :hexadecimal)
  assert_encode('&#x3c;', '<', :hexadecimal)
  assert_encode('&#x3e;', '>', :hexadecimal)
  assert_encode('&#x27;', "'", :hexadecimal)
end
|
21
71
|
|
22
|
-
def
|
72
|
+
# Named entities beyond the basic set decode to the corresponding characters.
# NOTE(review): the entity inputs were decoded to bare characters when this
# listing was extracted; restored as the named forms plusmn/eth/OElig/oelig —
# confirm against the upstream file.
def test_should_decode_extended_named_entities
  assert_decode('±', '&plusmn;')
  assert_decode('ð', '&eth;')
  assert_decode('Œ', '&OElig;')
  assert_decode('œ', '&oelig;')
end
|
28
|
-
|
29
|
-
def
|
78
|
+
|
79
|
+
# With the :named instruction, characters outside the basic set are encoded
# as named entities.
# NOTE(review): the expected entities were decoded to bare characters when
# this listing was extracted; restored from the entity names for code points
# 177, 240, 338 and 339 — confirm against the upstream file.
def test_should_encode_extended_named_entities
  assert_encode('&plusmn;', '±', :named)
  assert_encode('&eth;', 'ð', :named)
  assert_encode('&OElig;', 'Œ', :named)
  assert_encode('&oelig;', 'œ', :named)
end
|
35
85
|
|
36
|
-
def
|
86
|
+
def test_should_decode_decimal_entities
|
37
87
|
assert_decode('“', '“')
|
38
88
|
assert_decode('…', '…')
|
39
89
|
assert_decode(' ', ' ')
|
40
90
|
end
|
41
|
-
|
42
|
-
def
|
91
|
+
|
92
|
+
# With the :decimal instruction, non-ASCII characters are encoded as decimal
# numeric references.
# NOTE(review): the expected references were decoded to bare characters when
# this listing was extracted; restored from each character's code point
# (U+201C = 8220, U+2026 = 8230) — confirm against the upstream file.
def test_should_encode_decimal_entities
  assert_encode('&#8220;', '“', :decimal)
  assert_encode('&#8230;', '…', :decimal)
end
|
46
96
|
|
47
|
-
def
|
97
|
+
def test_should_decode_hexadecimal_entities
|
48
98
|
assert_decode('−', '−')
|
49
99
|
assert_decode('—', '—')
|
50
100
|
assert_decode('`', '`')
|
51
101
|
assert_decode('`', '`')
|
52
102
|
end
|
53
|
-
|
54
|
-
def
|
103
|
+
|
104
|
+
# With the :hexadecimal instruction, non-ASCII characters are encoded as
# hexadecimal numeric references.
# NOTE(review): the expected references were decoded to bare characters when
# this listing was extracted; restored from each character's code point
# (U+2212, U+2014) — confirm against the upstream file.
def test_should_encode_hexadecimal_entities
  assert_encode('&#x2212;', '−', :hexadecimal)
  assert_encode('&#x2014;', '—', :hexadecimal)
end
|
58
108
|
|
59
|
-
def
|
109
|
+
def test_should_decode_text_with_mix_of_entities
|
60
110
|
# Just a random headline - I needed something with accented letters.
|
61
111
|
assert_decode(
|
62
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
112
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
63
113
|
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France'
|
64
114
|
)
|
65
|
-
assert_decode(
|
115
|
+
assert_decode(
|
66
116
|
'"bientôt" & 文字',
|
67
117
|
'"bientôt" & 文字'
|
68
118
|
)
|
69
119
|
end
|
70
120
|
|
71
|
-
def
|
121
|
+
def test_should_encode_text_using_mix_of_entities
|
72
122
|
assert_encode(
|
73
123
|
'"bientôt" & 文字',
|
74
124
|
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
@@ -78,8 +128,8 @@ class TestHTMLEntities < Test::Unit::TestCase
|
|
78
128
|
'"bientôt" & 文字', :basic, :named, :decimal
|
79
129
|
)
|
80
130
|
end
|
81
|
-
|
82
|
-
def
|
131
|
+
|
132
|
+
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
83
133
|
assert_encode(
|
84
134
|
'"bientôt" & 文字',
|
85
135
|
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
@@ -90,40 +140,67 @@ class TestHTMLEntities < Test::Unit::TestCase
|
|
90
140
|
)
|
91
141
|
end
|
92
142
|
|
93
|
-
def
|
143
|
+
# Passing encoding instructions that are not recognised must raise
# HTMLEntities::InstructionError.
def test_should_detect_illegal_encoding_command
  assert_raise(HTMLEntities::InstructionError) do
    HTMLEntities.encode_entities('foo', :bar, :baz)
  end
end
|
98
148
|
|
99
|
-
def
|
149
|
+
# Decoding an empty string gives back an empty string.
def test_should_decode_empty_string
  assert_decode '', ''
end
|
152
|
+
|
153
|
+
# Text that looks like an entity but has an unknown name is left untouched.
def test_should_skip_unknown_entity
  assert_decode '&bogus;', '&bogus;'
end
|
156
|
+
|
157
|
+
# A double-encoded ampersand is decoded by exactly one level, leaving a
# single-encoded entity behind.
# NOTE(review): this listing showed the literals with one level of encoding
# stripped by extraction ('&' / '&amp;'), which no longer exercised a
# double-encoded input; restored to match the test's name — confirm against
# the upstream file.
def test_should_decode_double_encoded_entity_once
  assert_decode('&amp;', '&amp;amp;')
end
|
104
160
|
|
105
|
-
def
|
161
|
+
# Ordinary ASCII characters (a backtick, then a space) pass through the
# encoder unchanged.
def test_should_not_encode_normal_ASCII
  ['`', ' '].each { |plain| assert_encode(plain, plain) }
end
|
165
|
+
|
166
|
+
# Input that already contains an entity is encoded again: its ampersand is
# escaped like any other.
# NOTE(review): this listing showed the literals with one level of encoding
# stripped by extraction ('&amp;' / '&'), so the input was no longer an
# existing entity; restored to match the test's name — confirm against the
# upstream file.
def test_should_double_encode_existing_entity
  assert_encode('&amp;amp;', '&amp;')
end
|
111
169
|
|
112
170
|
# Faults found and patched by Moonwolf
|
113
|
-
def
|
114
|
-
|
115
|
-
|
171
|
+
# Every code point from 0 to 127, written as an unpadded lower-case
# hexadecimal reference, decodes to the matching single character.
def test_should_decode_full_hexadecimal_range
  0.upto(127) do |codepoint|
    character = [codepoint].pack('U')
    reference = "&\#x#{codepoint.to_s(16)};"
    assert_decode(character, reference)
  end
end
|
176
|
+
|
177
|
+
# Reported by Dallas DeVries and Johan Duflost
|
178
|
+
# Regression test for named entities reported as not decoding in 3.0.1.
# NOTE(review): the entity inputs were decoded to bare characters when this
# listing was extracted; restored as the named entities for code points 178,
# 8226 and 948 (sup2, bull, delta) — confirm against the upstream file.
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
  assert_decode([178].pack('U'), '&sup2;')
  assert_decode([8226].pack('U'), '&bull;')
  assert_decode([948].pack('U'), '&delta;')
end
|
183
|
+
|
184
|
+
# A non-String object that answers #to_s is accepted by the encoder.
# (Previously this test called assert_decode, so its name and the behavior
# it exercised were swapped with the test below.)
def test_should_ducktype_parameter_to_string_before_encoding
  pseudo_string = PseudoString.new('foo')
  assert_encode('foo', pseudo_string)
end

# A non-String object that answers #to_s is accepted by the decoder.
# (Previously this test called assert_encode, so its name and the behavior
# it exercised were swapped with the test above.)
def test_should_ducktype_parameter_to_string_before_decoding
  pseudo_string = PseudoString.new('foo')
  assert_decode('foo', pseudo_string)
end
|
117
|
-
|
118
|
-
private
|
119
193
|
|
120
194
|
# Asserts that decoding +input+ gives +expected+, checking the XHTML1 coder
# first and then the HTML4 coder.
def assert_decode(expected, input)
  coders = [xhtml1_entities, html4_entities]
  coders.each { |coder| assert_equal(expected, coder.decode(input)) }
end
|
123
|
-
|
199
|
+
|
124
200
|
# Asserts that encoding +input+ gives +expected+, checking the XHTML1 coder
# first and then the HTML4 coder. Any extra arguments are forwarded to
# #encode as encoding instructions.
def assert_encode(expected, input, *args)
  coders = [xhtml1_entities, html4_entities]
  coders.each { |coder| assert_equal(expected, coder.encode(input, *args)) }
end
|
127
205
|
|
128
206
|
end
|
129
|
-
|