htmlentities 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
class HTMLEntities
  class << self
    #
    # Legacy compatibility class method allowing direct encoding of XHTML1
    # entities. All arguments are forwarded unchanged to the shared XHTML1
    # coder; see HTMLEntities#encode for a description of the parameters.
    #
    def encode_entities(*args)
      xhtml1_entities.encode(*args)
    end

    #
    # Legacy compatibility class method allowing direct decoding of XHTML1
    # entities. All arguments are forwarded unchanged to the shared XHTML1
    # coder; see HTMLEntities#decode for a description of the parameters.
    #
    def decode_entities(*args)
      xhtml1_entities.decode(*args)
    end

    private

    # Builds the 'xhtml1' coder on first use and returns the same instance on
    # every later call, so the legacy class methods share a single coder.
    def xhtml1_entities
      @xhtml1_entities ||= new('xhtml1')
    end
  end
end
@@ -0,0 +1,258 @@
1
class HTMLEntities
  # Registry of entity tables keyed by flavor name. Created only if another
  # flavor file has not already defined it, so load order does not matter.
  MAPPINGS = {} unless defined? MAPPINGS

  # XHTML1 named entities: entity name (without '&' and ';') => Unicode
  # codepoint as an Integer.
  MAPPINGS['xhtml1'] = {
    'Aacute' => 193,
    'aacute' => 225,
    'Acirc' => 194,
    'acirc' => 226,
    'acute' => 180,
    'AElig' => 198,
    'aelig' => 230,
    'Agrave' => 192,
    'agrave' => 224,
    'alefsym' => 8501,
    'Alpha' => 913,
    'alpha' => 945,
    'amp' => 38,
    'and' => 8743,
    'ang' => 8736,
    'apos' => 39,
    'Aring' => 197,
    'aring' => 229,
    'asymp' => 8776,
    'Atilde' => 195,
    'atilde' => 227,
    'Auml' => 196,
    'auml' => 228,
    'bdquo' => 8222,
    'Beta' => 914,
    'beta' => 946,
    'brvbar' => 166,
    'bull' => 8226,
    'cap' => 8745,
    'Ccedil' => 199,
    'ccedil' => 231,
    'cedil' => 184,
    'cent' => 162,
    'Chi' => 935,
    'chi' => 967,
    'circ' => 710,
    'clubs' => 9827,
    'cong' => 8773,
    'copy' => 169,
    'crarr' => 8629,
    'cup' => 8746,
    'curren' => 164,
    'Dagger' => 8225,
    'dagger' => 8224,
    'dArr' => 8659,
    'darr' => 8595,
    'deg' => 176,
    'Delta' => 916,
    'delta' => 948,
    'diams' => 9830,
    'divide' => 247,
    'Eacute' => 201,
    'eacute' => 233,
    'Ecirc' => 202,
    'ecirc' => 234,
    'Egrave' => 200,
    'egrave' => 232,
    'empty' => 8709,
    'emsp' => 8195,
    'ensp' => 8194,
    'Epsilon' => 917,
    'epsilon' => 949,
    'equiv' => 8801,
    'Eta' => 919,
    'eta' => 951,
    'ETH' => 208,
    'eth' => 240,
    'Euml' => 203,
    'euml' => 235,
    'euro' => 8364,
    'exist' => 8707,
    'fnof' => 402,
    'forall' => 8704,
    'frac12' => 189,
    'frac14' => 188,
    'frac34' => 190,
    'frasl' => 8260,
    'Gamma' => 915,
    'gamma' => 947,
    'ge' => 8805,
    'gt' => 62,
    'hArr' => 8660,
    'harr' => 8596,
    'hearts' => 9829,
    'hellip' => 8230,
    'Iacute' => 205,
    'iacute' => 237,
    'Icirc' => 206,
    'icirc' => 238,
    'iexcl' => 161,
    'Igrave' => 204,
    'igrave' => 236,
    'image' => 8465,
    'infin' => 8734,
    'int' => 8747,
    'Iota' => 921,
    'iota' => 953,
    'iquest' => 191,
    'isin' => 8712,
    'Iuml' => 207,
    'iuml' => 239,
    'Kappa' => 922,
    'kappa' => 954,
    'Lambda' => 923,
    'lambda' => 955,
    'lang' => 9001,
    'laquo' => 171,
    'lArr' => 8656,
    'larr' => 8592,
    'lceil' => 8968,
    'ldquo' => 8220,
    'le' => 8804,
    'lfloor' => 8970,
    'lowast' => 8727,
    'loz' => 9674,
    'lrm' => 8206,
    'lsaquo' => 8249,
    'lsquo' => 8216,
    'lt' => 60,
    'macr' => 175,
    'mdash' => 8212,
    'micro' => 181,
    'middot' => 183,
    'minus' => 8722,
    'Mu' => 924,
    'mu' => 956,
    'nabla' => 8711,
    'nbsp' => 160,
    'ndash' => 8211,
    'ne' => 8800,
    'ni' => 8715,
    'not' => 172,
    'notin' => 8713,
    'nsub' => 8836,
    'Ntilde' => 209,
    'ntilde' => 241,
    'Nu' => 925,
    'nu' => 957,
    'Oacute' => 211,
    'oacute' => 243,
    'Ocirc' => 212,
    'ocirc' => 244,
    'OElig' => 338,
    'oelig' => 339,
    'Ograve' => 210,
    'ograve' => 242,
    'oline' => 8254,
    'Omega' => 937,
    'omega' => 969,
    'Omicron' => 927,
    'omicron' => 959,
    'oplus' => 8853,
    'or' => 8744,
    'ordf' => 170,
    'ordm' => 186,
    'Oslash' => 216,
    'oslash' => 248,
    'Otilde' => 213,
    'otilde' => 245,
    'otimes' => 8855,
    'Ouml' => 214,
    'ouml' => 246,
    'para' => 182,
    'part' => 8706,
    'permil' => 8240,
    'perp' => 8869,
    'Phi' => 934,
    'phi' => 966,
    'Pi' => 928,
    'pi' => 960,
    'piv' => 982,
    'plusmn' => 177,
    'pound' => 163,
    'Prime' => 8243,
    'prime' => 8242,
    'prod' => 8719,
    'prop' => 8733,
    'Psi' => 936,
    'psi' => 968,
    'quot' => 34,
    'radic' => 8730,
    'rang' => 9002,
    'raquo' => 187,
    'rArr' => 8658,
    'rarr' => 8594,
    'rceil' => 8969,
    'rdquo' => 8221,
    'real' => 8476,
    'reg' => 174,
    'rfloor' => 8971,
    'Rho' => 929,
    'rho' => 961,
    'rlm' => 8207,
    'rsaquo' => 8250,
    'rsquo' => 8217,
    'sbquo' => 8218,
    'Scaron' => 352,
    'scaron' => 353,
    'sdot' => 8901,
    'sect' => 167,
    'shy' => 173,
    'Sigma' => 931,
    'sigma' => 963,
    'sigmaf' => 962,
    'sim' => 8764,
    'spades' => 9824,
    'sub' => 8834,
    'sube' => 8838,
    'sum' => 8721,
    'sup' => 8835,
    'sup1' => 185,
    'sup2' => 178,
    'sup3' => 179,
    'supe' => 8839,
    'szlig' => 223,
    'Tau' => 932,
    'tau' => 964,
    'there4' => 8756,
    'Theta' => 920,
    'theta' => 952,
    'thetasym' => 977,
    'thinsp' => 8201,
    'THORN' => 222,
    'thorn' => 254,
    'tilde' => 732,
    'times' => 215,
    'trade' => 8482,
    'Uacute' => 218,
    'uacute' => 250,
    'uArr' => 8657,
    'uarr' => 8593,
    'Ucirc' => 219,
    'ucirc' => 251,
    'Ugrave' => 217,
    'ugrave' => 249,
    'uml' => 168,
    'upsih' => 978,
    'Upsilon' => 933,
    'upsilon' => 965,
    'Uuml' => 220,
    'uuml' => 252,
    'weierp' => 8472,
    'Xi' => 926,
    'xi' => 958,
    'Yacute' => 221,
    'yacute' => 253,
    'yen' => 165,
    'Yuml' => 376,
    'yuml' => 255,
    'Zeta' => 918,
    'zeta' => 950,
    'zwj' => 8205,
    'zwnj' => 8204
  }
end
@@ -1,74 +1,124 @@
1
- $: << File.dirname(__FILE__) + '/../lib/'
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
2
  require 'htmlentities'
3
3
  require 'test/unit'
4
4
 
5
5
  $KCODE = 'u'
6
6
 
7
- class TestHTMLEntities < Test::Unit::TestCase
7
+ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
8
+
9
+ attr_reader :xhtml1_entities, :html4_entities
10
+
11
+ def setup
12
+ @xhtml1_entities = HTMLEntities.new('xhtml1')
13
+ @html4_entities = HTMLEntities.new('html4')
14
+ end
15
+
16
+ class PseudoString
17
+ def initialize(string)
18
+ @string = string
19
+ end
20
+ def to_s
21
+ @string
22
+ end
23
+ end
8
24
 
9
- def test_basic_decoding
25
+ def test_should_raise_exception_when_unknown_flavor_specified
26
+ assert_raises(HTMLEntities::UnknownFlavor) do
27
+ HTMLEntities.new('foo')
28
+ end
29
+ end
30
+
31
+ def test_should_allow_symbol_for_flavor
32
+ assert_nothing_raised do
33
+ HTMLEntities.new(:xhtml1)
34
+ end
35
+ end
36
+
37
+ def test_should_allow_upper_case_flavor
38
+ assert_nothing_raised do
39
+ HTMLEntities.new('XHTML1')
40
+ end
41
+ end
42
+
43
+ def test_should_decode_basic_entities
10
44
  assert_decode('&', '&amp;')
11
45
  assert_decode('<', '&lt;')
12
46
  assert_decode('"', '&quot;')
13
47
  end
14
-
15
- def test_basic_encoding
48
+
49
+ def test_should_encode_basic_entities
16
50
  assert_encode('&amp;', '&', :basic)
17
51
  assert_encode('&quot;', '"')
18
52
  assert_encode('&lt;', '<', :basic)
19
53
  assert_encode('&lt;', '<')
20
54
  end
55
+
56
+ def test_should_encode_basic_entities_to_decimal
57
+ assert_encode('&#38;', '&', :decimal)
58
+ assert_encode('&#34;', '"', :decimal)
59
+ assert_encode('&#60;', '<', :decimal)
60
+ assert_encode('&#62;', '>', :decimal)
61
+ assert_encode('&#39;', "'", :decimal)
62
+ end
63
+
64
+ def test_should_encode_basic_entities_to_hexadecimal
65
+ assert_encode('&#x26;', '&', :hexadecimal)
66
+ assert_encode('&#x22;', '"', :hexadecimal)
67
+ assert_encode('&#x3c;', '<', :hexadecimal)
68
+ assert_encode('&#x3e;', '>', :hexadecimal)
69
+ assert_encode('&#x27;', "'", :hexadecimal)
70
+ end
21
71
 
22
- def test_extended_decoding
72
+ def test_should_decode_extended_named_entities
23
73
  assert_decode('±', '&plusmn;')
24
74
  assert_decode('ð', '&eth;')
25
75
  assert_decode('Œ', '&OElig;')
26
76
  assert_decode('œ', '&oelig;')
27
77
  end
28
-
29
- def test_extended_encoding
78
+
79
+ def test_should_encode_extended_named_entities
30
80
  assert_encode('&plusmn;', '±', :named)
31
81
  assert_encode('&eth;', 'ð', :named)
32
82
  assert_encode('&OElig;', 'Œ', :named)
33
83
  assert_encode('&oelig;', 'œ', :named)
34
84
  end
35
85
 
36
- def test_decimal_decoding
86
+ def test_should_decode_decimal_entities
37
87
  assert_decode('“', '&#8220;')
38
88
  assert_decode('…', '&#8230;')
39
89
  assert_decode(' ', '&#32;')
40
90
  end
41
-
42
- def test_decimal_encoding
91
+
92
+ def test_should_encode_decimal_entities
43
93
  assert_encode('&#8220;', '“', :decimal)
44
94
  assert_encode('&#8230;', '…', :decimal)
45
95
  end
46
96
 
47
- def test_hexadecimal_decoding
97
+ def test_should_decode_hexadecimal_entities
48
98
  assert_decode('−', '&#x2212;')
49
99
  assert_decode('—', '&#x2014;')
50
100
  assert_decode('`', '&#x0060;')
51
101
  assert_decode('`', '&#x60;')
52
102
  end
53
-
54
- def test_hexadecimal_encoding
103
+
104
+ def test_should_encode_hexadecimal_entities
55
105
  assert_encode('&#x2212;', '−', :hexadecimal)
56
106
  assert_encode('&#x2014;', '—', :hexadecimal)
57
107
  end
58
108
 
59
- def test_mixed_decoding
109
+ def test_should_decode_text_with_mix_of_entities
60
110
  # Just a random headline - I needed something with accented letters.
61
111
  assert_decode(
62
- 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
112
+ 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
63
113
  'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
64
114
  )
65
- assert_decode(
115
+ assert_decode(
66
116
  '"bientôt" & 文字',
67
117
  '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
68
118
  )
69
119
  end
70
120
 
71
- def test_mixed_encoding
121
+ def test_should_encode_text_using_mix_of_entities
72
122
  assert_encode(
73
123
  '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
74
124
  '"bientôt" & 文字', :basic, :named, :hexadecimal
@@ -78,8 +128,8 @@ class TestHTMLEntities < Test::Unit::TestCase
78
128
  '"bientôt" & 文字', :basic, :named, :decimal
79
129
  )
80
130
  end
81
-
82
- def test_mixed_encoding_with_sort
131
+
132
+ def test_should_sort_commands_when_encoding_using_mix_of_entities
83
133
  assert_encode(
84
134
  '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
85
135
  '"bientôt" & 文字', :named, :hexadecimal, :basic
@@ -90,40 +140,67 @@ class TestHTMLEntities < Test::Unit::TestCase
90
140
  )
91
141
  end
92
142
 
93
- def test_detect_illegal_encoding_command
143
+ def test_should_detect_illegal_encoding_command
94
144
  assert_raise(HTMLEntities::InstructionError) {
95
145
  HTMLEntities.encode_entities('foo', :bar, :baz)
96
146
  }
97
147
  end
98
148
 
99
- def test_edge_case_decoding
149
+ def test_should_decode_empty_string
100
150
  assert_decode('', '')
151
+ end
152
+
153
+ def test_should_skip_unknown_entity
101
154
  assert_decode('&bogus;', '&bogus;')
155
+ end
156
+
157
+ def test_should_decode_double_encoded_entity_once
102
158
  assert_decode('&amp;', '&amp;amp;')
103
159
  end
104
160
 
105
- def test_edge_case_encoding
161
+ def test_should_not_encode_normal_ASCII
106
162
  assert_encode('`', '`')
107
163
  assert_encode(' ', ' ')
108
- assert_encode('&amp;amp;', '&amp;')
164
+ end
165
+
166
+ def test_should_double_encode_existing_entity
109
167
  assert_encode('&amp;amp;', '&amp;')
110
168
  end
111
169
 
112
170
  # Faults found and patched by Moonwolf
113
- def test_moonwolf_decoding
114
- assert_decode("\x2", '&#2;')
115
- assert_decode("\xf", '&#xf;')
171
+ def test_should_decode_full_hexadecimal_range
172
+ (0..127).each do |codepoint|
173
+ assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
174
+ end
175
+ end
176
+
177
+ # Reported by Dallas DeVries and Johan Duflost
178
+ def test_should_decode_named_entities_reported_as_missing_in_3_0_1
179
+ assert_decode([178].pack('U'), '&sup2;')
180
+ assert_decode([8226].pack('U'), '&bull;')
181
+ assert_decode([948].pack('U'), '&delta;')
182
+ end
183
+
184
+ def test_should_ducktype_parameter_to_string_before_encoding
185
+ pseudo_string = PseudoString.new('foo')
186
+ assert_decode('foo', pseudo_string)
187
+ end
188
+
189
+ def test_should_ducktype_parameter_to_string_before_decoding
190
+ pseudo_string = PseudoString.new('foo')
191
+ assert_encode('foo', pseudo_string)
116
192
  end
117
-
118
- private
119
193
 
120
194
  def assert_decode(expected, input)
121
- assert_equal(expected, HTMLEntities.decode_entities(input))
195
+ [xhtml1_entities, html4_entities].each do |coder|
196
+ assert_equal(expected, coder.decode(input))
197
+ end
122
198
  end
123
-
199
+
124
200
  def assert_encode(expected, input, *args)
125
- assert_equal(expected, HTMLEntities.encode_entities(input, *args))
201
+ [xhtml1_entities, html4_entities].each do |coder|
202
+ assert_equal(expected, coder.encode(input, *args))
203
+ end
126
204
  end
127
205
 
128
206
  end
129
-