htmlentities 3.0.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ class HTMLEntities
2
+ class << self
3
+
4
+ #
5
+ # Legacy compatibility class method allowing direct encoding of XHTML1 entities.
6
+ # See HTMLEntities#encode for description of parameters.
7
+ #
8
+ def encode_entities(*args)
9
+ xhtml1_entities.encode(*args)
10
+ end
11
+
12
+ #
13
+ # Legacy compatibility class method allowing direct decoding of XHTML1 entities.
14
+ # See HTMLEntities#decode for description of parameters.
15
+ #
16
+ def decode_entities(*args)
17
+ xhtml1_entities.decode(*args)
18
+ end
19
+
20
+ private
21
+
22
+ def xhtml1_entities
23
+ @xhtml1_entities ||= new('xhtml1')
24
+ end
25
+
26
+ end
27
+ end
@@ -0,0 +1,258 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['xhtml1'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'apos' => 39,
20
+ 'Aring' => 197,
21
+ 'aring' => 229,
22
+ 'asymp' => 8776,
23
+ 'Atilde' => 195,
24
+ 'atilde' => 227,
25
+ 'Auml' => 196,
26
+ 'auml' => 228,
27
+ 'bdquo' => 8222,
28
+ 'Beta' => 914,
29
+ 'beta' => 946,
30
+ 'brvbar' => 166,
31
+ 'bull' => 8226,
32
+ 'cap' => 8745,
33
+ 'Ccedil' => 199,
34
+ 'ccedil' => 231,
35
+ 'cedil' => 184,
36
+ 'cent' => 162,
37
+ 'Chi' => 935,
38
+ 'chi' => 967,
39
+ 'circ' => 710,
40
+ 'clubs' => 9827,
41
+ 'cong' => 8773,
42
+ 'copy' => 169,
43
+ 'crarr' => 8629,
44
+ 'cup' => 8746,
45
+ 'curren' => 164,
46
+ 'Dagger' => 8225,
47
+ 'dagger' => 8224,
48
+ 'dArr' => 8659,
49
+ 'darr' => 8595,
50
+ 'deg' => 176,
51
+ 'Delta' => 916,
52
+ 'delta' => 948,
53
+ 'diams' => 9830,
54
+ 'divide' => 247,
55
+ 'Eacute' => 201,
56
+ 'eacute' => 233,
57
+ 'Ecirc' => 202,
58
+ 'ecirc' => 234,
59
+ 'Egrave' => 200,
60
+ 'egrave' => 232,
61
+ 'empty' => 8709,
62
+ 'emsp' => 8195,
63
+ 'ensp' => 8194,
64
+ 'Epsilon' => 917,
65
+ 'epsilon' => 949,
66
+ 'equiv' => 8801,
67
+ 'Eta' => 919,
68
+ 'eta' => 951,
69
+ 'ETH' => 208,
70
+ 'eth' => 240,
71
+ 'Euml' => 203,
72
+ 'euml' => 235,
73
+ 'euro' => 8364,
74
+ 'exist' => 8707,
75
+ 'fnof' => 402,
76
+ 'forall' => 8704,
77
+ 'frac12' => 189,
78
+ 'frac14' => 188,
79
+ 'frac34' => 190,
80
+ 'frasl' => 8260,
81
+ 'Gamma' => 915,
82
+ 'gamma' => 947,
83
+ 'ge' => 8805,
84
+ 'gt' => 62,
85
+ 'hArr' => 8660,
86
+ 'harr' => 8596,
87
+ 'hearts' => 9829,
88
+ 'hellip' => 8230,
89
+ 'Iacute' => 205,
90
+ 'iacute' => 237,
91
+ 'Icirc' => 206,
92
+ 'icirc' => 238,
93
+ 'iexcl' => 161,
94
+ 'Igrave' => 204,
95
+ 'igrave' => 236,
96
+ 'image' => 8465,
97
+ 'infin' => 8734,
98
+ 'int' => 8747,
99
+ 'Iota' => 921,
100
+ 'iota' => 953,
101
+ 'iquest' => 191,
102
+ 'isin' => 8712,
103
+ 'Iuml' => 207,
104
+ 'iuml' => 239,
105
+ 'Kappa' => 922,
106
+ 'kappa' => 954,
107
+ 'Lambda' => 923,
108
+ 'lambda' => 955,
109
+ 'lang' => 9001,
110
+ 'laquo' => 171,
111
+ 'lArr' => 8656,
112
+ 'larr' => 8592,
113
+ 'lceil' => 8968,
114
+ 'ldquo' => 8220,
115
+ 'le' => 8804,
116
+ 'lfloor' => 8970,
117
+ 'lowast' => 8727,
118
+ 'loz' => 9674,
119
+ 'lrm' => 8206,
120
+ 'lsaquo' => 8249,
121
+ 'lsquo' => 8216,
122
+ 'lt' => 60,
123
+ 'macr' => 175,
124
+ 'mdash' => 8212,
125
+ 'micro' => 181,
126
+ 'middot' => 183,
127
+ 'minus' => 8722,
128
+ 'Mu' => 924,
129
+ 'mu' => 956,
130
+ 'nabla' => 8711,
131
+ 'nbsp' => 160,
132
+ 'ndash' => 8211,
133
+ 'ne' => 8800,
134
+ 'ni' => 8715,
135
+ 'not' => 172,
136
+ 'notin' => 8713,
137
+ 'nsub' => 8836,
138
+ 'Ntilde' => 209,
139
+ 'ntilde' => 241,
140
+ 'Nu' => 925,
141
+ 'nu' => 957,
142
+ 'Oacute' => 211,
143
+ 'oacute' => 243,
144
+ 'Ocirc' => 212,
145
+ 'ocirc' => 244,
146
+ 'OElig' => 338,
147
+ 'oelig' => 339,
148
+ 'Ograve' => 210,
149
+ 'ograve' => 242,
150
+ 'oline' => 8254,
151
+ 'Omega' => 937,
152
+ 'omega' => 969,
153
+ 'Omicron' => 927,
154
+ 'omicron' => 959,
155
+ 'oplus' => 8853,
156
+ 'or' => 8744,
157
+ 'ordf' => 170,
158
+ 'ordm' => 186,
159
+ 'Oslash' => 216,
160
+ 'oslash' => 248,
161
+ 'Otilde' => 213,
162
+ 'otilde' => 245,
163
+ 'otimes' => 8855,
164
+ 'Ouml' => 214,
165
+ 'ouml' => 246,
166
+ 'para' => 182,
167
+ 'part' => 8706,
168
+ 'permil' => 8240,
169
+ 'perp' => 8869,
170
+ 'Phi' => 934,
171
+ 'phi' => 966,
172
+ 'Pi' => 928,
173
+ 'pi' => 960,
174
+ 'piv' => 982,
175
+ 'plusmn' => 177,
176
+ 'pound' => 163,
177
+ 'Prime' => 8243,
178
+ 'prime' => 8242,
179
+ 'prod' => 8719,
180
+ 'prop' => 8733,
181
+ 'Psi' => 936,
182
+ 'psi' => 968,
183
+ 'quot' => 34,
184
+ 'radic' => 8730,
185
+ 'rang' => 9002,
186
+ 'raquo' => 187,
187
+ 'rArr' => 8658,
188
+ 'rarr' => 8594,
189
+ 'rceil' => 8969,
190
+ 'rdquo' => 8221,
191
+ 'real' => 8476,
192
+ 'reg' => 174,
193
+ 'rfloor' => 8971,
194
+ 'Rho' => 929,
195
+ 'rho' => 961,
196
+ 'rlm' => 8207,
197
+ 'rsaquo' => 8250,
198
+ 'rsquo' => 8217,
199
+ 'sbquo' => 8218,
200
+ 'Scaron' => 352,
201
+ 'scaron' => 353,
202
+ 'sdot' => 8901,
203
+ 'sect' => 167,
204
+ 'shy' => 173,
205
+ 'Sigma' => 931,
206
+ 'sigma' => 963,
207
+ 'sigmaf' => 962,
208
+ 'sim' => 8764,
209
+ 'spades' => 9824,
210
+ 'sub' => 8834,
211
+ 'sube' => 8838,
212
+ 'sum' => 8721,
213
+ 'sup' => 8835,
214
+ 'sup1' => 185,
215
+ 'sup2' => 178,
216
+ 'sup3' => 179,
217
+ 'supe' => 8839,
218
+ 'szlig' => 223,
219
+ 'Tau' => 932,
220
+ 'tau' => 964,
221
+ 'there4' => 8756,
222
+ 'Theta' => 920,
223
+ 'theta' => 952,
224
+ 'thetasym' => 977,
225
+ 'thinsp' => 8201,
226
+ 'THORN' => 222,
227
+ 'thorn' => 254,
228
+ 'tilde' => 732,
229
+ 'times' => 215,
230
+ 'trade' => 8482,
231
+ 'Uacute' => 218,
232
+ 'uacute' => 250,
233
+ 'uArr' => 8657,
234
+ 'uarr' => 8593,
235
+ 'Ucirc' => 219,
236
+ 'ucirc' => 251,
237
+ 'Ugrave' => 217,
238
+ 'ugrave' => 249,
239
+ 'uml' => 168,
240
+ 'upsih' => 978,
241
+ 'Upsilon' => 933,
242
+ 'upsilon' => 965,
243
+ 'Uuml' => 220,
244
+ 'uuml' => 252,
245
+ 'weierp' => 8472,
246
+ 'Xi' => 926,
247
+ 'xi' => 958,
248
+ 'Yacute' => 221,
249
+ 'yacute' => 253,
250
+ 'yen' => 165,
251
+ 'Yuml' => 376,
252
+ 'yuml' => 255,
253
+ 'Zeta' => 918,
254
+ 'zeta' => 950,
255
+ 'zwj' => 8205,
256
+ 'zwnj' => 8204
257
+ }
258
+ end
@@ -1,74 +1,124 @@
1
- $: << File.dirname(__FILE__) + '/../lib/'
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
2
  require 'htmlentities'
3
3
  require 'test/unit'
4
4
 
5
5
  $KCODE = 'u'
6
6
 
7
- class TestHTMLEntities < Test::Unit::TestCase
7
+ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
8
+
9
+ attr_reader :xhtml1_entities, :html4_entities
10
+
11
+ def setup
12
+ @xhtml1_entities = HTMLEntities.new('xhtml1')
13
+ @html4_entities = HTMLEntities.new('html4')
14
+ end
15
+
16
+ class PseudoString
17
+ def initialize(string)
18
+ @string = string
19
+ end
20
+ def to_s
21
+ @string
22
+ end
23
+ end
8
24
 
9
- def test_basic_decoding
25
+ def test_should_raise_exception_when_unknown_flavor_specified
26
+ assert_raises(HTMLEntities::UnknownFlavor) do
27
+ HTMLEntities.new('foo')
28
+ end
29
+ end
30
+
31
+ def test_should_allow_symbol_for_flavor
32
+ assert_nothing_raised do
33
+ HTMLEntities.new(:xhtml1)
34
+ end
35
+ end
36
+
37
+ def test_should_allow_upper_case_flavor
38
+ assert_nothing_raised do
39
+ HTMLEntities.new('XHTML1')
40
+ end
41
+ end
42
+
43
+ def test_should_decode_basic_entities
10
44
  assert_decode('&', '&amp;')
11
45
  assert_decode('<', '&lt;')
12
46
  assert_decode('"', '&quot;')
13
47
  end
14
-
15
- def test_basic_encoding
48
+
49
+ def test_should_encode_basic_entities
16
50
  assert_encode('&amp;', '&', :basic)
17
51
  assert_encode('&quot;', '"')
18
52
  assert_encode('&lt;', '<', :basic)
19
53
  assert_encode('&lt;', '<')
20
54
  end
55
+
56
+ def test_should_encode_basic_entities_to_decimal
57
+ assert_encode('&#38;', '&', :decimal)
58
+ assert_encode('&#34;', '"', :decimal)
59
+ assert_encode('&#60;', '<', :decimal)
60
+ assert_encode('&#62;', '>', :decimal)
61
+ assert_encode('&#39;', "'", :decimal)
62
+ end
63
+
64
+ def test_should_encode_basic_entities_to_hexadecimal
65
+ assert_encode('&#x26;', '&', :hexadecimal)
66
+ assert_encode('&#x22;', '"', :hexadecimal)
67
+ assert_encode('&#x3c;', '<', :hexadecimal)
68
+ assert_encode('&#x3e;', '>', :hexadecimal)
69
+ assert_encode('&#x27;', "'", :hexadecimal)
70
+ end
21
71
 
22
- def test_extended_decoding
72
+ def test_should_decode_extended_named_entities
23
73
  assert_decode('±', '&plusmn;')
24
74
  assert_decode('ð', '&eth;')
25
75
  assert_decode('Œ', '&OElig;')
26
76
  assert_decode('œ', '&oelig;')
27
77
  end
28
-
29
- def test_extended_encoding
78
+
79
+ def test_should_encode_extended_named_entities
30
80
  assert_encode('&plusmn;', '±', :named)
31
81
  assert_encode('&eth;', 'ð', :named)
32
82
  assert_encode('&OElig;', 'Œ', :named)
33
83
  assert_encode('&oelig;', 'œ', :named)
34
84
  end
35
85
 
36
- def test_decimal_decoding
86
+ def test_should_decode_decimal_entities
37
87
  assert_decode('“', '&#8220;')
38
88
  assert_decode('…', '&#8230;')
39
89
  assert_decode(' ', '&#32;')
40
90
  end
41
-
42
- def test_decimal_encoding
91
+
92
+ def test_should_encode_decimal_entities
43
93
  assert_encode('&#8220;', '“', :decimal)
44
94
  assert_encode('&#8230;', '…', :decimal)
45
95
  end
46
96
 
47
- def test_hexadecimal_decoding
97
+ def test_should_decode_hexadecimal_entities
48
98
  assert_decode('−', '&#x2212;')
49
99
  assert_decode('—', '&#x2014;')
50
100
  assert_decode('`', '&#x0060;')
51
101
  assert_decode('`', '&#x60;')
52
102
  end
53
-
54
- def test_hexadecimal_encoding
103
+
104
+ def test_should_encode_hexadecimal_entities
55
105
  assert_encode('&#x2212;', '−', :hexadecimal)
56
106
  assert_encode('&#x2014;', '—', :hexadecimal)
57
107
  end
58
108
 
59
- def test_mixed_decoding
109
+ def test_should_decode_text_with_mix_of_entities
60
110
  # Just a random headline - I needed something with accented letters.
61
111
  assert_decode(
62
- 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
112
+ 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
63
113
  'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
64
114
  )
65
- assert_decode(
115
+ assert_decode(
66
116
  '"bientôt" & 文字',
67
117
  '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
68
118
  )
69
119
  end
70
120
 
71
- def test_mixed_encoding
121
+ def test_should_encode_text_using_mix_of_entities
72
122
  assert_encode(
73
123
  '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
74
124
  '"bientôt" & 文字', :basic, :named, :hexadecimal
@@ -78,8 +128,8 @@ class TestHTMLEntities < Test::Unit::TestCase
78
128
  '"bientôt" & 文字', :basic, :named, :decimal
79
129
  )
80
130
  end
81
-
82
- def test_mixed_encoding_with_sort
131
+
132
+ def test_should_sort_commands_when_encoding_using_mix_of_entities
83
133
  assert_encode(
84
134
  '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
85
135
  '"bientôt" & 文字', :named, :hexadecimal, :basic
@@ -90,40 +140,67 @@ class TestHTMLEntities < Test::Unit::TestCase
90
140
  )
91
141
  end
92
142
 
93
- def test_detect_illegal_encoding_command
143
+ def test_should_detect_illegal_encoding_command
94
144
  assert_raise(HTMLEntities::InstructionError) {
95
145
  HTMLEntities.encode_entities('foo', :bar, :baz)
96
146
  }
97
147
  end
98
148
 
99
- def test_edge_case_decoding
149
+ def test_should_decode_empty_string
100
150
  assert_decode('', '')
151
+ end
152
+
153
+ def test_should_skip_unknown_entity
101
154
  assert_decode('&bogus;', '&bogus;')
155
+ end
156
+
157
+ def test_should_decode_double_encoded_entity_once
102
158
  assert_decode('&amp;', '&amp;amp;')
103
159
  end
104
160
 
105
- def test_edge_case_encoding
161
+ def test_should_not_encode_normal_ASCII
106
162
  assert_encode('`', '`')
107
163
  assert_encode(' ', ' ')
108
- assert_encode('&amp;amp;', '&amp;')
164
+ end
165
+
166
+ def test_should_double_encode_existing_entity
109
167
  assert_encode('&amp;amp;', '&amp;')
110
168
  end
111
169
 
112
170
  # Faults found and patched by Moonwolf
113
- def test_moonwolf_decoding
114
- assert_decode("\x2", '&#2;')
115
- assert_decode("\xf", '&#xf;')
171
+ def test_should_decode_full_hexadecimal_range
172
+ (0..127).each do |codepoint|
173
+ assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
174
+ end
175
+ end
176
+
177
+ # Reported by Dallas DeVries and Johan Duflost
178
+ def test_should_decode_named_entities_reported_as_missing_in_3_0_1
179
+ assert_decode([178].pack('U'), '&sup2;')
180
+ assert_decode([8226].pack('U'), '&bull;')
181
+ assert_decode([948].pack('U'), '&delta;')
182
+ end
183
+
184
+ def test_should_ducktype_parameter_to_string_before_encoding
185
+ pseudo_string = PseudoString.new('foo')
186
+ assert_decode('foo', pseudo_string)
187
+ end
188
+
189
+ def test_should_ducktype_parameter_to_string_before_decoding
190
+ pseudo_string = PseudoString.new('foo')
191
+ assert_encode('foo', pseudo_string)
116
192
  end
117
-
118
- private
119
193
 
120
194
  def assert_decode(expected, input)
121
- assert_equal(expected, HTMLEntities.decode_entities(input))
195
+ [xhtml1_entities, html4_entities].each do |coder|
196
+ assert_equal(expected, coder.decode(input))
197
+ end
122
198
  end
123
-
199
+
124
200
  def assert_encode(expected, input, *args)
125
- assert_equal(expected, HTMLEntities.encode_entities(input, *args))
201
+ [xhtml1_entities, html4_entities].each do |coder|
202
+ assert_equal(expected, coder.encode(input, *args))
203
+ end
126
204
  end
127
205
 
128
206
  end
129
-