htmlentities 4.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,219 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
# Exercises encoding and decoding through coders built for the 'xhtml1' and
# 'html4' flavors. The assert_encode / assert_decode helpers at the bottom run
# every expectation against both coders.
class HTMLEntities::EntitiesTest < Test::Unit::TestCase

  attr_reader :xhtml1_entities, :html4_entities

  # Builds one coder per flavor before each test.
  def setup
    @xhtml1_entities = HTMLEntities.new('xhtml1')
    @html4_entities = HTMLEntities.new('html4')
  end

  # Minimal object that is not a String but responds to #to_s; used by the
  # ducktype tests below.
  class PseudoString
    def initialize(string)
      @string = string
    end

    def to_s
      @string
    end
  end

  def test_should_raise_exception_when_unknown_flavor_specified
    assert_raises(HTMLEntities::UnknownFlavor) do
      HTMLEntities.new('foo')
    end
  end

  def test_should_allow_symbol_for_flavor
    assert_nothing_raised do
      HTMLEntities.new(:xhtml1)
    end
  end

  def test_should_allow_upper_case_flavor
    assert_nothing_raised do
      HTMLEntities.new('XHTML1')
    end
  end

  def test_should_decode_basic_entities
    assert_decode('&', '&amp;')
    assert_decode('<', '&lt;')
    assert_decode('"', '&quot;')
  end

  def test_should_encode_basic_entities
    assert_encode('&amp;', '&', :basic)
    assert_encode('&quot;', '"')
    assert_encode('&lt;', '<', :basic)
    assert_encode('&lt;', '<')
  end

  def test_should_encode_basic_entities_to_decimal
    assert_encode('&#38;', '&', :decimal)
    assert_encode('&#34;', '"', :decimal)
    assert_encode('&#60;', '<', :decimal)
    assert_encode('&#62;', '>', :decimal)
    assert_encode('&#39;', "'", :decimal)
  end

  def test_should_encode_basic_entities_to_hexadecimal
    assert_encode('&#x26;', '&', :hexadecimal)
    assert_encode('&#x22;', '"', :hexadecimal)
    assert_encode('&#x3c;', '<', :hexadecimal)
    assert_encode('&#x3e;', '>', :hexadecimal)
    assert_encode('&#x27;', "'", :hexadecimal)
  end

  def test_should_decode_extended_named_entities
    assert_decode('±', '&plusmn;')
    assert_decode('ð', '&eth;')
    assert_decode('Œ', '&OElig;')
    assert_decode('œ', '&oelig;')
  end

  def test_should_encode_extended_named_entities
    assert_encode('&plusmn;', '±', :named)
    assert_encode('&eth;', 'ð', :named)
    assert_encode('&OElig;', 'Œ', :named)
    assert_encode('&oelig;', 'œ', :named)
  end

  def test_should_decode_decimal_entities
    assert_decode('“', '&#8220;')
    assert_decode('…', '&#8230;')
    assert_decode(' ', '&#32;')
  end

  def test_should_encode_decimal_entities
    assert_encode('&#8220;', '“', :decimal)
    assert_encode('&#8230;', '…', :decimal)
  end

  def test_should_decode_hexadecimal_entities
    assert_decode('−', '&#x2212;')
    assert_decode('—', '&#x2014;')
    assert_decode('`', '&#x0060;')
    assert_decode('`', '&#x60;')
  end

  def test_should_encode_hexadecimal_entities
    assert_encode('&#x2212;', '−', :hexadecimal)
    assert_encode('&#x2014;', '—', :hexadecimal)
  end

  def test_should_decode_text_with_mix_of_entities
    # Just a random headline - I needed something with accented letters.
    assert_decode(
      'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
    )
    assert_decode(
      '"bientôt" & 文字',
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
    )
  end

  def test_should_encode_text_using_mix_of_entities
    assert_encode(
      '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
      '"bientôt" & 文字', :basic, :named, :hexadecimal
    )
    assert_encode(
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
      '"bientôt" & 文字', :basic, :named, :decimal
    )
  end

  # Same expectations as the previous test, but with the instructions passed
  # in a different order.
  def test_should_sort_commands_when_encoding_using_mix_of_entities
    assert_encode(
      '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
      '"bientôt" & 文字', :named, :hexadecimal, :basic
    )
    assert_encode(
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
      '"bientôt" & 文字', :decimal, :named, :basic
    )
  end

  def test_should_detect_illegal_encoding_command
    assert_raise(HTMLEntities::InstructionError) {
      HTMLEntities.encode_entities('foo', :bar, :baz)
    }
  end

  def test_should_decode_empty_string
    assert_decode('', '')
  end

  def test_should_skip_unknown_entity
    assert_decode('&bogus;', '&bogus;')
  end

  def test_should_decode_double_encoded_entity_once
    assert_decode('&amp;', '&amp;amp;')
  end

  def test_should_not_encode_normal_ASCII
    assert_encode('`', '`')
    assert_encode(' ', ' ')
  end

  def test_should_double_encode_existing_entity
    assert_encode('&amp;amp;', '&amp;')
  end

  # Faults found and patched by Moonwolf
  def test_should_decode_full_hexadecimal_range
    (0..127).each do |codepoint|
      assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
    end
  end

  # Reported by Dallas DeVries and Johan Duflost
  def test_should_decode_named_entities_reported_as_missing_in_3_0_1
    assert_decode([178].pack('U'), '&sup2;')
    assert_decode([8226].pack('U'), '&bull;')
    assert_decode([948].pack('U'), '&delta;')
  end

  if RUBY_VERSION =~ /^1\.8\./
    # Reported by Benoit Larroque
    def test_should_encode_without_error_when_KCODE_is_not_UTF_8
      kcode = $KCODE
      $KCODE = "n"
      coder = HTMLEntities.new
      text = [8212].pack('U')
      assert_equal "&#8212;", coder.encode(text, :decimal)
    ensure
      # Restore the global even when the assertion fails, so the "n" setting
      # cannot leak into tests that run afterwards.
      $KCODE = kcode
    end
  end

  # A non-String object exposing #to_s must be accepted by #encode.
  def test_should_ducktype_parameter_to_string_before_encoding
    pseudo_string = PseudoString.new('foo')
    assert_encode('foo', pseudo_string)
  end

  # A non-String object exposing #to_s must be accepted by #decode.
  def test_should_ducktype_parameter_to_string_before_decoding
    pseudo_string = PseudoString.new('foo')
    assert_decode('foo', pseudo_string)
  end

  # Asserts that both coders decode +input+ to +expected+.
  def assert_decode(expected, input)
    [xhtml1_entities, html4_entities].each do |coder|
      assert_equal(expected, coder.decode(input))
    end
  end

  # Asserts that both coders encode +input+ to +expected+; any extra +args+
  # are forwarded to #encode as encoding instructions.
  def assert_encode(expected, input, *args)
    [xhtml1_entities, html4_entities].each do |coder|
      assert_equal(expected, coder.encode(input, *args))
    end
  end

end
@@ -0,0 +1,112 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
# Tests for a coder built with the :expanded flavor.
class HTMLEntities::ExpandedTest < Test::Unit::TestCase

  attr_reader :html_entities

  def setup
    @html_entities = HTMLEntities.new(:expanded)
  end

  # Row layout: [entity name, codepoint, xhtml marker, skip marker, character].
  # Rows with a skip marker are left out of the encode and preferred
  # round-trip tests; rows with an xhtml marker (or a skip marker) are left
  # out of the "should not agree with xhtml1" test.
  TEST_ENTITIES_SET = [
    ['sub',     0x2282, "xhtml", nil,    "⊂"],
    ['sup',     0x2283, "xhtml", nil,    "⊃"],
    ['nsub',    0x2284, "xhtml", nil,    "⊄"],
    ['subE',    0x2286, nil,     "skip", "⊆"],
    ['sube',    0x2286, "xhtml", nil,    "⊆"],
    ['supE',    0x2287, nil,     "skip", "⊇"],
    ['supe',    0x2287, "xhtml", nil,    "⊇"],
    ['bottom',  0x22a5, nil,     "skip", "⊥"],
    ['perp',    0x22a5, "xhtml", nil,    "⊥"],
    ['models',  0x22a7, nil,     nil,    "⊧"],
    ['vDash',   0x22a8, nil,     nil,    "⊨"],
    ['Vdash',   0x22a9, nil,     nil,    "⊩"],
    ['Vvdash',  0x22aa, nil,     nil,    "⊪"],
    ['nvdash',  0x22ac, nil,     nil,    "⊬"],
    ['nvDash',  0x22ad, nil,     nil,    "⊭"],
    ['nVdash',  0x22ae, nil,     nil,    "⊮"],
    ['nsubE',   0x2288, nil,     nil,    "⊈"],
    ['nsube',   0x2288, nil,     "skip", "⊈"],
    ['nsupE',   0x2289, nil,     nil,    "⊉"],
    ['nsupe',   0x2289, nil,     "skip", "⊉"],
    ['subnE',   0x228a, nil,     nil,    "⊊"],
    ['subne',   0x228a, nil,     "skip", "⊊"],
    ['vsubnE',  0x228a, nil,     "skip", "⊊"],
    ['vsubne',  0x228a, nil,     "skip", "⊊"],
    ['nsc',     0x2281, nil,     nil,    "⊁"],
    ['nsup',    0x2285, nil,     nil,    "⊅"],
    ['b.alpha', 0x03b1, nil,     "skip", "α"],
    ['b.beta',  0x03b2, nil,     "skip", "β"],
    ['b.chi',   0x03c7, nil,     "skip", "χ"],
    ['b.Delta', 0x0394, nil,     "skip", "Δ"]
  ]

  def test_should_encode_apos_entity
    # note: the normal ' 0x0027, not ʼ 0x02BC
    encoded = html_entities.encode("'", :named)
    assert_equal("&apos;", encoded)
  end

  def test_should_decode_apos_entity
    decoded = html_entities.decode("&eacute;&apos;")
    assert_equal("é'", decoded)
  end

  def test_should_decode_dotted_entity
    decoded = html_entities.decode("&b.Theta;")
    assert_equal("Θ", decoded)
  end

  def test_should_encode_from_test_set
    TEST_ENTITIES_SET.reject { |row| row[3] }.each do |name, _, _, _, character|
      assert_equal(entity_text(name), html_entities.encode(character, :named))
    end
  end

  def test_should_decode_from_test_set
    TEST_ENTITIES_SET.each do |name, _, _, _, character|
      assert_equal(character, html_entities.decode(entity_text(name)))
    end
  end

  def test_should_round_trip_preferred_entities
    TEST_ENTITIES_SET.reject { |row| row[3] }.each do |name, _, _, _, character|
      assert_equal(
        entity_text(name),
        html_entities.encode(html_entities.decode(entity_text(name)), :named)
      )
      assert_equal(
        character,
        html_entities.decode(html_entities.encode(character, :named))
      )
    end
  end

  def test_should_not_round_trip_decoding_skipped_entities
    TEST_ENTITIES_SET.select { |row| row[3] }.each do |name, _, _, _, _|
      assert_not_equal(
        entity_text(name),
        html_entities.encode(html_entities.decode(entity_text(name)), :named)
      )
    end
  end

  def test_should_round_trip_encoding_skipped_entities
    TEST_ENTITIES_SET.select { |row| row[3] }.each do |_, _, _, _, character|
      assert_equal(
        character,
        html_entities.decode(html_entities.encode(character, :named))
      )
    end
  end

  def test_should_treat_all_xhtml1_named_entities_as_xhtml_does
    reference = HTMLEntities.new(:xhtml1)
    HTMLEntities::MAPPINGS['xhtml1'].each do |name, mapped|
      assert_equal(
        reference.decode(entity_text(name)),
        html_entities.decode(entity_text(name))
      )
      assert_equal(
        reference.encode(mapped, :named),
        html_entities.encode(mapped, :named)
      )
    end
  end

  def test_should_not_agree_with_xhtml1_when_not_in_xhtml
    reference = HTMLEntities.new(:xhtml1)
    expanded_only = TEST_ENTITIES_SET.reject { |row| row[2] || row[3] }
    expanded_only.each do |name, _, _, _, character|
      assert_not_equal(
        reference.decode(entity_text(name)),
        html_entities.decode(entity_text(name))
      )
      assert_not_equal(
        reference.encode(character, :named),
        html_entities.encode(character, :named)
      )
    end
  end

  # Returns a new "&name;" string on every call, so no string object is
  # shared between the calls under test.
  def entity_text(name)
    "&#{name};"
  end
end
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
# Checks, for a coder built with the 'html4' flavor, that &apos; and dotted
# entity names are left untouched.
class HTML4Test < Test::Unit::TestCase

  attr_reader :html_entities

  def setup
    @html_entities = HTMLEntities.new('html4')
  end

  # Found by Marcos Kuhns
  def test_should_not_encode_apos_entity
    encoded = html_entities.encode("'", :basic)
    assert_equal("'", encoded)
  end

  def test_should_not_decode_apos_entity
    decoded = html_entities.decode("&eacute;&apos;")
    assert_equal("é&apos;", decoded)
  end

  def test_should_not_decode_dotted_entity
    decoded = html_entities.decode("&b.Theta;")
    assert_equal("&b.Theta;", decoded)
  end

end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
+ #
9
+ # Test that version 3.x functionality still works
10
+ #
11
# Drives the class-level HTMLEntities.encode_entities / decode_entities
# methods instead of a coder instance.
class HTMLEntities::LegacyTest < Test::Unit::TestCase

  def test_should_decode_via_legacy_interface
    [
      ['&', '&amp;'],
      ['±', '&plusmn;'],
      ['“', '&#8220;'],
      ['—', '&#x2014;']
    ].each do |plain, encoded|
      assert_decode(plain, encoded)
    end
  end

  def test_should_encode_via_legacy_interface
    [
      ['&amp;',    '&', :basic],
      ['&eth;',    'ð', :named],
      ['&#8230;',  '…', :decimal],
      ['&#x2212;', '−', :hexadecimal]
    ].each do |encoded, plain, instruction|
      assert_encode(encoded, plain, instruction)
    end
  end

  # Passes +encode_args+ straight to HTMLEntities.encode_entities and compares
  # the result with +expected+.
  def assert_encode(expected, *encode_args)
    actual = HTMLEntities.encode_entities(*encode_args)
    assert_equal(expected, actual)
  end

  # Passes +decode_args+ straight to HTMLEntities.decode_entities and compares
  # the result with +expected+.
  def assert_decode(expected, *decode_args)
    actual = HTMLEntities.decode_entities(*decode_args)
    assert_equal(expected, actual)
  end

end
@@ -0,0 +1,93 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
# Checks that every entry of HTMLEntities::MAPPINGS for the 'xhtml1' and
# 'html4' flavors survives an encode followed by a decode, under several
# combinations of encoding instructions.
class HTMLEntities::RoundtripTest < Test::Unit::TestCase

  attr_reader :xhtml1_entities, :html4_entities

  # Builds one coder per flavor before each test.
  def setup
    @xhtml1_entities = HTMLEntities.new('xhtml1')
    @html4_entities = HTMLEntities.new('html4')
  end

  def test_should_roundtrip_xhtml1_entities_via_named_encoding
    assert_roundtrip(xhtml1_entities, 'xhtml1', :named)
  end

  def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
    assert_roundtrip(xhtml1_entities, 'xhtml1', :basic, :named)
  end

  def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
    assert_roundtrip(xhtml1_entities, 'xhtml1', :basic, :named, :decimal)
  end

  def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
    assert_roundtrip(xhtml1_entities, 'xhtml1', :hexadecimal)
  end

  def test_should_roundtrip_html4_entities_via_named_encoding
    assert_roundtrip(html4_entities, 'html4', :named)
  end

  def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
    assert_roundtrip(html4_entities, 'html4', :basic, :named)
  end

  def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
    assert_roundtrip(html4_entities, 'html4', :basic, :named, :decimal)
  end

  def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
    assert_roundtrip(html4_entities, 'html4', :hexadecimal)
  end

  # Yields each entity name in HTMLEntities::MAPPINGS[flavor] together with
  # its codepoint packed into a string via pack('U').
  def each_mapping(flavor)
    HTMLEntities::MAPPINGS[flavor].each do |name, codepoint|
      yield name, [codepoint].pack('U')
    end
  end

  # Asserts, for every mapping of +flavor+, that encoding the character with
  # +coder+ using +instructions+ and decoding the result with the same coder
  # gives back the original character.
  def assert_roundtrip(coder, flavor, *instructions)
    each_mapping(flavor) do |_name, string|
      assert_equal(
        string,
        coder.decode(coder.encode(string, *instructions))
      )
    end
  end

end