htmlentities 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') # put ../lib (relative to this file) at the front of the load path
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+ # $KCODE is set to 'u' only when String has no #encoding method (presumably Ruby older than 1.9).
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
+ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
9
+
10
+ attr_reader :xhtml1_entities, :html4_entities
11
+
12
+ def setup
13
+ @xhtml1_entities = HTMLEntities.new('xhtml1')
14
+ @html4_entities = HTMLEntities.new('html4')
15
+ end
16
+
17
+ class PseudoString
18
+ def initialize(string)
19
+ @string = string
20
+ end
21
+ def to_s
22
+ @string
23
+ end
24
+ end
25
+
26
+ def test_should_raise_exception_when_unknown_flavor_specified
27
+ assert_raises(HTMLEntities::UnknownFlavor) do
28
+ HTMLEntities.new('foo')
29
+ end
30
+ end
31
+
32
+ def test_should_allow_symbol_for_flavor
33
+ assert_nothing_raised do
34
+ HTMLEntities.new(:xhtml1)
35
+ end
36
+ end
37
+
38
+ def test_should_allow_upper_case_flavor
39
+ assert_nothing_raised do
40
+ HTMLEntities.new('XHTML1')
41
+ end
42
+ end
43
+
44
+ def test_should_decode_basic_entities
45
+ assert_decode('&', '&amp;')
46
+ assert_decode('<', '&lt;')
47
+ assert_decode('"', '&quot;')
48
+ end
49
+
50
+ def test_should_encode_basic_entities
51
+ assert_encode('&amp;', '&', :basic)
52
+ assert_encode('&quot;', '"')
53
+ assert_encode('&lt;', '<', :basic)
54
+ assert_encode('&lt;', '<')
55
+ end
56
+
57
+ def test_should_encode_basic_entities_to_decimal
58
+ assert_encode('&#38;', '&', :decimal)
59
+ assert_encode('&#34;', '"', :decimal)
60
+ assert_encode('&#60;', '<', :decimal)
61
+ assert_encode('&#62;', '>', :decimal)
62
+ assert_encode('&#39;', "'", :decimal)
63
+ end
64
+
65
+ def test_should_encode_basic_entities_to_hexadecimal
66
+ assert_encode('&#x26;', '&', :hexadecimal)
67
+ assert_encode('&#x22;', '"', :hexadecimal)
68
+ assert_encode('&#x3c;', '<', :hexadecimal)
69
+ assert_encode('&#x3e;', '>', :hexadecimal)
70
+ assert_encode('&#x27;', "'", :hexadecimal)
71
+ end
72
+
73
+ def test_should_decode_extended_named_entities
74
+ assert_decode('±', '&plusmn;')
75
+ assert_decode('ð', '&eth;')
76
+ assert_decode('Œ', '&OElig;')
77
+ assert_decode('œ', '&oelig;')
78
+ end
79
+
80
+ def test_should_encode_extended_named_entities
81
+ assert_encode('&plusmn;', '±', :named)
82
+ assert_encode('&eth;', 'ð', :named)
83
+ assert_encode('&OElig;', 'Œ', :named)
84
+ assert_encode('&oelig;', 'œ', :named)
85
+ end
86
+
87
+ def test_should_decode_decimal_entities
88
+ assert_decode('“', '&#8220;')
89
+ assert_decode('…', '&#8230;')
90
+ assert_decode(' ', '&#32;')
91
+ end
92
+
93
+ def test_should_encode_decimal_entities
94
+ assert_encode('&#8220;', '“', :decimal)
95
+ assert_encode('&#8230;', '…', :decimal)
96
+ end
97
+
98
+ def test_should_decode_hexadecimal_entities
99
+ assert_decode('−', '&#x2212;')
100
+ assert_decode('—', '&#x2014;')
101
+ assert_decode('`', '&#x0060;')
102
+ assert_decode('`', '&#x60;')
103
+ end
104
+
105
+ def test_should_encode_hexadecimal_entities
106
+ assert_encode('&#x2212;', '−', :hexadecimal)
107
+ assert_encode('&#x2014;', '—', :hexadecimal)
108
+ end
109
+
110
+ def test_should_decode_text_with_mix_of_entities
111
+ # Just a random headline - I needed something with accented letters.
112
+ assert_decode(
113
+ 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
114
+ 'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
115
+ )
116
+ assert_decode(
117
+ '"bientôt" & 文字',
118
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
119
+ )
120
+ end
121
+
122
+ def test_should_encode_text_using_mix_of_entities
123
+ assert_encode(
124
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
125
+ '"bientôt" & 文字', :basic, :named, :hexadecimal
126
+ )
127
+ assert_encode(
128
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
129
+ '"bientôt" & 文字', :basic, :named, :decimal
130
+ )
131
+ end
132
+
133
+ def test_should_sort_commands_when_encoding_using_mix_of_entities
134
+ assert_encode(
135
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
136
+ '"bientôt" & 文字', :named, :hexadecimal, :basic
137
+ )
138
+ assert_encode(
139
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
140
+ '"bientôt" & 文字', :decimal, :named, :basic
141
+ )
142
+ end
143
+
144
+ def test_should_detect_illegal_encoding_command
145
+ assert_raise(HTMLEntities::InstructionError) {
146
+ HTMLEntities.encode_entities('foo', :bar, :baz)
147
+ }
148
+ end
149
+
150
+ def test_should_decode_empty_string
151
+ assert_decode('', '')
152
+ end
153
+
154
+ def test_should_skip_unknown_entity
155
+ assert_decode('&bogus;', '&bogus;')
156
+ end
157
+
158
+ def test_should_decode_double_encoded_entity_once
159
+ assert_decode('&amp;', '&amp;amp;')
160
+ end
161
+
162
+ def test_should_not_encode_normal_ASCII
163
+ assert_encode('`', '`')
164
+ assert_encode(' ', ' ')
165
+ end
166
+
167
+ def test_should_double_encode_existing_entity
168
+ assert_encode('&amp;amp;', '&amp;')
169
+ end
170
+
171
+ # Faults found and patched by Moonwolf
172
+ def test_should_decode_full_hexadecimal_range
173
+ (0..127).each do |codepoint|
174
+ assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
175
+ end
176
+ end
177
+
178
+ # Reported by Dallas DeVries and Johan Duflost
179
+ def test_should_decode_named_entities_reported_as_missing_in_3_0_1
180
+ assert_decode([178].pack('U'), '&sup2;')
181
+ assert_decode([8226].pack('U'), '&bull;')
182
+ assert_decode([948].pack('U'), '&delta;')
183
+ end
184
+
185
+ if RUBY_VERSION =~ /^1\.8\./
186
+ # Reported by Benoit Larroque
187
+ def test_should_encode_without_error_when_KCODE_is_not_UTF_8
188
+ kcode = $KCODE
189
+ $KCODE = "n"
190
+ coder = HTMLEntities.new;
191
+ text = [8212].pack('U')
192
+ assert_equal "&#8212;", coder.encode(text, :decimal)
193
+ $KCODE = kcode
194
+ end
195
+ end
196
+
197
+ def test_should_ducktype_parameter_to_string_before_encoding
198
+ pseudo_string = PseudoString.new('foo')
199
+ assert_decode('foo', pseudo_string)
200
+ end
201
+
202
+ def test_should_ducktype_parameter_to_string_before_decoding
203
+ pseudo_string = PseudoString.new('foo')
204
+ assert_encode('foo', pseudo_string)
205
+ end
206
+
207
+ def assert_decode(expected, input)
208
+ [xhtml1_entities, html4_entities].each do |coder|
209
+ assert_equal(expected, coder.decode(input))
210
+ end
211
+ end
212
+
213
+ def assert_encode(expected, input, *args)
214
+ [xhtml1_entities, html4_entities].each do |coder|
215
+ assert_equal(expected, coder.encode(input, *args))
216
+ end
217
+ end
218
+
219
+ end
@@ -0,0 +1,112 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') # put ../lib (relative to this file) at the front of the load path
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+ # $KCODE is set to 'u' only when String has no #encoding method (presumably Ruby older than 1.9).
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
+ class HTMLEntities::ExpandedTest < Test::Unit::TestCase
9
+
10
+ attr_reader :html_entities
11
+
12
+ def setup
13
+ @html_entities = HTMLEntities.new(:expanded)
14
+ end
15
+
16
+ TEST_ENTITIES_SET = [
17
+ ['sub', 0x2282, "xhtml", nil, "⊂", ],
18
+ ['sup', 0x2283, "xhtml", nil, "⊃", ],
19
+ ['nsub', 0x2284, "xhtml", nil, "⊄", ],
20
+ ['subE', 0x2286, nil, "skip", "⊆", ],
21
+ ['sube', 0x2286, "xhtml", nil, "⊆", ],
22
+ ['supE', 0x2287, nil, "skip", "⊇", ],
23
+ ['supe', 0x2287, "xhtml", nil, "⊇", ],
24
+ ['bottom', 0x22a5, nil, "skip", "⊥", ],
25
+ ['perp', 0x22a5, "xhtml", nil, "⊥", ],
26
+ ['models', 0x22a7, nil, nil, "⊧", ],
27
+ ['vDash', 0x22a8, nil, nil, "⊨", ],
28
+ ['Vdash', 0x22a9, nil, nil, "⊩", ],
29
+ ['Vvdash', 0x22aa, nil, nil, "⊪", ],
30
+ ['nvdash', 0x22ac, nil, nil, "⊬", ],
31
+ ['nvDash', 0x22ad, nil, nil, "⊭", ],
32
+ ['nVdash', 0x22ae, nil, nil, "⊮", ],
33
+ ['nsubE', 0x2288, nil, nil, "⊈", ],
34
+ ['nsube', 0x2288, nil, "skip", "⊈", ],
35
+ ['nsupE', 0x2289, nil, nil, "⊉", ],
36
+ ['nsupe', 0x2289, nil, "skip", "⊉", ],
37
+ ['subnE', 0x228a, nil, nil, "⊊", ],
38
+ ['subne', 0x228a, nil, "skip", "⊊", ],
39
+ ['vsubnE', 0x228a, nil, "skip", "⊊", ],
40
+ ['vsubne', 0x228a, nil, "skip", "⊊", ],
41
+ ['nsc', 0x2281, nil, nil, "⊁", ],
42
+ ['nsup', 0x2285, nil, nil, "⊅", ],
43
+ ['b.alpha', 0x03b1, nil, "skip", "α", ],
44
+ ['b.beta', 0x03b2, nil, "skip", "β", ],
45
+ ['b.chi', 0x03c7, nil, "skip", "χ", ],
46
+ ['b.Delta', 0x0394, nil, "skip", "Δ", ],
47
+ ]
48
+
49
+ def test_should_encode_apos_entity
50
+ assert_equal "&apos;", html_entities.encode("'", :named) # note: the normal ' 0x0027, not ʼ 0x02BC
51
+ end
52
+
53
+ def test_should_decode_apos_entity
54
+ assert_equal "é'", html_entities.decode("&eacute;&apos;")
55
+ end
56
+
57
+ def test_should_decode_dotted_entity
58
+ assert_equal "Θ", html_entities.decode("&b.Theta;")
59
+ end
60
+
61
+ def test_should_encode_from_test_set
62
+ TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded|
63
+ next if skip
64
+ assert_equal "&#{ent};", html_entities.encode(decoded, :named)
65
+ end
66
+ end
67
+
68
+ def test_should_decode_from_test_set
69
+ TEST_ENTITIES_SET.each do |ent, _, _, _, decoded|
70
+ assert_equal decoded, html_entities.decode("&#{ent};")
71
+ end
72
+ end
73
+
74
+ def test_should_round_trip_preferred_entities
75
+ TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded|
76
+ next if skip
77
+ assert_equal "&#{ent};", html_entities.encode(html_entities.decode("&#{ent};"), :named)
78
+ assert_equal decoded, html_entities.decode(html_entities.encode(decoded, :named))
79
+ end
80
+ end
81
+
82
+ def test_should_not_round_trip_decoding_skipped_entities
83
+ TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded|
84
+ next unless skip
85
+ assert_not_equal "&#{ent};", html_entities.encode(html_entities.decode("&#{ent};"), :named)
86
+ end
87
+ end
88
+
89
+ def test_should_round_trip_encoding_skipped_entities
90
+ TEST_ENTITIES_SET.each do |ent, _, _, skip, decoded|
91
+ next unless skip
92
+ assert_equal decoded, html_entities.decode(html_entities.encode(decoded, :named))
93
+ end
94
+ end
95
+
96
+ def test_should_treat_all_xhtml1_named_entities_as_xhtml_does
97
+ xhtml_encoder = HTMLEntities.new(:xhtml1)
98
+ HTMLEntities::MAPPINGS['xhtml1'].each do |ent, decoded|
99
+ assert_equal xhtml_encoder.decode("&#{ent};"), html_entities.decode("&#{ent};")
100
+ assert_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
101
+ end
102
+ end
103
+
104
+ def test_should_not_agree_with_xhtml1_when_not_in_xhtml
105
+ xhtml_encoder = HTMLEntities.new(:xhtml1)
106
+ TEST_ENTITIES_SET.each do |ent, _, xhtml1, skip, decoded|
107
+ next if xhtml1 || skip
108
+ assert_not_equal xhtml_encoder.decode("&#{ent};"), html_entities.decode("&#{ent};")
109
+ assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') # put ../lib (relative to this file) at the front of the load path
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+ # $KCODE is set to 'u' only when String has no #encoding method (presumably Ruby older than 1.9).
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
+ class HTML4Test < Test::Unit::TestCase
9
+ # Checks what an 'html4' coder leaves alone: the apostrophe, &apos; and dotted entity names.
10
+ attr_reader :html_entities
11
+
12
+ def setup
13
+ @html_entities = HTMLEntities.new('html4')
14
+ end
15
+
16
+ # Found by Marcos Kuhns
17
+ def test_should_not_encode_apos_entity
18
+ assert_equal "'", html_entities.encode("'", :basic)
19
+ end
20
+ # &apos; is left as-is by the html4 coder, while the &eacute; next to it is decoded.
21
+ def test_should_not_decode_apos_entity
22
+ assert_equal "é&apos;", html_entities.decode("&eacute;&apos;")
23
+ end
24
+ # A dotted entity name passes through the html4 coder undecoded.
25
+ def test_should_not_decode_dotted_entity
26
+ assert_equal "&b.Theta;", html_entities.decode("&b.Theta;")
27
+ end
28
+
29
+ end
@@ -0,0 +1,35 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib') # put ../lib (relative to this file) at the front of the load path
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+ # $KCODE is set to 'u' only when String has no #encoding method (presumably Ruby older than 1.9).
6
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
+
8
+ #
9
+ # Test that version 3.x functionality still works
10
+ #
11
+ class HTMLEntities::LegacyTest < Test::Unit::TestCase
12
+ # Goes through the class-level HTMLEntities.decode_entities / encode_entities methods via the helpers below.
13
+ def test_should_decode_via_legacy_interface
14
+ assert_decode('&', '&amp;')
15
+ assert_decode('±', '&plusmn;')
16
+ assert_decode('“', '&#8220;')
17
+ assert_decode('—', '&#x2014;')
18
+ end
19
+ # One case per instruction: :basic, :named, :decimal and :hexadecimal.
20
+ def test_should_encode_via_legacy_interface
21
+ assert_encode('&amp;', '&', :basic)
22
+ assert_encode('&eth;', 'ð', :named)
23
+ assert_encode('&#8230;', '…', :decimal)
24
+ assert_encode('&#x2212;', '−', :hexadecimal)
25
+ end
26
+ # Asserts that HTMLEntities.encode_entities(*encode_args) returns +expected+.
27
+ def assert_encode(expected, *encode_args)
28
+ assert_equal expected, HTMLEntities.encode_entities(*encode_args)
29
+ end
30
+ # Asserts that HTMLEntities.decode_entities(*decode_args) returns +expected+.
31
+ def assert_decode(expected, *decode_args)
32
+ assert_equal expected, HTMLEntities.decode_entities(*decode_args)
33
+ end
34
+
35
+ end
@@ -0,0 +1,93 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
3
+ require 'test/unit'
4
+ require 'htmlentities'
5
+
6
+ class HTMLEntities::RoundtripTest < Test::Unit::TestCase
7
+
8
+ attr_reader :xhtml1_entities, :html4_entities
9
+
10
+ def setup
11
+ @xhtml1_entities = HTMLEntities.new('xhtml1')
12
+ @html4_entities = HTMLEntities.new('html4')
13
+ end
14
+
15
+ def test_should_roundtrip_xhtml1_entities_via_named_encoding
16
+ each_mapping('xhtml1') do |name, string|
17
+ assert_equal(
18
+ string,
19
+ xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
20
+ )
21
+ end
22
+ end
23
+
24
+ def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
25
+ each_mapping('xhtml1') do |name, string|
26
+ assert_equal(
27
+ string,
28
+ xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
29
+ )
30
+ end
31
+ end
32
+
33
+ def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
34
+ each_mapping('xhtml1') do |name, string|
35
+ assert_equal(
36
+ string,
37
+ xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
38
+ )
39
+ end
40
+ end
41
+
42
+ def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
43
+ each_mapping('xhtml1') do |name, string|
44
+ assert_equal(
45
+ string,
46
+ xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
47
+ )
48
+ end
49
+ end
50
+
51
+ def test_should_roundtrip_html4_entities_via_named_encoding
52
+ each_mapping('html4') do |name, string|
53
+ assert_equal(
54
+ string,
55
+ html4_entities.decode(html4_entities.encode(string, :named))
56
+ )
57
+ end
58
+ end
59
+
60
+ def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
61
+ each_mapping('html4') do |name, string|
62
+ assert_equal(
63
+ string,
64
+ html4_entities.decode(html4_entities.encode(string, :basic, :named))
65
+ )
66
+ end
67
+ end
68
+
69
+ def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
70
+ each_mapping('html4') do |name, string|
71
+ assert_equal(
72
+ string,
73
+ html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
74
+ )
75
+ end
76
+ end
77
+
78
+ def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
79
+ each_mapping('html4') do |name, string|
80
+ assert_equal(
81
+ string,
82
+ html4_entities.decode(html4_entities.encode(string, :hexadecimal))
83
+ )
84
+ end
85
+ end
86
+
87
+ def each_mapping(flavor)
88
+ HTMLEntities::MAPPINGS[flavor].each do |name, codepoint|
89
+ yield name, [codepoint].pack('U')
90
+ end
91
+ end
92
+
93
+ end