jerska-htmlentities 4.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
class HTMLEntities
  # Version components of the library, plus the dotted string built from them.
  module VERSION #:nodoc:
    MAJOR = 4
    MINOR = 3
    TINY  = 3

    # "MAJOR.MINOR.TINY". Frozen so that callers cannot mutate the shared
    # constant in place (Array#join returns a fresh, unfrozen String).
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
data/perf/benchmark.rb ADDED
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ require File.expand_path("../performance", __FILE__)
5
+ require "benchmark"
6
+
7
# Time 100 encode cycles and 100 decode cycles of the shared workload.
workload = HTMLEntitiesJob.new
workload.all(100) # Warm up to give JRuby a fair shake.

Benchmark.benchmark do |bench|
  bench.report("Encoding") do
    workload.encode(100)
  end
  bench.report("Decoding") do
    workload.decode(100)
  end
end
@@ -0,0 +1,31 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
5
+ require "htmlentities"
6
+
7
# Repeatable encode/decode workload over the "sample" file that sits next to
# this script.
class HTMLEntitiesJob
  # Creates the coder, reads the sample text, and pre-computes its encoded
  # form so that #decode has input to work on.
  def initialize
    @coder = HTMLEntities.new
    sample_path = File.join(File.dirname(__FILE__), "sample")
    @decoded = File.read(sample_path)
    @encoded = @coder.encode(@decoded, :basic, :named, :hexadecimal)
  end

  # Runs +cycles+ iterations; each iteration encodes the sample text twice,
  # first with the :hexadecimal instruction and then with :decimal.
  def encode(cycles)
    cycles.times do
      [:hexadecimal, :decimal].each do |numeric_instruction|
        @coder.encode(@decoded, :basic, :named, numeric_instruction)
      end
    end
  end

  # Decodes the pre-encoded sample text +cycles+ times.
  def decode(cycles)
    cycles.times { @coder.decode(@encoded) }
  end

  # Runs the encode workload followed by the decode workload.
  def all(cycles)
    encode(cycles)
    decode(cycles)
  end
end
data/perf/profile.rb ADDED
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ require File.expand_path("../performance", __FILE__)
5
+ require "profiler"
6
+
7
# Profile one encode cycle and one decode cycle of the shared workload,
# printing a labelled report for each to standard output.
workload = HTMLEntitiesJob.new

$stdout.puts "Encoding"
Profiler__.start_profile
workload.encode(1)
Profiler__.print_profile($stdout)

$stdout.puts "Decoding"
Profiler__.start_profile
workload.decode(1)
Profiler__.print_profile($stdout)
@@ -0,0 +1,164 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Exercises HTMLEntities#decode against coders built for the :xhtml1, :html4
# and :expanded flavors, both with and without the :exclude option.
class HTMLEntities::DecodingTest < Test::Unit::TestCase

  # Build one coder per flavor; assert_decode checks every one of them.
  def setup
    @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
  end

  # Asserts that each coder decodes +input+ to +expected+.
  # +options+ is handed to HTMLEntities#decode unchanged.
  def assert_decode(expected, input, options = {})
    @entities.each do |coder|
      assert_equal expected, coder.decode(input, options)
    end
  end

  def test_should_decode_basic_entities
    assert_decode '&', '&amp;'
    assert_decode '<', '&lt;'
    assert_decode '"', '&quot;'
  end

  def test_should_not_decode_excluded_basic_entities
    assert_decode '&amp;', '&amp;', exclude: ['&']
    assert_decode '&lt;', '&lt;', exclude: ['<']
    assert_decode '&quot;', '&quot;', exclude: ['"']
  end

  def test_should_decode_extended_named_entities
    assert_decode '±', '&plusmn;'
    assert_decode 'ð', '&eth;'
    assert_decode 'Œ', '&OElig;'
    assert_decode 'œ', '&oelig;'
  end

  def test_should_not_decode_excluded_extended_named_entities
    assert_decode '&plusmn;', '&plusmn;', exclude: ['±']
    assert_decode '&eth;', '&eth;', exclude: ['ð']
    assert_decode '&OElig;', '&OElig;', exclude: ['Œ']
    assert_decode '&oelig;', '&oelig;', exclude: ['œ']
  end

  def test_should_decode_decimal_entities
    assert_decode '“', '&#8220;'
    assert_decode '…', '&#8230;'
    assert_decode ' ', '&#32;'
  end

  def test_should_not_decode_excluded_decimal_entities
    assert_decode '&#8220;', '&#8220;', exclude: ['“']
    assert_decode '&#8230;', '&#8230;', exclude: ['…']
    assert_decode '&#32;', '&#32;', exclude: [' ']
  end

  def test_should_decode_hexadecimal_entities
    assert_decode '−', '&#x2212;'
    assert_decode '—', '&#x2014;'
    assert_decode '`', '&#x0060;'
    assert_decode '`', '&#x60;'
  end

  def test_should_not_decode_excluded_hexadecimal_entities
    assert_decode '&#x2212;', '&#x2212;', exclude: ['−']
    assert_decode '&#x2014;', '&#x2014;', exclude: ['—']
    assert_decode '&#x0060;', '&#x0060;', exclude: ['`']
    assert_decode '&#x60;', '&#x60;', exclude: ['`']
  end

  # The caller's string must be left untouched, with and without exclusions.
  def test_should_not_mutate_string_being_decoded
    original = "&lt;&#163;"
    input = original.dup

    HTMLEntities.new.decode(input)
    assert_equal original, input

    # The option key is :exclude, as in every other test of this class; the
    # previous :excluded key never exercised the exclusion path.
    HTMLEntities.new.decode(input, exclude: ['a'])
    assert_equal original, input
  end

  def test_should_decode_text_with_mix_of_entities
    # Just a random headline - I needed something with accented letters.
    assert_decode(
      'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
    )
    assert_decode(
      '"bientôt" & 文字',
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
    )
  end

  def test_should_decode_text_with_mix_of_entities_only_not_excluded
    # Just a random headline - I needed something with accented letters.
    assert_decode(
      'Le tabac pourrait bient&ocirc;t être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France',
      exclude: ['ô']
    )
    assert_decode(
      '"bientôt" & &#25991;字',
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;',
      exclude: ['文']
    )
    # Excluding a character that does not occur leaves decoding unaffected.
    assert_decode(
      'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France',
      exclude: ['文']
    )
  end

  def test_should_decode_empty_string
    assert_decode '', ''
    assert_decode '', '', exclude: ['a']
  end

  def test_should_skip_unknown_entity
    assert_decode '&bogus;', '&bogus;'
    assert_decode '&bogus;', '&bogus;', exclude: ['a']
  end

  def test_should_decode_double_encoded_entity_once
    assert_decode '&amp;', '&amp;amp;'
    assert_decode '&amp;', '&amp;amp;', exclude: ['a']
  end

  # Faults found and patched by Moonwolf
  def test_should_decode_full_hexadecimal_range
    (0..127).each do |codepoint|
      assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
    end
  end

  def test_should_not_decode_full_hexadecimal_range_if_excluded
    (0..127).each do |codepoint|
      assert_decode "&\#x#{codepoint.to_s(16)};", "&\#x#{codepoint.to_s(16)};", exclude: [[codepoint].pack('U')]
    end
  end

  # Reported by Dallas DeVries and Johan Duflost
  def test_should_decode_named_entities_reported_as_missing_in_3_0_1
    assert_decode [178].pack('U'), '&sup2;'
    assert_decode [8226].pack('U'), '&bull;'
    assert_decode [948].pack('U'), '&delta;'
  end

  def test_should_not_decode_named_entities_reported_as_missing_in_3_0_1_if_excluded
    assert_decode '&sup2;', '&sup2;', exclude: [[178].pack('U')]
    assert_decode '&bull;', '&bull;', exclude: [[8226].pack('U')]
    assert_decode '&delta;', '&delta;', exclude: [[948].pack('U')]
  end

  # Reported by ckruse
  def test_should_decode_only_first_element_in_masked_entities
    input = '&amp;#3346;'
    expected = '&#3346;'
    assert_decode expected, input
  end

  # A non-String argument that responds to to_s is accepted by decode.
  # (Named "..._before_decoding": this is the decode suite.)
  def test_should_ducktype_parameter_to_string_before_decoding
    obj = Object.new
    def obj.to_s; "foo"; end
    assert_decode "foo", obj
  end

end
@@ -0,0 +1,106 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Checks HTMLEntities#encode against coders built for the :xhtml1, :html4 and
# :expanded flavors.
class HTMLEntities::EncodingTest < Test::Unit::TestCase

  # One coder per flavor; assert_encode runs against all of them.
  def setup
    @entities = [:xhtml1, :html4, :expanded].map { |flavor| HTMLEntities.new(flavor) }
  end

  # Asserts that every coder encodes +input+ to +expected+. Any further
  # arguments are forwarded to HTMLEntities#encode as instructions.
  def assert_encode(expected, input, *args)
    @entities.each { |coder| assert_equal(expected, coder.encode(input, *args)) }
  end

  def test_should_encode_basic_entities
    [
      ['&amp;', '&', :basic],
      ['&quot;', '"'],
      ['&lt;', '<', :basic],
      ['&lt;', '<']
    ].each { |arguments| assert_encode(*arguments) }
  end

  def test_should_encode_basic_entities_to_decimal
    [
      ['&#38;', '&'],
      ['&#34;', '"'],
      ['&#60;', '<'],
      ['&#62;', '>'],
      ['&#39;', "'"]
    ].each { |entity, char| assert_encode(entity, char, :decimal) }
  end

  def test_should_encode_basic_entities_to_hexadecimal
    [
      ['&#x26;', '&'],
      ['&#x22;', '"'],
      ['&#x3c;', '<'],
      ['&#x3e;', '>'],
      ['&#x27;', "'"]
    ].each { |entity, char| assert_encode(entity, char, :hexadecimal) }
  end

  def test_should_encode_extended_named_entities
    [
      ['&plusmn;', '±'],
      ['&eth;', 'ð'],
      ['&OElig;', 'Œ'],
      ['&oelig;', 'œ']
    ].each { |entity, char| assert_encode(entity, char, :named) }
  end

  def test_should_encode_decimal_entities
    [
      ['&#8220;', '“'],
      ['&#8230;', '…']
    ].each { |entity, char| assert_encode(entity, char, :decimal) }
  end

  def test_should_encode_hexadecimal_entities
    [
      ['&#x2212;', '−'],
      ['&#x2014;', '—']
    ].each { |entity, char| assert_encode(entity, char, :hexadecimal) }
  end

  def test_should_encode_text_using_mix_of_entities
    plain = '"bientôt" & 文字'
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;', plain, :basic, :named, :hexadecimal)
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;', plain, :basic, :named, :decimal)
  end

  # Same expectations as the mixed test above, with the instructions given
  # in a different order.
  def test_should_sort_commands_when_encoding_using_mix_of_entities
    plain = '"bientôt" & 文字'
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;', plain, :named, :hexadecimal, :basic)
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;', plain, :decimal, :named, :basic)
  end

  def test_should_detect_illegal_encoding_command
    assert_raise(HTMLEntities::InstructionError) { HTMLEntities.new.encode('foo', :bar, :baz) }
  end

  def test_should_not_encode_normal_ASCII
    ['`', ' '].each { |char| assert_encode(char, char) }
  end

  def test_should_double_encode_existing_entity
    assert_encode('&amp;amp;', '&amp;')
  end

  # The caller's string must be left untouched by encode.
  def test_should_not_mutate_string_being_encoded
    pristine = "<£"
    working_copy = pristine.dup
    HTMLEntities.new.encode(working_copy, :basic, :decimal)

    assert_equal(pristine, working_copy)
  end

  # A non-String argument that responds to to_s is accepted by encode.
  def test_should_ducktype_parameter_to_string_before_encoding
    stringish = Object.new
    def stringish.to_s; "foo"; end
    assert_encode("foo", stringish)
  end
end
@@ -0,0 +1,24 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Checks which flavor arguments HTMLEntities.new accepts.
class HTMLEntities::EntitiesTest < Test::Unit::TestCase

  # An unrecognised flavor name raises HTMLEntities::UnknownFlavor.
  def test_should_raise_exception_when_unknown_flavor_specified
    assert_raises(HTMLEntities::UnknownFlavor) { HTMLEntities.new('foo') }
  end

  # The flavor may be given as a Symbol.
  def test_should_allow_symbol_for_flavor
    assert_nothing_raised { HTMLEntities.new(:xhtml1) }
  end

  # The flavor may be given as an upper-case String.
  def test_should_allow_upper_case_flavor
    assert_nothing_raised { HTMLEntities.new('XHTML1') }
  end

end
@@ -0,0 +1,109 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Tests for the :expanded flavor, driven mostly by TEST_ENTITIES_SET.
class HTMLEntities::ExpandedTest < Test::Unit::TestCase

  attr_reader :html_entities

  def setup
    @html_entities = HTMLEntities.new(:expanded)
  end

  # Each row is [entity name, codepoint, xhtml marker, skip marker, decoded text].
  # Rows with a skip marker are left out of the encode-side tests; rows with an
  # xhtml marker are left out of the "does not agree with xhtml1" test. The
  # codepoint column is not read by any test in this class.
  TEST_ENTITIES_SET = [
    ['sub',     0x2282, "xhtml", nil,    "⊂"],
    ['sup',     0x2283, "xhtml", nil,    "⊃"],
    ['nsub',    0x2284, "xhtml", nil,    "⊄"],
    ['subE',    0x2286, nil,     "skip", "⊆"],
    ['sube',    0x2286, "xhtml", nil,    "⊆"],
    ['supE',    0x2287, nil,     "skip", "⊇"],
    ['supe',    0x2287, "xhtml", nil,    "⊇"],
    ['bottom',  0x22a5, nil,     "skip", "⊥"],
    ['perp',    0x22a5, "xhtml", nil,    "⊥"],
    ['models',  0x22a7, nil,     nil,    "⊧"],
    ['vDash',   0x22a8, nil,     nil,    "⊨"],
    ['Vdash',   0x22a9, nil,     nil,    "⊩"],
    ['Vvdash',  0x22aa, nil,     nil,    "⊪"],
    ['nvdash',  0x22ac, nil,     nil,    "⊬"],
    ['nvDash',  0x22ad, nil,     nil,    "⊭"],
    ['nVdash',  0x22ae, nil,     nil,    "⊮"],
    ['nsubE',   0x2288, nil,     nil,    "⊈"],
    ['nsube',   0x2288, nil,     "skip", "⊈"],
    ['nsupE',   0x2289, nil,     nil,    "⊉"],
    ['nsupe',   0x2289, nil,     "skip", "⊉"],
    ['subnE',   0x228a, nil,     nil,    "⊊"],
    ['subne',   0x228a, nil,     "skip", "⊊"],
    ['vsubnE',  0x228a, nil,     "skip", "⊊"],
    ['vsubne',  0x228a, nil,     "skip", "⊊"],
    ['nsc',     0x2281, nil,     nil,    "⊁"],
    ['nsup',    0x2285, nil,     nil,    "⊅"],
    ['b.alpha', 0x03b1, nil,     "skip", "α"],
    ['b.beta',  0x03b2, nil,     "skip", "β"],
    ['b.chi',   0x03c7, nil,     "skip", "χ"],
    ['b.Delta', 0x0394, nil,     "skip", "Δ"]
  ]

  def test_should_encode_apos_entity
    # note: the normal ' 0x0027, not ʼ 0x02BC
    assert_equal "&apos;", html_entities.encode("'", :named)
  end

  def test_should_decode_apos_entity
    assert_equal "é'", html_entities.decode("&eacute;&apos;")
  end

  def test_should_decode_dotted_entity
    assert_equal "Θ", html_entities.decode("&b.Theta;")
  end

  def test_should_encode_from_test_set
    preferred_rows.each do |name, _, _, _, text|
      assert_equal "&#{name};", html_entities.encode(text, :named)
    end
  end

  def test_should_decode_from_test_set
    TEST_ENTITIES_SET.each do |name, _, _, _, text|
      assert_equal text, html_entities.decode("&#{name};")
    end
  end

  def test_should_round_trip_preferred_entities
    preferred_rows.each do |name, _, _, _, text|
      entity = "&#{name};"
      assert_equal entity, html_entities.encode(html_entities.decode(entity), :named)
      assert_equal text, html_entities.decode(html_entities.encode(text, :named))
    end
  end

  def test_should_not_round_trip_decoding_skipped_entities
    skipped_rows.each do |name, *_|
      entity = "&#{name};"
      assert_not_equal entity, html_entities.encode(html_entities.decode(entity), :named)
    end
  end

  def test_should_round_trip_encoding_skipped_entities
    skipped_rows.each do |_, _, _, _, text|
      assert_equal text, html_entities.decode(html_entities.encode(text, :named))
    end
  end

  def test_should_treat_all_xhtml1_named_entities_as_xhtml_does
    xhtml_encoder = HTMLEntities.new(:xhtml1)
    HTMLEntities::MAPPINGS['xhtml1'].each do |name, text|
      entity = "&#{name};"
      assert_equal xhtml_encoder.decode(entity), html_entities.decode(entity)
      assert_equal xhtml_encoder.encode(text, :named), html_entities.encode(text, :named)
    end
  end

  def test_should_not_agree_with_xhtml1_when_not_in_xhtml
    xhtml_encoder = HTMLEntities.new(:xhtml1)
    TEST_ENTITIES_SET.each do |name, _, xhtml1, skip, text|
      next if xhtml1 || skip
      entity = "&#{name};"
      assert_not_equal xhtml_encoder.decode(entity), html_entities.decode(entity)
      assert_not_equal xhtml_encoder.encode(text, :named), html_entities.encode(text, :named)
    end
  end

  private

  # Rows without a skip marker, in table order.
  def preferred_rows
    TEST_ENTITIES_SET.reject { |row| row[3] }
  end

  # Rows carrying a skip marker, in table order.
  def skipped_rows
    TEST_ENTITIES_SET.select { |row| row[3] }
  end

end