jerska-htmlentities 4.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
class HTMLEntities
  # Version of the library, exposed both as individual numeric components
  # and as a single dotted string.
  module VERSION #:nodoc:
    MAJOR = 4
    MINOR = 3
    TINY  = 3

    # Dotted form of the components above ("4.3.3"). Frozen so the published
    # version cannot be mutated in place through the constant.
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
data/perf/benchmark.rb ADDED
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ require File.expand_path("../performance", __FILE__)
5
+ require "benchmark"
6
+
7
# Build the workload once, outside of the timed sections.
workload = HTMLEntitiesJob.new

# Untimed pass over both directions: warm up to give JRuby a fair shake.
workload.all(100)

# Time 100 cycles of each direction and print one report row per direction.
Benchmark.benchmark do |bench|
  bench.report("Encoding") do
    workload.encode(100)
  end
  bench.report("Decoding") do
    workload.decode(100)
  end
end
@@ -0,0 +1,31 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
5
+ require "htmlentities"
6
+
7
# Repeatable encode/decode workload shared by the benchmark and profile
# scripts. The text comes from the "sample" file located next to this file.
class HTMLEntitiesJob
  # Creates the coder, reads the sample text and pre-encodes it once so that
  # #decode has entity-laden input to work on.
  def initialize
    @coder = HTMLEntities.new
    sample_dir = File.dirname(__FILE__)
    @decoded = File.read(File.join(sample_dir, "sample"))
    @encoded = @coder.encode(@decoded, :basic, :named, :hexadecimal)
  end

  # Encodes the sample text +cycles+ times; each cycle runs one hexadecimal
  # and one decimal pass. Returns +cycles+.
  def encode(cycles)
    cycles.times {
      @coder.encode(@decoded, :basic, :named, :hexadecimal)
      @coder.encode(@decoded, :basic, :named, :decimal)
    }
  end

  # Decodes the pre-encoded sample text +cycles+ times. Returns +cycles+.
  def decode(cycles)
    cycles.times { @coder.decode(@encoded) }
  end

  # Runs +cycles+ encode cycles followed by +cycles+ decode cycles.
  def all(cycles)
    encode(cycles)
    decode(cycles)
  end
end
data/perf/profile.rb ADDED
@@ -0,0 +1,17 @@
1
+ # encoding: UTF-8
2
+ $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
+
4
+ require File.expand_path("../performance", __FILE__)
5
+ require "profiler"
6
+
7
# Build the workload before profiling starts so that setup is not profiled.
profiled_job = HTMLEntitiesJob.new

# Profile a single encode cycle and print the report to standard output.
puts "Encoding"
Profiler__.start_profile
profiled_job.encode(1)
Profiler__.print_profile($stdout)

# Same again for a single decode cycle.
puts "Decoding"
Profiler__.start_profile
profiled_job.decode(1)
Profiler__.print_profile($stdout)
@@ -0,0 +1,164 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Decoding tests. Every expectation is checked against coders for the
# :xhtml1, :html4 and :expanded flavors. Most scenarios come in pairs: one
# that expects the entity to be decoded, and one that passes +exclude:+ and
# expects the matching entity to be left untouched.
class HTMLEntities::DecodingTest < Test::Unit::TestCase

  # Builds one coder per flavor.
  def setup
    @entities = [:xhtml1, :html4, :expanded].map { |a| HTMLEntities.new(a) }
  end

  # Asserts that every coder from #setup decodes +input+ to +expected+.
  # +options+ is handed to #decode unchanged as its second argument.
  def assert_decode(expected, input, options = {})
    @entities.each do |coder|
      assert_equal expected, coder.decode(input, options)
    end
  end

  def test_should_decode_basic_entities
    assert_decode '&', '&amp;'
    assert_decode '<', '&lt;'
    assert_decode '"', '&quot;'
  end

  def test_should_not_decode_excluded_basic_entities
    assert_decode '&amp;', '&amp;', exclude: ['&']
    assert_decode '&lt;', '&lt;', exclude: ['<']
    assert_decode '&quot;', '&quot;', exclude: ['"']
  end

  def test_should_decode_extended_named_entities
    assert_decode '±', '&plusmn;'
    assert_decode 'ð', '&eth;'
    assert_decode 'Œ', '&OElig;'
    assert_decode 'œ', '&oelig;'
  end

  def test_should_not_decode_excluded_extended_named_entities
    assert_decode '&plusmn;', '&plusmn;', exclude: ['±']
    assert_decode '&eth;', '&eth;', exclude: ['ð']
    assert_decode '&OElig;', '&OElig;', exclude: ['Œ']
    assert_decode '&oelig;', '&oelig;', exclude: ['œ']
  end

  def test_should_decode_decimal_entities
    assert_decode '“', '&#8220;'
    assert_decode '…', '&#8230;'
    assert_decode ' ', '&#32;'
  end

  def test_should_not_decode_excluded_decimal_entities
    assert_decode '&#8220;', '&#8220;', exclude: ['“']
    assert_decode '&#8230;', '&#8230;', exclude: ['…']
    assert_decode '&#32;', '&#32;', exclude: [' ']
  end

  def test_should_decode_hexadecimal_entities
    assert_decode '−', '&#x2212;'
    assert_decode '—', '&#x2014;'
    assert_decode '`', '&#x0060;'
    assert_decode '`', '&#x60;'
  end

  def test_should_not_decode_excluded_hexadecimal_entities
    assert_decode '&#x2212;', '&#x2212;', exclude: ['−']
    assert_decode '&#x2014;', '&#x2014;', exclude: ['—']
    assert_decode '&#x0060;', '&#x0060;', exclude: ['`']
    assert_decode '&#x60;', '&#x60;', exclude: ['`']
  end

  # Decoding must leave the caller's string untouched, both without options
  # and when an exclusion list is supplied.
  def test_should_not_mutate_string_being_decoded
    original = "&lt;&#163;"
    input = original.dup

    HTMLEntities.new.decode(input)
    assert_equal original, input

    # The option key is :exclude, as in every other test of this class; the
    # previous spelling (:excluded) did not match it, so the exclusion case
    # was not actually being exercised here.
    HTMLEntities.new.decode(input, exclude: ['a'])
    assert_equal original, input
  end

  def test_should_decode_text_with_mix_of_entities
    # Just a random headline - I needed something with accented letters.
    assert_decode(
      'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
    )
    assert_decode(
      '"bientôt" & 文字',
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
    )
  end

  def test_should_decode_text_with_mix_of_entities_only_not_excluded
    # Just a random headline - I needed something with accented letters.
    assert_decode(
      'Le tabac pourrait bient&ocirc;t être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France',
      exclude: ['ô']
    )
    assert_decode(
      '"bientôt" & &#25991;字',
      '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;',
      exclude: ['文']
    )
    # An excluded character that does not occur in the input changes nothing.
    assert_decode(
      'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
      'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France',
      exclude: ['文']
    )
  end

  def test_should_decode_empty_string
    assert_decode '', ''
    assert_decode '', '', exclude: ['a']
  end

  def test_should_skip_unknown_entity
    assert_decode '&bogus;', '&bogus;'
    assert_decode '&bogus;', '&bogus;', exclude: ['a']
  end

  # Only one level of encoding is removed per call.
  def test_should_decode_double_encoded_entity_once
    assert_decode '&amp;', '&amp;amp;'
    assert_decode '&amp;', '&amp;amp;', exclude: ['a']
  end

  # Faults found and patched by Moonwolf
  def test_should_decode_full_hexadecimal_range
    (0..127).each do |codepoint|
      assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
    end
  end

  def test_should_not_decode_full_hexadecimal_range_if_excluded
    (0..127).each do |codepoint|
      assert_decode "&\#x#{codepoint.to_s(16)};", "&\#x#{codepoint.to_s(16)};", exclude: [[codepoint].pack('U')]
    end
  end

  # Reported by Dallas DeVries and Johan Duflost
  def test_should_decode_named_entities_reported_as_missing_in_3_0_1
    assert_decode [178].pack('U'), '&sup2;'
    assert_decode [8226].pack('U'), '&bull;'
    assert_decode [948].pack('U'), '&delta;'
  end

  def test_should_not_decode_named_entities_reported_as_missing_in_3_0_1_if_excluded
    assert_decode '&sup2;', '&sup2;', exclude: [[178].pack('U')]
    assert_decode '&bull;', '&bull;', exclude: [[8226].pack('U')]
    assert_decode '&delta;', '&delta;', exclude: [[948].pack('U')]
  end

  # Reported by ckruse
  def test_should_decode_only_first_element_in_masked_entities
    input = '&amp;#3346;'
    expected = '&#3346;'
    assert_decode expected, input
  end

  # The method name says "encoding", but the assertion exercises #decode with
  # an object that only responds to #to_s.
  def test_should_ducktype_parameter_to_string_before_encoding
    obj = Object.new
    def obj.to_s; "foo"; end
    assert_decode "foo", obj
  end

end
@@ -0,0 +1,106 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Encoding tests. Every expectation is checked against coders for the
# :xhtml1, :html4 and :expanded flavors.
class HTMLEntities::EncodingTest < Test::Unit::TestCase

  # Builds one coder per flavor.
  def setup
    @entities = [:xhtml1, :html4, :expanded].map { |flavor| HTMLEntities.new(flavor) }
  end

  # Asserts that every coder from #setup encodes +input+ to +expected+; any
  # extra arguments are forwarded to #encode as encoding instructions.
  def assert_encode(expected, input, *args)
    @entities.each { |coder| assert_equal(expected, coder.encode(input, *args)) }
  end

  def test_should_encode_basic_entities
    [
      ['&amp;', '&', :basic],
      ['&quot;', '"'],
      ['&lt;', '<', :basic],
      ['&lt;', '<']
    ].each { |expectation| assert_encode(*expectation) }
  end

  def test_should_encode_basic_entities_to_decimal
    [
      ['&#38;', '&'],
      ['&#34;', '"'],
      ['&#60;', '<'],
      ['&#62;', '>'],
      ['&#39;', "'"]
    ].each { |entity, char| assert_encode(entity, char, :decimal) }
  end

  def test_should_encode_basic_entities_to_hexadecimal
    [
      ['&#x26;', '&'],
      ['&#x22;', '"'],
      ['&#x3c;', '<'],
      ['&#x3e;', '>'],
      ['&#x27;', "'"]
    ].each { |entity, char| assert_encode(entity, char, :hexadecimal) }
  end

  def test_should_encode_extended_named_entities
    [
      ['&plusmn;', '±'],
      ['&eth;', 'ð'],
      ['&OElig;', 'Œ'],
      ['&oelig;', 'œ']
    ].each { |entity, char| assert_encode(entity, char, :named) }
  end

  def test_should_encode_decimal_entities
    [
      ['&#8220;', '“'],
      ['&#8230;', '…']
    ].each { |entity, char| assert_encode(entity, char, :decimal) }
  end

  def test_should_encode_hexadecimal_entities
    [
      ['&#x2212;', '−'],
      ['&#x2014;', '—']
    ].each { |entity, char| assert_encode(entity, char, :hexadecimal) }
  end

  def test_should_encode_text_using_mix_of_entities
    plain = '"bientôt" & 文字'
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;', plain, :basic, :named, :hexadecimal)
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;', plain, :basic, :named, :decimal)
  end

  # Same expectations as the mixed-entities test above, but with the
  # instructions given in a different order.
  def test_should_sort_commands_when_encoding_using_mix_of_entities
    plain = '"bientôt" & 文字'
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;', plain, :named, :hexadecimal, :basic)
    assert_encode('&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;', plain, :decimal, :named, :basic)
  end

  def test_should_detect_illegal_encoding_command
    assert_raise(HTMLEntities::InstructionError) { HTMLEntities.new.encode('foo', :bar, :baz) }
  end

  def test_should_not_encode_normal_ASCII
    ['`', ' '].each { |char| assert_encode(char, char) }
  end

  def test_should_double_encode_existing_entity
    assert_encode('&amp;amp;', '&amp;')
  end

  # Encoding must leave the caller's string untouched.
  def test_should_not_mutate_string_being_encoded
    pristine = "<£"
    subject = pristine.dup
    HTMLEntities.new.encode(subject, :basic, :decimal)

    assert_equal(pristine, subject)
  end

  # An object that only responds to #to_s is accepted as encoder input.
  def test_should_ducktype_parameter_to_string_before_encoding
    stringish = Object.new
    def stringish.to_s
      "foo"
    end
    assert_encode("foo", stringish)
  end
end
@@ -0,0 +1,24 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Tests for how the flavor argument of HTMLEntities.new is accepted.
class HTMLEntities::EntitiesTest < Test::Unit::TestCase

  # An unrecognised flavor name is rejected with UnknownFlavor.
  def test_should_raise_exception_when_unknown_flavor_specified
    assert_raises(HTMLEntities::UnknownFlavor) { HTMLEntities.new('foo') }
  end

  # A flavor may be given as a symbol.
  def test_should_allow_symbol_for_flavor
    assert_nothing_raised { HTMLEntities.new(:xhtml1) }
  end

  # A flavor may be given as an upper-case string.
  def test_should_allow_upper_case_flavor
    assert_nothing_raised { HTMLEntities.new('XHTML1') }
  end

end
@@ -0,0 +1,109 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
# Tests for the :expanded flavor, including how it relates to :xhtml1.
class HTMLEntities::ExpandedTest < Test::Unit::TestCase

  attr_reader :html_entities

  # Every test uses a single coder for the :expanded flavor.
  def setup
    @html_entities = HTMLEntities.new(:expanded)
  end

  # One row per entity:
  #   [name, numeric value, xhtml marker, skip marker, decoded character]
  # - the numeric value is not read by any test in this class;
  # - a non-nil xhtml marker excludes the row from the "differs from xhtml1" test;
  # - a non-nil skip marker means encoding the character is not expected to
  #   give back this particular name (see the round-trip tests below).
  TEST_ENTITIES_SET = [
    ['sub',     0x2282, "xhtml", nil,    "⊂"],
    ['sup',     0x2283, "xhtml", nil,    "⊃"],
    ['nsub',    0x2284, "xhtml", nil,    "⊄"],
    ['subE',    0x2286, nil,     "skip", "⊆"],
    ['sube',    0x2286, "xhtml", nil,    "⊆"],
    ['supE',    0x2287, nil,     "skip", "⊇"],
    ['supe',    0x2287, "xhtml", nil,    "⊇"],
    ['bottom',  0x22a5, nil,     "skip", "⊥"],
    ['perp',    0x22a5, "xhtml", nil,    "⊥"],
    ['models',  0x22a7, nil,     nil,    "⊧"],
    ['vDash',   0x22a8, nil,     nil,    "⊨"],
    ['Vdash',   0x22a9, nil,     nil,    "⊩"],
    ['Vvdash',  0x22aa, nil,     nil,    "⊪"],
    ['nvdash',  0x22ac, nil,     nil,    "⊬"],
    ['nvDash',  0x22ad, nil,     nil,    "⊭"],
    ['nVdash',  0x22ae, nil,     nil,    "⊮"],
    ['nsubE',   0x2288, nil,     nil,    "⊈"],
    ['nsube',   0x2288, nil,     "skip", "⊈"],
    ['nsupE',   0x2289, nil,     nil,    "⊉"],
    ['nsupe',   0x2289, nil,     "skip", "⊉"],
    ['subnE',   0x228a, nil,     nil,    "⊊"],
    ['subne',   0x228a, nil,     "skip", "⊊"],
    ['vsubnE',  0x228a, nil,     "skip", "⊊"],
    ['vsubne',  0x228a, nil,     "skip", "⊊"],
    ['nsc',     0x2281, nil,     nil,    "⊁"],
    ['nsup',    0x2285, nil,     nil,    "⊅"],
    ['b.alpha', 0x03b1, nil,     "skip", "α"],
    ['b.beta',  0x03b2, nil,     "skip", "β"],
    ['b.chi',   0x03c7, nil,     "skip", "χ"],
    ['b.Delta', 0x0394, nil,     "skip", "Δ"]
  ]

  def test_should_encode_apos_entity
    # note: the normal ' 0x0027, not ʼ 0x02BC
    assert_equal("&apos;", html_entities.encode("'", :named))
  end

  def test_should_decode_apos_entity
    assert_equal("é'", html_entities.decode("&eacute;&apos;"))
  end

  def test_should_decode_dotted_entity
    assert_equal("Θ", html_entities.decode("&b.Theta;"))
  end

  # Non-skipped rows: encoding the character yields the row's entity name.
  def test_should_encode_from_test_set
    TEST_ENTITIES_SET.each do |name, _value, _xhtml, skipped, char|
      assert_equal("&#{name};", html_entities.encode(char, :named)) unless skipped
    end
  end

  # All rows, skipped or not: the entity decodes to the row's character.
  def test_should_decode_from_test_set
    TEST_ENTITIES_SET.each do |name, _value, _xhtml, _skipped, char|
      assert_equal(char, html_entities.decode("&#{name};"))
    end
  end

  # Non-skipped rows survive a round trip in both directions.
  def test_should_round_trip_preferred_entities
    TEST_ENTITIES_SET.each do |name, _value, _xhtml, skipped, char|
      next if skipped
      assert_equal("&#{name};", html_entities.encode(html_entities.decode("&#{name};"), :named))
      assert_equal(char, html_entities.decode(html_entities.encode(char, :named)))
    end
  end

  # Skipped rows: decode followed by encode does not give the same name back.
  def test_should_not_round_trip_decoding_skipped_entities
    TEST_ENTITIES_SET.each do |name, _value, _xhtml, skipped, _char|
      if skipped
        assert_not_equal("&#{name};", html_entities.encode(html_entities.decode("&#{name};"), :named))
      end
    end
  end

  # Skipped rows: encode followed by decode still restores the character.
  def test_should_round_trip_encoding_skipped_entities
    TEST_ENTITIES_SET.each do |_name, _value, _xhtml, skipped, char|
      if skipped
        assert_equal(char, html_entities.decode(html_entities.encode(char, :named)))
      end
    end
  end

  # For every entity in the xhtml1 mapping, the :expanded coder and an
  # :xhtml1 coder agree in both directions.
  def test_should_treat_all_xhtml1_named_entities_as_xhtml_does
    xhtml_coder = HTMLEntities.new(:xhtml1)
    HTMLEntities::MAPPINGS['xhtml1'].each do |name, char|
      entity = "&#{name};"
      assert_equal(xhtml_coder.decode(entity), html_entities.decode(entity))
      assert_equal(xhtml_coder.encode(char, :named), html_entities.encode(char, :named))
    end
  end

  # Rows carrying neither marker must give different results from an :xhtml1
  # coder in both directions.
  def test_should_not_agree_with_xhtml1_when_not_in_xhtml
    xhtml_coder = HTMLEntities.new(:xhtml1)
    TEST_ENTITIES_SET.each do |name, _value, in_xhtml1, skipped, char|
      next if in_xhtml1 || skipped
      entity = "&#{name};"
      assert_not_equal(xhtml_coder.decode(entity), html_entities.decode(entity))
      assert_not_equal(xhtml_coder.encode(char, :named), html_entities.encode(char, :named))
    end
  end

end