htmlentities 4.2.4 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,16 @@
1
+ == 4.3.0 (2011-03-29)
2
+ * Use Ruby 1.9's encoding support where available.
3
+ * Deprecated HTMLEntities.encode_entities/decode_entities interface is now
4
+ removed.
5
+
6
+ == 4.2.4 (2011-01-30)
7
+ * Fix issue where double-escaped entities were not correctly decoded. Bug
8
+ reported by Christian Kruse.
9
+
10
+ == 4.2.3 (2011-01-07)
11
+ * Additional entities from Junya Ishihara.
12
+ * Performance improvements.
13
+
1
14
  == 4.2.1 (2010-04-05)
2
15
  * Fixed error on Ruby 1.8.x when $KCODE was not set to "UTF8". Thanks to
3
16
  Benoit Larroque for the bug report.
@@ -1,5 +1,4 @@
1
1
  # encoding: UTF-8
2
- require 'htmlentities/legacy'
3
2
  require 'htmlentities/flavors'
4
3
  require 'htmlentities/encoder'
5
4
  require 'htmlentities/decoder'
@@ -61,9 +60,9 @@ class HTMLEntities
61
60
  # If no instructions are specified, :basic will be used.
62
61
  #
63
62
  # Examples:
64
- # encode_entities(str) - XML-safe
65
- # encode_entities(str, :basic, :decimal) - XML-safe and 7-bit clean
66
- # encode_entities(str, :basic, :named, :decimal) - 7-bit clean, with all
63
+ # encode(str) - XML-safe
64
+ # encode(str, :basic, :decimal) - XML-safe and 7-bit clean
65
+ # encode(str, :basic, :named, :decimal) - 7-bit clean, with all
67
66
  # non-ASCII characters replaced with their named entity where possible, and
68
67
  # decimal equivalents otherwise.
69
68
  #
@@ -7,7 +7,7 @@ class HTMLEntities
7
7
  end
8
8
 
9
9
  def decode(source)
10
- source.to_s.gsub(@entity_regexp) {
10
+ prepare(source).gsub(@entity_regexp) {
11
11
  if $1 && codepoint = @map[$1]
12
12
  [codepoint].pack('U')
13
13
  elsif $2
@@ -21,6 +21,16 @@ class HTMLEntities
21
21
  end
22
22
 
23
23
  private
24
+ if "1.9".respond_to?(:encoding)
25
+ def prepare(string) #:nodoc:
26
+ string.to_s.encode(Encoding::UTF_8)
27
+ end
28
+ else
29
+ def prepare(string) #:nodoc:
30
+ string.to_s
31
+ end
32
+ end
33
+
24
34
  def entity_regexp
25
35
  key_lengths = @map.keys.map{ |k| k.length }
26
36
  entity_name_pattern =
@@ -13,13 +13,23 @@ class HTMLEntities
13
13
  end
14
14
 
15
15
  def encode(source)
16
- string = source.to_s.dup
17
- string.gsub!(basic_entity_regexp){ encode_basic($&) }
18
- string.gsub!(extended_entity_regexp){ encode_extended($&) }
19
- string
16
+ prepare(source).
17
+ gsub(basic_entity_regexp){ encode_basic($&) }.
18
+ gsub(extended_entity_regexp){ encode_extended($&) }
20
19
  end
21
20
 
22
21
  private
22
+
23
+ if "1.9".respond_to?(:encoding)
24
+ def prepare(string) #:nodoc:
25
+ string.to_s.encode(Encoding::UTF_8)
26
+ end
27
+ else
28
+ def prepare(string) #:nodoc:
29
+ string.to_s
30
+ end
31
+ end
32
+
23
33
  def basic_entity_regexp
24
34
  @basic_entity_regexp ||= (
25
35
  case @flavor
@@ -33,15 +43,15 @@ class HTMLEntities
33
43
 
34
44
  def extended_entity_regexp
35
45
  @extended_entity_regexp ||= (
36
- regexp_options = [nil]
46
+ options = [nil]
37
47
  if encoding_aware?
38
- regexp = '[^\u{20}-\u{7E}]'
48
+ pattern = '[^\u{20}-\u{7E}]'
39
49
  else
40
- regexp = '[^\x20-\x7E]'
41
- regexp_options << "U"
50
+ pattern = '[^\x20-\x7E]'
51
+ options << "U"
42
52
  end
43
- regexp += "|'" if @flavor == 'html4'
44
- Regexp.new(regexp, *regexp_options)
53
+ pattern << "|'" if @flavor == 'html4'
54
+ Regexp.new(pattern, *options)
45
55
  )
46
56
  end
47
57
 
@@ -1,8 +1,8 @@
1
1
  class HTMLEntities
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 4
4
- MINOR = 2
5
- TINY = 4
4
+ MINOR = 3
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -0,0 +1,6 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+ require 'test/unit'
3
+ require 'htmlentities'
4
+
5
+ ENCODING_AWARE_RUBY = "1.9".respond_to?(:encoding)
6
+ $KCODE = 'u' unless ENCODING_AWARE_RUBY
@@ -0,0 +1,101 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ class HTMLEntities::DecodingTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
8
+ end
9
+
10
+ def assert_decode(expected, input)
11
+ @entities.each do |coder|
12
+ assert_equal expected, coder.decode(input)
13
+ end
14
+ end
15
+
16
+ def test_should_decode_basic_entities
17
+ assert_decode '&', '&amp;'
18
+ assert_decode '<', '&lt;'
19
+ assert_decode '"', '&quot;'
20
+ end
21
+
22
+ def test_should_decode_extended_named_entities
23
+ assert_decode '±', '&plusmn;'
24
+ assert_decode 'ð', '&eth;'
25
+ assert_decode 'Œ', '&OElig;'
26
+ assert_decode 'œ', '&oelig;'
27
+ end
28
+
29
+ def test_should_decode_decimal_entities
30
+ assert_decode '“', '&#8220;'
31
+ assert_decode '…', '&#8230;'
32
+ assert_decode ' ', '&#32;'
33
+ end
34
+
35
+ def test_should_decode_hexadecimal_entities
36
+ assert_decode '−', '&#x2212;'
37
+ assert_decode '—', '&#x2014;'
38
+ assert_decode '`', '&#x0060;'
39
+ assert_decode '`', '&#x60;'
40
+ end
41
+
42
+ def test_should_not_mutate_string_being_decoded
43
+ original = "&lt;&#163;"
44
+ input = original.dup
45
+ HTMLEntities.new.decode(input)
46
+
47
+ assert_equal original, input
48
+ end
49
+
50
+ def test_should_decode_text_with_mix_of_entities
51
+ # Just a random headline - I needed something with accented letters.
52
+ assert_decode(
53
+ 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
54
+ 'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
55
+ )
56
+ assert_decode(
57
+ '"bientôt" & 文字',
58
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
59
+ )
60
+ end
61
+
62
+ def test_should_decode_empty_string
63
+ assert_decode '', ''
64
+ end
65
+
66
+ def test_should_skip_unknown_entity
67
+ assert_decode '&bogus;', '&bogus;'
68
+ end
69
+
70
+ def test_should_decode_double_encoded_entity_once
71
+ assert_decode '&amp;', '&amp;amp;'
72
+ end
73
+
74
+ # Faults found and patched by Moonwolf
75
+ def test_should_decode_full_hexadecimal_range
76
+ (0..127).each do |codepoint|
77
+ assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
78
+ end
79
+ end
80
+
81
+ # Reported by Dallas DeVries and Johan Duflost
82
+ def test_should_decode_named_entities_reported_as_missing_in_3_0_1
83
+ assert_decode [178].pack('U'), '&sup2;'
84
+ assert_decode [8226].pack('U'), '&bull;'
85
+ assert_decode [948].pack('U'), '&delta;'
86
+ end
87
+
88
+ # Reported by ckruse
89
+ def test_should_decode_only_first_element_in_masked_entities
90
+ input = '&amp;#3346;'
91
+ expected = '&#3346;'
92
+ assert_decode expected, input
93
+ end
94
+
95
+ def test_should_ducktype_parameter_to_string_before_encoding
96
+ obj = Object.new
97
+ def obj.to_s; "foo"; end
98
+ assert_decode "foo", obj
99
+ end
100
+
101
+ end
@@ -0,0 +1,106 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ class HTMLEntities::EncodingTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
8
+ end
9
+
10
+ def assert_encode(expected, input, *args)
11
+ @entities.each do |coder|
12
+ assert_equal expected, coder.encode(input, *args)
13
+ end
14
+ end
15
+
16
+ def test_should_encode_basic_entities
17
+ assert_encode '&amp;', '&', :basic
18
+ assert_encode '&quot;', '"'
19
+ assert_encode '&lt;', '<', :basic
20
+ assert_encode '&lt;', '<'
21
+ end
22
+
23
+ def test_should_encode_basic_entities_to_decimal
24
+ assert_encode '&#38;', '&', :decimal
25
+ assert_encode '&#34;', '"', :decimal
26
+ assert_encode '&#60;', '<', :decimal
27
+ assert_encode '&#62;', '>', :decimal
28
+ assert_encode '&#39;', "'", :decimal
29
+ end
30
+
31
+ def test_should_encode_basic_entities_to_hexadecimal
32
+ assert_encode '&#x26;', '&', :hexadecimal
33
+ assert_encode '&#x22;', '"', :hexadecimal
34
+ assert_encode '&#x3c;', '<', :hexadecimal
35
+ assert_encode '&#x3e;', '>', :hexadecimal
36
+ assert_encode '&#x27;', "'", :hexadecimal
37
+ end
38
+
39
+ def test_should_encode_extended_named_entities
40
+ assert_encode '&plusmn;', '±', :named
41
+ assert_encode '&eth;', 'ð', :named
42
+ assert_encode '&OElig;', 'Œ', :named
43
+ assert_encode '&oelig;', 'œ', :named
44
+ end
45
+
46
+ def test_should_encode_decimal_entities
47
+ assert_encode '&#8220;', '“', :decimal
48
+ assert_encode '&#8230;', '…', :decimal
49
+ end
50
+
51
+ def test_should_encode_hexadecimal_entities
52
+ assert_encode '&#x2212;', '−', :hexadecimal
53
+ assert_encode '&#x2014;', '—', :hexadecimal
54
+ end
55
+
56
+ def test_should_encode_text_using_mix_of_entities
57
+ assert_encode(
58
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
59
+ '"bientôt" & 文字', :basic, :named, :hexadecimal
60
+ )
61
+ assert_encode(
62
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
63
+ '"bientôt" & 文字', :basic, :named, :decimal
64
+ )
65
+ end
66
+
67
+ def test_should_sort_commands_when_encoding_using_mix_of_entities
68
+ assert_encode(
69
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
70
+ '"bientôt" & 文字', :named, :hexadecimal, :basic
71
+ )
72
+ assert_encode(
73
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
74
+ '"bientôt" & 文字', :decimal, :named, :basic
75
+ )
76
+ end
77
+
78
+ def test_should_detect_illegal_encoding_command
79
+ assert_raise HTMLEntities::InstructionError do
80
+ HTMLEntities.new.encode('foo', :bar, :baz)
81
+ end
82
+ end
83
+
84
+ def test_should_not_encode_normal_ASCII
85
+ assert_encode '`', '`'
86
+ assert_encode ' ', ' '
87
+ end
88
+
89
+ def test_should_double_encode_existing_entity
90
+ assert_encode '&amp;amp;', '&amp;'
91
+ end
92
+
93
+ def test_should_not_mutate_string_being_encoded
94
+ original = "<£"
95
+ input = original.dup
96
+ HTMLEntities.new.encode(input, :basic, :decimal)
97
+
98
+ assert_equal original, input
99
+ end
100
+
101
+ def test_should_ducktype_parameter_to_string_before_encoding
102
+ obj = Object.new
103
+ def obj.to_s; "foo"; end
104
+ assert_encode "foo", obj
105
+ end
106
+ end
@@ -1,30 +1,10 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::EntitiesTest < Test::Unit::TestCase
9
5
 
10
- attr_reader :xhtml1_entities, :html4_entities
11
-
12
- def setup
13
- @xhtml1_entities = HTMLEntities.new('xhtml1')
14
- @html4_entities = HTMLEntities.new('html4')
15
- end
16
-
17
- class PseudoString
18
- def initialize(string)
19
- @string = string
20
- end
21
- def to_s
22
- @string
23
- end
24
- end
25
-
26
6
  def test_should_raise_exception_when_unknown_flavor_specified
27
- assert_raises(HTMLEntities::UnknownFlavor) do
7
+ assert_raises HTMLEntities::UnknownFlavor do
28
8
  HTMLEntities.new('foo')
29
9
  end
30
10
  end
@@ -41,186 +21,4 @@ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
41
21
  end
42
22
  end
43
23
 
44
- def test_should_decode_basic_entities
45
- assert_decode('&', '&amp;')
46
- assert_decode('<', '&lt;')
47
- assert_decode('"', '&quot;')
48
- end
49
-
50
- def test_should_encode_basic_entities
51
- assert_encode('&amp;', '&', :basic)
52
- assert_encode('&quot;', '"')
53
- assert_encode('&lt;', '<', :basic)
54
- assert_encode('&lt;', '<')
55
- end
56
-
57
- def test_should_encode_basic_entities_to_decimal
58
- assert_encode('&#38;', '&', :decimal)
59
- assert_encode('&#34;', '"', :decimal)
60
- assert_encode('&#60;', '<', :decimal)
61
- assert_encode('&#62;', '>', :decimal)
62
- assert_encode('&#39;', "'", :decimal)
63
- end
64
-
65
- def test_should_encode_basic_entities_to_hexadecimal
66
- assert_encode('&#x26;', '&', :hexadecimal)
67
- assert_encode('&#x22;', '"', :hexadecimal)
68
- assert_encode('&#x3c;', '<', :hexadecimal)
69
- assert_encode('&#x3e;', '>', :hexadecimal)
70
- assert_encode('&#x27;', "'", :hexadecimal)
71
- end
72
-
73
- def test_should_decode_extended_named_entities
74
- assert_decode('±', '&plusmn;')
75
- assert_decode('ð', '&eth;')
76
- assert_decode('Œ', '&OElig;')
77
- assert_decode('œ', '&oelig;')
78
- end
79
-
80
- def test_should_encode_extended_named_entities
81
- assert_encode('&plusmn;', '±', :named)
82
- assert_encode('&eth;', 'ð', :named)
83
- assert_encode('&OElig;', 'Œ', :named)
84
- assert_encode('&oelig;', 'œ', :named)
85
- end
86
-
87
- def test_should_decode_decimal_entities
88
- assert_decode('“', '&#8220;')
89
- assert_decode('…', '&#8230;')
90
- assert_decode(' ', '&#32;')
91
- end
92
-
93
- def test_should_encode_decimal_entities
94
- assert_encode('&#8220;', '“', :decimal)
95
- assert_encode('&#8230;', '…', :decimal)
96
- end
97
-
98
- def test_should_decode_hexadecimal_entities
99
- assert_decode('−', '&#x2212;')
100
- assert_decode('—', '&#x2014;')
101
- assert_decode('`', '&#x0060;')
102
- assert_decode('`', '&#x60;')
103
- end
104
-
105
- def test_should_encode_hexadecimal_entities
106
- assert_encode('&#x2212;', '−', :hexadecimal)
107
- assert_encode('&#x2014;', '—', :hexadecimal)
108
- end
109
-
110
- def test_should_decode_text_with_mix_of_entities
111
- # Just a random headline - I needed something with accented letters.
112
- assert_decode(
113
- 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
114
- 'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
115
- )
116
- assert_decode(
117
- '"bientôt" & 文字',
118
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
119
- )
120
- end
121
-
122
- def test_should_encode_text_using_mix_of_entities
123
- assert_encode(
124
- '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
125
- '"bientôt" & 文字', :basic, :named, :hexadecimal
126
- )
127
- assert_encode(
128
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
129
- '"bientôt" & 文字', :basic, :named, :decimal
130
- )
131
- end
132
-
133
- def test_should_sort_commands_when_encoding_using_mix_of_entities
134
- assert_encode(
135
- '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
136
- '"bientôt" & 文字', :named, :hexadecimal, :basic
137
- )
138
- assert_encode(
139
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
140
- '"bientôt" & 文字', :decimal, :named, :basic
141
- )
142
- end
143
-
144
- def test_should_detect_illegal_encoding_command
145
- assert_raise(HTMLEntities::InstructionError) {
146
- HTMLEntities.encode_entities('foo', :bar, :baz)
147
- }
148
- end
149
-
150
- def test_should_decode_empty_string
151
- assert_decode('', '')
152
- end
153
-
154
- def test_should_skip_unknown_entity
155
- assert_decode('&bogus;', '&bogus;')
156
- end
157
-
158
- def test_should_decode_double_encoded_entity_once
159
- assert_decode('&amp;', '&amp;amp;')
160
- end
161
-
162
- def test_should_not_encode_normal_ASCII
163
- assert_encode('`', '`')
164
- assert_encode(' ', ' ')
165
- end
166
-
167
- def test_should_double_encode_existing_entity
168
- assert_encode('&amp;amp;', '&amp;')
169
- end
170
-
171
- # Faults found and patched by Moonwolf
172
- def test_should_decode_full_hexadecimal_range
173
- (0..127).each do |codepoint|
174
- assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
175
- end
176
- end
177
-
178
- # Reported by Dallas DeVries and Johan Duflost
179
- def test_should_decode_named_entities_reported_as_missing_in_3_0_1
180
- assert_decode([178].pack('U'), '&sup2;')
181
- assert_decode([8226].pack('U'), '&bull;')
182
- assert_decode([948].pack('U'), '&delta;')
183
- end
184
-
185
- if RUBY_VERSION =~ /^1\.8\./
186
- # Reported by Benoit Larroque
187
- def test_should_encode_without_error_when_KCODE_is_not_UTF_8
188
- kcode = $KCODE
189
- $KCODE = "n"
190
- coder = HTMLEntities.new;
191
- text = [8212].pack('U')
192
- assert_equal "&#8212;", coder.encode(text, :decimal)
193
- $KCODE = kcode
194
- end
195
- end
196
-
197
- # Reported by ckruse
198
- def test_should_decode_only_first_element_in_masked_entities
199
- input = '&amp;#3346;'
200
- expected = '&#3346;'
201
- assert_decode expected, input
202
- end
203
-
204
- def test_should_ducktype_parameter_to_string_before_encoding
205
- pseudo_string = PseudoString.new('foo')
206
- assert_decode('foo', pseudo_string)
207
- end
208
-
209
- def test_should_ducktype_parameter_to_string_before_decoding
210
- pseudo_string = PseudoString.new('foo')
211
- assert_encode('foo', pseudo_string)
212
- end
213
-
214
- def assert_decode(expected, input)
215
- [xhtml1_entities, html4_entities].each do |coder|
216
- assert_equal(expected, coder.decode(input))
217
- end
218
- end
219
-
220
- def assert_encode(expected, input, *args)
221
- [xhtml1_entities, html4_entities].each do |coder|
222
- assert_equal(expected, coder.encode(input, *args))
223
- end
224
- end
225
-
226
24
  end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::ExpandedTest < Test::Unit::TestCase
9
5
 
@@ -109,4 +105,5 @@ class HTMLEntities::ExpandedTest < Test::Unit::TestCase
109
105
  assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
110
106
  end
111
107
  end
108
+
112
109
  end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTML4Test < Test::Unit::TestCase
9
5
 
@@ -1,7 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
2
+ require File.expand_path("../common", __FILE__)
5
3
 
6
4
  class HTMLEntities::RoundtripTest < Test::Unit::TestCase
7
5
 
@@ -13,74 +11,50 @@ class HTMLEntities::RoundtripTest < Test::Unit::TestCase
13
11
  end
14
12
 
15
13
  def test_should_roundtrip_xhtml1_entities_via_named_encoding
16
- each_mapping('xhtml1') do |name, string|
17
- assert_equal(
18
- string,
19
- xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
20
- )
14
+ each_mapping 'xhtml1' do |name, string|
15
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
21
16
  end
22
17
  end
23
18
 
24
19
  def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
25
- each_mapping('xhtml1') do |name, string|
26
- assert_equal(
27
- string,
28
- xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
29
- )
20
+ each_mapping 'xhtml1' do |name, string|
21
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
30
22
  end
31
23
  end
32
24
 
33
25
  def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
34
- each_mapping('xhtml1') do |name, string|
35
- assert_equal(
36
- string,
37
- xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
38
- )
26
+ each_mapping 'xhtml1' do |name, string|
27
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
39
28
  end
40
29
  end
41
30
 
42
31
  def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
43
- each_mapping('xhtml1') do |name, string|
44
- assert_equal(
45
- string,
46
- xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
47
- )
32
+ each_mapping 'xhtml1' do |name, string|
33
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
48
34
  end
49
35
  end
50
36
 
51
37
  def test_should_roundtrip_html4_entities_via_named_encoding
52
- each_mapping('html4') do |name, string|
53
- assert_equal(
54
- string,
55
- html4_entities.decode(html4_entities.encode(string, :named))
56
- )
38
+ each_mapping 'html4' do |name, string|
39
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :named))
57
40
  end
58
41
  end
59
42
 
60
43
  def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
61
- each_mapping('html4') do |name, string|
62
- assert_equal(
63
- string,
64
- html4_entities.decode(html4_entities.encode(string, :basic, :named))
65
- )
44
+ each_mapping 'html4' do |name, string|
45
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named))
66
46
  end
67
47
  end
68
48
 
69
49
  def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
70
- each_mapping('html4') do |name, string|
71
- assert_equal(
72
- string,
73
- html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
74
- )
50
+ each_mapping 'html4' do |name, string|
51
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
75
52
  end
76
53
  end
77
54
 
78
55
  def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
79
- each_mapping('html4') do |name, string|
80
- assert_equal(
81
- string,
82
- html4_entities.decode(html4_entities.encode(string, :hexadecimal))
83
- )
56
+ each_mapping 'html4' do |name, string|
57
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :hexadecimal))
84
58
  end
85
59
  end
86
60
 
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ unless ENCODING_AWARE_RUBY
5
+ class HTMLEntities::Ruby18Test < Test::Unit::TestCase
6
+
7
+ # Reported by Benoit Larroque
8
+ def test_should_encode_without_error_when_KCODE_is_not_UTF_8
9
+ kcode = $KCODE
10
+ $KCODE = "n"
11
+ coder = HTMLEntities.new
12
+ text = [8212].pack('U')
13
+ assert_equal "&#8212;", coder.encode(text, :decimal)
14
+ $KCODE = kcode
15
+ end
16
+
17
+ end
18
+ end
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ if ENCODING_AWARE_RUBY
5
+ class HTMLEntities::Ruby19Test < Test::Unit::TestCase
6
+
7
+ def test_should_encode_ascii_to_ascii
8
+ s = "<elan>".encode(Encoding::US_ASCII)
9
+ assert_equal Encoding::US_ASCII, s.encoding
10
+
11
+ t = HTMLEntities.new.encode(s)
12
+ assert_equal "&lt;elan&gt;", t
13
+ assert_equal Encoding::US_ASCII, t.encoding
14
+ end
15
+
16
+ def test_should_encode_utf8_to_utf8_if_needed
17
+ s = "<élan>"
18
+ assert_equal Encoding::UTF_8, s.encoding
19
+
20
+ t = HTMLEntities.new.encode(s)
21
+ assert_equal "&lt;élan&gt;", t
22
+ assert_equal Encoding::UTF_8, t.encoding
23
+ end
24
+
25
+ def test_should_encode_utf8_to_ascii_if_possible
26
+ s = "<elan>"
27
+ assert_equal Encoding::UTF_8, s.encoding
28
+
29
+ t = HTMLEntities.new.encode(s)
30
+ assert_equal "&lt;elan&gt;", t
31
+ assert_equal Encoding::US_ASCII, t.encoding
32
+ end
33
+
34
+ def test_should_encode_other_encoding_to_utf8
35
+ s = "<élan>".encode(Encoding::ISO_8859_1)
36
+ assert_equal Encoding::ISO_8859_1, s.encoding
37
+
38
+ t = HTMLEntities.new.encode(s)
39
+ assert_equal "&lt;élan&gt;", t
40
+ assert_equal Encoding::UTF_8, t.encoding
41
+ end
42
+
43
+ def test_should_decode_ascii_to_utf8
44
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
45
+ assert_equal Encoding::US_ASCII, s.encoding
46
+
47
+ t = HTMLEntities.new.decode(s)
48
+ assert_equal "<élan>", t
49
+ assert_equal Encoding::UTF_8, t.encoding
50
+ end
51
+
52
+ def test_should_decode_utf8_to_utf8
53
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::UTF_8)
54
+ assert_equal Encoding::UTF_8, s.encoding
55
+
56
+ t = HTMLEntities.new.decode(s)
57
+ assert_equal "<élan>", t
58
+ assert_equal Encoding::UTF_8, t.encoding
59
+ end
60
+
61
+ def test_should_decode_other_encoding_to_utf8
62
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::ISO_8859_1)
63
+ assert_equal Encoding::ISO_8859_1, s.encoding
64
+
65
+ t = HTMLEntities.new.decode(s)
66
+ assert_equal "<élan>", t
67
+ assert_equal Encoding::UTF_8, t.encoding
68
+ end
69
+ end
70
+ end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::XHTML1Test < Test::Unit::TestCase
9
5
 
@@ -25,5 +21,4 @@ class HTMLEntities::XHTML1Test < Test::Unit::TestCase
25
21
  assert_equal "&b.Theta;", html_entities.decode("&b.Theta;")
26
22
  end
27
23
 
28
-
29
24
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: htmlentities
3
3
  version: !ruby/object:Gem::Version
4
- hash: 63
4
+ hash: 51
5
5
  prerelease: false
6
6
  segments:
7
7
  - 4
8
- - 2
9
- - 4
10
- version: 4.2.4
8
+ - 3
9
+ - 0
10
+ version: 4.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Paul Battley
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-30 00:00:00 +00:00
18
+ date: 2011-03-29 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -26,30 +26,30 @@ executables: []
26
26
  extensions: []
27
27
 
28
28
  extra_rdoc_files:
29
- - README.rdoc
30
29
  - History.txt
31
30
  - COPYING.txt
32
31
  files:
33
- - lib/htmlentities/version.rb
32
+ - lib/htmlentities.rb
34
33
  - lib/htmlentities/flavors.rb
34
+ - lib/htmlentities/version.rb
35
35
  - lib/htmlentities/encoder.rb
36
- - lib/htmlentities/legacy.rb
37
- - lib/htmlentities/mappings/html4.rb
38
36
  - lib/htmlentities/mappings/expanded.rb
39
37
  - lib/htmlentities/mappings/xhtml1.rb
38
+ - lib/htmlentities/mappings/html4.rb
40
39
  - lib/htmlentities/decoder.rb
41
- - lib/htmlentities.rb
42
- - test/entities_test.rb
40
+ - test/decoding_test.rb
41
+ - test/ruby_1_8_test.rb
43
42
  - test/xhtml1_test.rb
44
- - test/roundtrip_test.rb
45
- - test/legacy_test.rb
46
- - test/expanded_test.rb
47
- - test/test_all.rb
48
43
  - test/html4_test.rb
44
+ - test/encoding_test.rb
45
+ - test/expanded_test.rb
46
+ - test/ruby_1_9_test.rb
47
+ - test/common.rb
48
+ - test/roundtrip_test.rb
49
+ - test/entities_test.rb
50
+ - perf/performance.rb
49
51
  - perf/profile.rb
50
52
  - perf/benchmark.rb
51
- - perf/performance.rb
52
- - README.rdoc
53
53
  - History.txt
54
54
  - COPYING.txt
55
55
  has_rdoc: true
@@ -87,4 +87,12 @@ signing_key:
87
87
  specification_version: 3
88
88
  summary: A module for encoding and decoding (X)HTML entities.
89
89
  test_files:
90
- - test/test_all.rb
90
+ - test/decoding_test.rb
91
+ - test/ruby_1_8_test.rb
92
+ - test/xhtml1_test.rb
93
+ - test/html4_test.rb
94
+ - test/encoding_test.rb
95
+ - test/expanded_test.rb
96
+ - test/ruby_1_9_test.rb
97
+ - test/roundtrip_test.rb
98
+ - test/entities_test.rb
@@ -1,44 +0,0 @@
1
- == HTMLEntities
2
-
3
- HTML entity encoding and decoding for Ruby
4
-
5
- The HTMLEntities module facilitates encoding and decoding of
6
- (X)HTML entities from/to their corresponding UTF-8 codepoints.
7
-
8
- To install (requires root/admin privileges):
9
-
10
- ruby setup.rb
11
-
12
- Alternatively, you can just use the gem.
13
-
14
- == Licence
15
-
16
- This code is free to use under the terms of the MIT licence:
17
-
18
- Copyright (c) 2005-2009 Paul Battley
19
-
20
- Permission is hereby granted, free of charge, to any person obtaining a copy
21
- of this software and associated documentation files (the "Software"), to
22
- deal in the Software without restriction, including without limitation the
23
- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
24
- sell copies of the Software, and to permit persons to whom the Software is
25
- furnished to do so, subject to the following conditions:
26
-
27
- The above copyright notice and this permission notice shall be included in
28
- all copies or substantial portions of the Software.
29
-
30
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35
- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
36
- IN THE SOFTWARE.
37
-
38
- If you'd like to negotiate a different licence for a specific use, just
39
- contact me -- I'll almost certainly permit it.
40
-
41
- == Contact
42
-
43
- Comments are welcome. Send an email to pbattley@gmail.com.
44
-
@@ -1,31 +0,0 @@
1
- class HTMLEntities
2
- class << self
3
-
4
- #
5
- # Legacy compatibility class method allowing direct encoding of XHTML1 entities.
6
- # See HTMLEntities#encode for description of parameters.
7
- #
8
- # Deprecated.
9
- #
10
- def encode_entities(*args)
11
- xhtml1_entities.encode(*args)
12
- end
13
-
14
- #
15
- # Legacy compatibility class method allowing direct decoding of XHTML1 entities.
16
- # See HTMLEntities#decode for description of parameters.
17
- #
18
- # Deprecated.
19
- #
20
- def decode_entities(*args)
21
- xhtml1_entities.decode(*args)
22
- end
23
-
24
- private
25
-
26
- def xhtml1_entities
27
- @xhtml1_entities ||= new('xhtml1')
28
- end
29
-
30
- end
31
- end
@@ -1,35 +0,0 @@
1
- # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
-
8
- #
9
- # Test that version 3.x functionality still works
10
- #
11
- class HTMLEntities::LegacyTest < Test::Unit::TestCase
12
-
13
- def test_should_decode_via_legacy_interface
14
- assert_decode('&', '&amp;')
15
- assert_decode('±', '&plusmn;')
16
- assert_decode('“', '&#8220;')
17
- assert_decode('—', '&#x2014;')
18
- end
19
-
20
- def test_should_encode_via_legacy_interface
21
- assert_encode('&amp;', '&', :basic)
22
- assert_encode('&eth;', 'ð', :named)
23
- assert_encode('&#8230;', '…', :decimal)
24
- assert_encode('&#x2212;', '−', :hexadecimal)
25
- end
26
-
27
- def assert_encode(expected, *encode_args)
28
- assert_equal expected, HTMLEntities.encode_entities(*encode_args)
29
- end
30
-
31
- def assert_decode(expected, *decode_args)
32
- assert_equal expected, HTMLEntities.decode_entities(*decode_args)
33
- end
34
-
35
- end
@@ -1,4 +0,0 @@
1
- # encoding: UTF-8
2
- Dir[File.dirname(__FILE__)+'/*_test.rb'].each do |test|
3
- require "./#{test}"
4
- end