htmlentities 4.2.4 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,16 @@
1
+ == 4.3.0 (2011-03-29)
2
+ * Use Ruby 1.9's encoding support where available.
3
+ * Deprecated HTMLEntities.encode_entities/decode_entities interface is now
4
+ removed.
5
+
6
+ == 4.2.4 (2011-01-30)
7
+ * Fix issue where double-escaped entities were not correctly escaped. Bug
8
+ reported by Christian Kruse.
9
+
10
+ == 4.2.3 (2011-01-07)
11
+ * Additional entities from Junya Ishihara.
12
+ * Performance improvements.
13
+
1
14
  == 4.2.1 (2010-04-05)
2
15
  * Fixed error on Ruby 1.8.x when $KCODE was not set to "UTF8". Thanks to
3
16
  Benoit Larroque for the bug report.
@@ -1,5 +1,4 @@
1
1
  # encoding: UTF-8
2
- require 'htmlentities/legacy'
3
2
  require 'htmlentities/flavors'
4
3
  require 'htmlentities/encoder'
5
4
  require 'htmlentities/decoder'
@@ -61,9 +60,9 @@ class HTMLEntities
61
60
  # If no instructions are specified, :basic will be used.
62
61
  #
63
62
  # Examples:
64
- # encode_entities(str) - XML-safe
65
- # encode_entities(str, :basic, :decimal) - XML-safe and 7-bit clean
66
- # encode_entities(str, :basic, :named, :decimal) - 7-bit clean, with all
63
+ # encode(str) - XML-safe
64
+ # encode(str, :basic, :decimal) - XML-safe and 7-bit clean
65
+ # encode(str, :basic, :named, :decimal) - 7-bit clean, with all
67
66
  # non-ASCII characters replaced with their named entity where possible, and
68
67
  # decimal equivalents otherwise.
69
68
  #
@@ -7,7 +7,7 @@ class HTMLEntities
7
7
  end
8
8
 
9
9
  def decode(source)
10
- source.to_s.gsub(@entity_regexp) {
10
+ prepare(source).gsub(@entity_regexp) {
11
11
  if $1 && codepoint = @map[$1]
12
12
  [codepoint].pack('U')
13
13
  elsif $2
@@ -21,6 +21,16 @@ class HTMLEntities
21
21
  end
22
22
 
23
23
  private
24
+ if "1.9".respond_to?(:encoding)
25
+ def prepare(string) #:nodoc:
26
+ string.to_s.encode(Encoding::UTF_8)
27
+ end
28
+ else
29
+ def prepare(string) #:nodoc:
30
+ string.to_s
31
+ end
32
+ end
33
+
24
34
  def entity_regexp
25
35
  key_lengths = @map.keys.map{ |k| k.length }
26
36
  entity_name_pattern =
@@ -13,13 +13,23 @@ class HTMLEntities
13
13
  end
14
14
 
15
15
  def encode(source)
16
- string = source.to_s.dup
17
- string.gsub!(basic_entity_regexp){ encode_basic($&) }
18
- string.gsub!(extended_entity_regexp){ encode_extended($&) }
19
- string
16
+ prepare(source).
17
+ gsub(basic_entity_regexp){ encode_basic($&) }.
18
+ gsub(extended_entity_regexp){ encode_extended($&) }
20
19
  end
21
20
 
22
21
  private
22
+
23
+ if "1.9".respond_to?(:encoding)
24
+ def prepare(string) #:nodoc:
25
+ string.to_s.encode(Encoding::UTF_8)
26
+ end
27
+ else
28
+ def prepare(string) #:nodoc:
29
+ string.to_s
30
+ end
31
+ end
32
+
23
33
  def basic_entity_regexp
24
34
  @basic_entity_regexp ||= (
25
35
  case @flavor
@@ -33,15 +43,15 @@ class HTMLEntities
33
43
 
34
44
  def extended_entity_regexp
35
45
  @extended_entity_regexp ||= (
36
- regexp_options = [nil]
46
+ options = [nil]
37
47
  if encoding_aware?
38
- regexp = '[^\u{20}-\u{7E}]'
48
+ pattern = '[^\u{20}-\u{7E}]'
39
49
  else
40
- regexp = '[^\x20-\x7E]'
41
- regexp_options << "U"
50
+ pattern = '[^\x20-\x7E]'
51
+ options << "U"
42
52
  end
43
- regexp += "|'" if @flavor == 'html4'
44
- Regexp.new(regexp, *regexp_options)
53
+ pattern << "|'" if @flavor == 'html4'
54
+ Regexp.new(pattern, *options)
45
55
  )
46
56
  end
47
57
 
@@ -1,8 +1,8 @@
1
1
  class HTMLEntities
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 4
4
- MINOR = 2
5
- TINY = 4
4
+ MINOR = 3
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -0,0 +1,6 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+ require 'test/unit'
3
+ require 'htmlentities'
4
+
5
+ ENCODING_AWARE_RUBY = "1.9".respond_to?(:encoding)
6
+ $KCODE = 'u' unless ENCODING_AWARE_RUBY
@@ -0,0 +1,101 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ class HTMLEntities::DecodingTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
8
+ end
9
+
10
+ def assert_decode(expected, input)
11
+ @entities.each do |coder|
12
+ assert_equal expected, coder.decode(input)
13
+ end
14
+ end
15
+
16
+ def test_should_decode_basic_entities
17
+ assert_decode '&', '&amp;'
18
+ assert_decode '<', '&lt;'
19
+ assert_decode '"', '&quot;'
20
+ end
21
+
22
+ def test_should_decode_extended_named_entities
23
+ assert_decode '±', '&plusmn;'
24
+ assert_decode 'ð', '&eth;'
25
+ assert_decode 'Œ', '&OElig;'
26
+ assert_decode 'œ', '&oelig;'
27
+ end
28
+
29
+ def test_should_decode_decimal_entities
30
+ assert_decode '“', '&#8220;'
31
+ assert_decode '…', '&#8230;'
32
+ assert_decode ' ', '&#32;'
33
+ end
34
+
35
+ def test_should_decode_hexadecimal_entities
36
+ assert_decode '−', '&#x2212;'
37
+ assert_decode '—', '&#x2014;'
38
+ assert_decode '`', '&#x0060;'
39
+ assert_decode '`', '&#x60;'
40
+ end
41
+
42
+ def test_should_not_mutate_string_being_decoded
43
+ original = "&lt;&#163;"
44
+ input = original.dup
45
+ HTMLEntities.new.decode(input)
46
+
47
+ assert_equal original, input
48
+ end
49
+
50
+ def test_should_decode_text_with_mix_of_entities
51
+ # Just a random headline - I needed something with accented letters.
52
+ assert_decode(
53
+ 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
54
+ 'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
55
+ )
56
+ assert_decode(
57
+ '"bientôt" & 文字',
58
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
59
+ )
60
+ end
61
+
62
+ def test_should_decode_empty_string
63
+ assert_decode '', ''
64
+ end
65
+
66
+ def test_should_skip_unknown_entity
67
+ assert_decode '&bogus;', '&bogus;'
68
+ end
69
+
70
+ def test_should_decode_double_encoded_entity_once
71
+ assert_decode '&amp;', '&amp;amp;'
72
+ end
73
+
74
+ # Faults found and patched by Moonwolf
75
+ def test_should_decode_full_hexadecimal_range
76
+ (0..127).each do |codepoint|
77
+ assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
78
+ end
79
+ end
80
+
81
+ # Reported by Dallas DeVries and Johan Duflost
82
+ def test_should_decode_named_entities_reported_as_missing_in_3_0_1
83
+ assert_decode [178].pack('U'), '&sup2;'
84
+ assert_decode [8226].pack('U'), '&bull;'
85
+ assert_decode [948].pack('U'), '&delta;'
86
+ end
87
+
88
+ # Reported by ckruse
89
+ def test_should_decode_only_first_element_in_masked_entities
90
+ input = '&amp;#3346;'
91
+ expected = '&#3346;'
92
+ assert_decode expected, input
93
+ end
94
+
95
+ def test_should_ducktype_parameter_to_string_before_encoding
96
+ obj = Object.new
97
+ def obj.to_s; "foo"; end
98
+ assert_decode "foo", obj
99
+ end
100
+
101
+ end
@@ -0,0 +1,106 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ class HTMLEntities::EncodingTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
8
+ end
9
+
10
+ def assert_encode(expected, input, *args)
11
+ @entities.each do |coder|
12
+ assert_equal expected, coder.encode(input, *args)
13
+ end
14
+ end
15
+
16
+ def test_should_encode_basic_entities
17
+ assert_encode '&amp;', '&', :basic
18
+ assert_encode '&quot;', '"'
19
+ assert_encode '&lt;', '<', :basic
20
+ assert_encode '&lt;', '<'
21
+ end
22
+
23
+ def test_should_encode_basic_entities_to_decimal
24
+ assert_encode '&#38;', '&', :decimal
25
+ assert_encode '&#34;', '"', :decimal
26
+ assert_encode '&#60;', '<', :decimal
27
+ assert_encode '&#62;', '>', :decimal
28
+ assert_encode '&#39;', "'", :decimal
29
+ end
30
+
31
+ def test_should_encode_basic_entities_to_hexadecimal
32
+ assert_encode '&#x26;', '&', :hexadecimal
33
+ assert_encode '&#x22;', '"', :hexadecimal
34
+ assert_encode '&#x3c;', '<', :hexadecimal
35
+ assert_encode '&#x3e;', '>', :hexadecimal
36
+ assert_encode '&#x27;', "'", :hexadecimal
37
+ end
38
+
39
+ def test_should_encode_extended_named_entities
40
+ assert_encode '&plusmn;', '±', :named
41
+ assert_encode '&eth;', 'ð', :named
42
+ assert_encode '&OElig;', 'Œ', :named
43
+ assert_encode '&oelig;', 'œ', :named
44
+ end
45
+
46
+ def test_should_encode_decimal_entities
47
+ assert_encode '&#8220;', '“', :decimal
48
+ assert_encode '&#8230;', '…', :decimal
49
+ end
50
+
51
+ def test_should_encode_hexadecimal_entities
52
+ assert_encode '&#x2212;', '−', :hexadecimal
53
+ assert_encode '&#x2014;', '—', :hexadecimal
54
+ end
55
+
56
+ def test_should_encode_text_using_mix_of_entities
57
+ assert_encode(
58
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
59
+ '"bientôt" & 文字', :basic, :named, :hexadecimal
60
+ )
61
+ assert_encode(
62
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
63
+ '"bientôt" & 文字', :basic, :named, :decimal
64
+ )
65
+ end
66
+
67
+ def test_should_sort_commands_when_encoding_using_mix_of_entities
68
+ assert_encode(
69
+ '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
70
+ '"bientôt" & 文字', :named, :hexadecimal, :basic
71
+ )
72
+ assert_encode(
73
+ '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
74
+ '"bientôt" & 文字', :decimal, :named, :basic
75
+ )
76
+ end
77
+
78
+ def test_should_detect_illegal_encoding_command
79
+ assert_raise HTMLEntities::InstructionError do
80
+ HTMLEntities.new.encode('foo', :bar, :baz)
81
+ end
82
+ end
83
+
84
+ def test_should_not_encode_normal_ASCII
85
+ assert_encode '`', '`'
86
+ assert_encode ' ', ' '
87
+ end
88
+
89
+ def test_should_double_encode_existing_entity
90
+ assert_encode '&amp;amp;', '&amp;'
91
+ end
92
+
93
+ def test_should_not_mutate_string_being_encoded
94
+ original = "<£"
95
+ input = original.dup
96
+ HTMLEntities.new.encode(input, :basic, :decimal)
97
+
98
+ assert_equal original, input
99
+ end
100
+
101
+ def test_should_ducktype_parameter_to_string_before_encoding
102
+ obj = Object.new
103
+ def obj.to_s; "foo"; end
104
+ assert_encode "foo", obj
105
+ end
106
+ end
@@ -1,30 +1,10 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::EntitiesTest < Test::Unit::TestCase
9
5
 
10
- attr_reader :xhtml1_entities, :html4_entities
11
-
12
- def setup
13
- @xhtml1_entities = HTMLEntities.new('xhtml1')
14
- @html4_entities = HTMLEntities.new('html4')
15
- end
16
-
17
- class PseudoString
18
- def initialize(string)
19
- @string = string
20
- end
21
- def to_s
22
- @string
23
- end
24
- end
25
-
26
6
  def test_should_raise_exception_when_unknown_flavor_specified
27
- assert_raises(HTMLEntities::UnknownFlavor) do
7
+ assert_raises HTMLEntities::UnknownFlavor do
28
8
  HTMLEntities.new('foo')
29
9
  end
30
10
  end
@@ -41,186 +21,4 @@ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
41
21
  end
42
22
  end
43
23
 
44
- def test_should_decode_basic_entities
45
- assert_decode('&', '&amp;')
46
- assert_decode('<', '&lt;')
47
- assert_decode('"', '&quot;')
48
- end
49
-
50
- def test_should_encode_basic_entities
51
- assert_encode('&amp;', '&', :basic)
52
- assert_encode('&quot;', '"')
53
- assert_encode('&lt;', '<', :basic)
54
- assert_encode('&lt;', '<')
55
- end
56
-
57
- def test_should_encode_basic_entities_to_decimal
58
- assert_encode('&#38;', '&', :decimal)
59
- assert_encode('&#34;', '"', :decimal)
60
- assert_encode('&#60;', '<', :decimal)
61
- assert_encode('&#62;', '>', :decimal)
62
- assert_encode('&#39;', "'", :decimal)
63
- end
64
-
65
- def test_should_encode_basic_entities_to_hexadecimal
66
- assert_encode('&#x26;', '&', :hexadecimal)
67
- assert_encode('&#x22;', '"', :hexadecimal)
68
- assert_encode('&#x3c;', '<', :hexadecimal)
69
- assert_encode('&#x3e;', '>', :hexadecimal)
70
- assert_encode('&#x27;', "'", :hexadecimal)
71
- end
72
-
73
- def test_should_decode_extended_named_entities
74
- assert_decode('±', '&plusmn;')
75
- assert_decode('ð', '&eth;')
76
- assert_decode('Œ', '&OElig;')
77
- assert_decode('œ', '&oelig;')
78
- end
79
-
80
- def test_should_encode_extended_named_entities
81
- assert_encode('&plusmn;', '±', :named)
82
- assert_encode('&eth;', 'ð', :named)
83
- assert_encode('&OElig;', 'Œ', :named)
84
- assert_encode('&oelig;', 'œ', :named)
85
- end
86
-
87
- def test_should_decode_decimal_entities
88
- assert_decode('“', '&#8220;')
89
- assert_decode('…', '&#8230;')
90
- assert_decode(' ', '&#32;')
91
- end
92
-
93
- def test_should_encode_decimal_entities
94
- assert_encode('&#8220;', '“', :decimal)
95
- assert_encode('&#8230;', '…', :decimal)
96
- end
97
-
98
- def test_should_decode_hexadecimal_entities
99
- assert_decode('−', '&#x2212;')
100
- assert_decode('—', '&#x2014;')
101
- assert_decode('`', '&#x0060;')
102
- assert_decode('`', '&#x60;')
103
- end
104
-
105
- def test_should_encode_hexadecimal_entities
106
- assert_encode('&#x2212;', '−', :hexadecimal)
107
- assert_encode('&#x2014;', '—', :hexadecimal)
108
- end
109
-
110
- def test_should_decode_text_with_mix_of_entities
111
- # Just a random headline - I needed something with accented letters.
112
- assert_decode(
113
- 'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
114
- 'Le tabac pourrait bient&ocirc;t &#234;tre banni dans tous les lieux publics en France'
115
- )
116
- assert_decode(
117
- '"bientôt" & 文字',
118
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#x5b57;'
119
- )
120
- end
121
-
122
- def test_should_encode_text_using_mix_of_entities
123
- assert_encode(
124
- '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
125
- '"bientôt" & 文字', :basic, :named, :hexadecimal
126
- )
127
- assert_encode(
128
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
129
- '"bientôt" & 文字', :basic, :named, :decimal
130
- )
131
- end
132
-
133
- def test_should_sort_commands_when_encoding_using_mix_of_entities
134
- assert_encode(
135
- '&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
136
- '"bientôt" & 文字', :named, :hexadecimal, :basic
137
- )
138
- assert_encode(
139
- '&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
140
- '"bientôt" & 文字', :decimal, :named, :basic
141
- )
142
- end
143
-
144
- def test_should_detect_illegal_encoding_command
145
- assert_raise(HTMLEntities::InstructionError) {
146
- HTMLEntities.encode_entities('foo', :bar, :baz)
147
- }
148
- end
149
-
150
- def test_should_decode_empty_string
151
- assert_decode('', '')
152
- end
153
-
154
- def test_should_skip_unknown_entity
155
- assert_decode('&bogus;', '&bogus;')
156
- end
157
-
158
- def test_should_decode_double_encoded_entity_once
159
- assert_decode('&amp;', '&amp;amp;')
160
- end
161
-
162
- def test_should_not_encode_normal_ASCII
163
- assert_encode('`', '`')
164
- assert_encode(' ', ' ')
165
- end
166
-
167
- def test_should_double_encode_existing_entity
168
- assert_encode('&amp;amp;', '&amp;')
169
- end
170
-
171
- # Faults found and patched by Moonwolf
172
- def test_should_decode_full_hexadecimal_range
173
- (0..127).each do |codepoint|
174
- assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
175
- end
176
- end
177
-
178
- # Reported by Dallas DeVries and Johan Duflost
179
- def test_should_decode_named_entities_reported_as_missing_in_3_0_1
180
- assert_decode([178].pack('U'), '&sup2;')
181
- assert_decode([8226].pack('U'), '&bull;')
182
- assert_decode([948].pack('U'), '&delta;')
183
- end
184
-
185
- if RUBY_VERSION =~ /^1\.8\./
186
- # Reported by Benoit Larroque
187
- def test_should_encode_without_error_when_KCODE_is_not_UTF_8
188
- kcode = $KCODE
189
- $KCODE = "n"
190
- coder = HTMLEntities.new;
191
- text = [8212].pack('U')
192
- assert_equal "&#8212;", coder.encode(text, :decimal)
193
- $KCODE = kcode
194
- end
195
- end
196
-
197
- # Reported by ckruse
198
- def test_should_decode_only_first_element_in_masked_entities
199
- input = '&amp;#3346;'
200
- expected = '&#3346;'
201
- assert_decode expected, input
202
- end
203
-
204
- def test_should_ducktype_parameter_to_string_before_encoding
205
- pseudo_string = PseudoString.new('foo')
206
- assert_decode('foo', pseudo_string)
207
- end
208
-
209
- def test_should_ducktype_parameter_to_string_before_decoding
210
- pseudo_string = PseudoString.new('foo')
211
- assert_encode('foo', pseudo_string)
212
- end
213
-
214
- def assert_decode(expected, input)
215
- [xhtml1_entities, html4_entities].each do |coder|
216
- assert_equal(expected, coder.decode(input))
217
- end
218
- end
219
-
220
- def assert_encode(expected, input, *args)
221
- [xhtml1_entities, html4_entities].each do |coder|
222
- assert_equal(expected, coder.encode(input, *args))
223
- end
224
- end
225
-
226
24
  end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::ExpandedTest < Test::Unit::TestCase
9
5
 
@@ -109,4 +105,5 @@ class HTMLEntities::ExpandedTest < Test::Unit::TestCase
109
105
  assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
110
106
  end
111
107
  end
108
+
112
109
  end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTML4Test < Test::Unit::TestCase
9
5
 
@@ -1,7 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
2
+ require File.expand_path("../common", __FILE__)
5
3
 
6
4
  class HTMLEntities::RoundtripTest < Test::Unit::TestCase
7
5
 
@@ -13,74 +11,50 @@ class HTMLEntities::RoundtripTest < Test::Unit::TestCase
13
11
  end
14
12
 
15
13
  def test_should_roundtrip_xhtml1_entities_via_named_encoding
16
- each_mapping('xhtml1') do |name, string|
17
- assert_equal(
18
- string,
19
- xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
20
- )
14
+ each_mapping 'xhtml1' do |name, string|
15
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
21
16
  end
22
17
  end
23
18
 
24
19
  def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
25
- each_mapping('xhtml1') do |name, string|
26
- assert_equal(
27
- string,
28
- xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
29
- )
20
+ each_mapping 'xhtml1' do |name, string|
21
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
30
22
  end
31
23
  end
32
24
 
33
25
  def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
34
- each_mapping('xhtml1') do |name, string|
35
- assert_equal(
36
- string,
37
- xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
38
- )
26
+ each_mapping 'xhtml1' do |name, string|
27
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
39
28
  end
40
29
  end
41
30
 
42
31
  def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
43
- each_mapping('xhtml1') do |name, string|
44
- assert_equal(
45
- string,
46
- xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
47
- )
32
+ each_mapping 'xhtml1' do |name, string|
33
+ assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
48
34
  end
49
35
  end
50
36
 
51
37
  def test_should_roundtrip_html4_entities_via_named_encoding
52
- each_mapping('html4') do |name, string|
53
- assert_equal(
54
- string,
55
- html4_entities.decode(html4_entities.encode(string, :named))
56
- )
38
+ each_mapping 'html4' do |name, string|
39
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :named))
57
40
  end
58
41
  end
59
42
 
60
43
  def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
61
- each_mapping('html4') do |name, string|
62
- assert_equal(
63
- string,
64
- html4_entities.decode(html4_entities.encode(string, :basic, :named))
65
- )
44
+ each_mapping 'html4' do |name, string|
45
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named))
66
46
  end
67
47
  end
68
48
 
69
49
  def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
70
- each_mapping('html4') do |name, string|
71
- assert_equal(
72
- string,
73
- html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
74
- )
50
+ each_mapping 'html4' do |name, string|
51
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
75
52
  end
76
53
  end
77
54
 
78
55
  def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
79
- each_mapping('html4') do |name, string|
80
- assert_equal(
81
- string,
82
- html4_entities.decode(html4_entities.encode(string, :hexadecimal))
83
- )
56
+ each_mapping 'html4' do |name, string|
57
+ assert_equal string, html4_entities.decode(html4_entities.encode(string, :hexadecimal))
84
58
  end
85
59
  end
86
60
 
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ unless ENCODING_AWARE_RUBY
5
+ class HTMLEntities::Ruby18Test < Test::Unit::TestCase
6
+
7
+ # Reported by Benoit Larroque
8
+ def test_should_encode_without_error_when_KCODE_is_not_UTF_8
9
+ kcode = $KCODE
10
+ $KCODE = "n"
11
+ coder = HTMLEntities.new
12
+ text = [8212].pack('U')
13
+ assert_equal "&#8212;", coder.encode(text, :decimal)
14
+ $KCODE = kcode
15
+ end
16
+
17
+ end
18
+ end
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path("../common", __FILE__)
3
+
4
+ if ENCODING_AWARE_RUBY
5
+ class HTMLEntities::Ruby19Test < Test::Unit::TestCase
6
+
7
+ def test_should_encode_ascii_to_ascii
8
+ s = "<elan>".encode(Encoding::US_ASCII)
9
+ assert_equal Encoding::US_ASCII, s.encoding
10
+
11
+ t = HTMLEntities.new.encode(s)
12
+ assert_equal "&lt;elan&gt;", t
13
+ assert_equal Encoding::US_ASCII, t.encoding
14
+ end
15
+
16
+ def test_should_encode_utf8_to_utf8_if_needed
17
+ s = "<élan>"
18
+ assert_equal Encoding::UTF_8, s.encoding
19
+
20
+ t = HTMLEntities.new.encode(s)
21
+ assert_equal "&lt;élan&gt;", t
22
+ assert_equal Encoding::UTF_8, t.encoding
23
+ end
24
+
25
+ def test_should_encode_utf8_to_ascii_if_possible
26
+ s = "<elan>"
27
+ assert_equal Encoding::UTF_8, s.encoding
28
+
29
+ t = HTMLEntities.new.encode(s)
30
+ assert_equal "&lt;elan&gt;", t
31
+ assert_equal Encoding::US_ASCII, t.encoding
32
+ end
33
+
34
+ def test_should_encode_other_encoding_to_utf8
35
+ s = "<élan>".encode(Encoding::ISO_8859_1)
36
+ assert_equal Encoding::ISO_8859_1, s.encoding
37
+
38
+ t = HTMLEntities.new.encode(s)
39
+ assert_equal "&lt;élan&gt;", t
40
+ assert_equal Encoding::UTF_8, t.encoding
41
+ end
42
+
43
+ def test_should_decode_ascii_to_utf8
44
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
45
+ assert_equal Encoding::US_ASCII, s.encoding
46
+
47
+ t = HTMLEntities.new.decode(s)
48
+ assert_equal "<élan>", t
49
+ assert_equal Encoding::UTF_8, t.encoding
50
+ end
51
+
52
+ def test_should_decode_utf8_to_utf8
53
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::UTF_8)
54
+ assert_equal Encoding::UTF_8, s.encoding
55
+
56
+ t = HTMLEntities.new.decode(s)
57
+ assert_equal "<élan>", t
58
+ assert_equal Encoding::UTF_8, t.encoding
59
+ end
60
+
61
+ def test_should_decode_other_encoding_to_utf8
62
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::ISO_8859_1)
63
+ assert_equal Encoding::ISO_8859_1, s.encoding
64
+
65
+ t = HTMLEntities.new.decode(s)
66
+ assert_equal "<élan>", t
67
+ assert_equal Encoding::UTF_8, t.encoding
68
+ end
69
+ end
70
+ end
@@ -1,9 +1,5 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
2
+ require File.expand_path("../common", __FILE__)
7
3
 
8
4
  class HTMLEntities::XHTML1Test < Test::Unit::TestCase
9
5
 
@@ -25,5 +21,4 @@ class HTMLEntities::XHTML1Test < Test::Unit::TestCase
25
21
  assert_equal "&b.Theta;", html_entities.decode("&b.Theta;")
26
22
  end
27
23
 
28
-
29
24
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: htmlentities
3
3
  version: !ruby/object:Gem::Version
4
- hash: 63
4
+ hash: 51
5
5
  prerelease: false
6
6
  segments:
7
7
  - 4
8
- - 2
9
- - 4
10
- version: 4.2.4
8
+ - 3
9
+ - 0
10
+ version: 4.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Paul Battley
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-30 00:00:00 +00:00
18
+ date: 2011-03-29 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -26,30 +26,30 @@ executables: []
26
26
  extensions: []
27
27
 
28
28
  extra_rdoc_files:
29
- - README.rdoc
30
29
  - History.txt
31
30
  - COPYING.txt
32
31
  files:
33
- - lib/htmlentities/version.rb
32
+ - lib/htmlentities.rb
34
33
  - lib/htmlentities/flavors.rb
34
+ - lib/htmlentities/version.rb
35
35
  - lib/htmlentities/encoder.rb
36
- - lib/htmlentities/legacy.rb
37
- - lib/htmlentities/mappings/html4.rb
38
36
  - lib/htmlentities/mappings/expanded.rb
39
37
  - lib/htmlentities/mappings/xhtml1.rb
38
+ - lib/htmlentities/mappings/html4.rb
40
39
  - lib/htmlentities/decoder.rb
41
- - lib/htmlentities.rb
42
- - test/entities_test.rb
40
+ - test/decoding_test.rb
41
+ - test/ruby_1_8_test.rb
43
42
  - test/xhtml1_test.rb
44
- - test/roundtrip_test.rb
45
- - test/legacy_test.rb
46
- - test/expanded_test.rb
47
- - test/test_all.rb
48
43
  - test/html4_test.rb
44
+ - test/encoding_test.rb
45
+ - test/expanded_test.rb
46
+ - test/ruby_1_9_test.rb
47
+ - test/common.rb
48
+ - test/roundtrip_test.rb
49
+ - test/entities_test.rb
50
+ - perf/performance.rb
49
51
  - perf/profile.rb
50
52
  - perf/benchmark.rb
51
- - perf/performance.rb
52
- - README.rdoc
53
53
  - History.txt
54
54
  - COPYING.txt
55
55
  has_rdoc: true
@@ -87,4 +87,12 @@ signing_key:
87
87
  specification_version: 3
88
88
  summary: A module for encoding and decoding (X)HTML entities.
89
89
  test_files:
90
- - test/test_all.rb
90
+ - test/decoding_test.rb
91
+ - test/ruby_1_8_test.rb
92
+ - test/xhtml1_test.rb
93
+ - test/html4_test.rb
94
+ - test/encoding_test.rb
95
+ - test/expanded_test.rb
96
+ - test/ruby_1_9_test.rb
97
+ - test/roundtrip_test.rb
98
+ - test/entities_test.rb
@@ -1,44 +0,0 @@
1
- == HTMLEntities
2
-
3
- HTML entity encoding and decoding for Ruby
4
-
5
- The HTMLEntities module facilitates encoding and decoding of
6
- (X)HTML entities from/to their corresponding UTF-8 codepoints.
7
-
8
- To install (requires root/admin privileges):
9
-
10
- ruby setup.rb
11
-
12
- Alternatively, you can just use the gem.
13
-
14
- == Licence
15
-
16
- This code is free to use under the terms of the MIT licence:
17
-
18
- Copyright (c) 2005-2009 Paul Battley
19
-
20
- Permission is hereby granted, free of charge, to any person obtaining a copy
21
- of this software and associated documentation files (the "Software"), to
22
- deal in the Software without restriction, including without limitation the
23
- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
24
- sell copies of the Software, and to permit persons to whom the Software is
25
- furnished to do so, subject to the following conditions:
26
-
27
- The above copyright notice and this permission notice shall be included in
28
- all copies or substantial portions of the Software.
29
-
30
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35
- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
36
- IN THE SOFTWARE.
37
-
38
- If you'd like to negotiate a different licence for a specific use, just
39
- contact me -- I'll almost certainly permit it.
40
-
41
- == Contact
42
-
43
- Comments are welcome. Send an email to pbattley@gmail.com.
44
-
@@ -1,31 +0,0 @@
1
- class HTMLEntities
2
- class << self
3
-
4
- #
5
- # Legacy compatibility class method allowing direct encoding of XHTML1 entities.
6
- # See HTMLEntities#encode for description of parameters.
7
- #
8
- # Deprecated.
9
- #
10
- def encode_entities(*args)
11
- xhtml1_entities.encode(*args)
12
- end
13
-
14
- #
15
- # Legacy compatibility class method allowing direct decoding of XHTML1 entities.
16
- # See HTMLEntities#decode for description of parameters.
17
- #
18
- # Deprecated.
19
- #
20
- def decode_entities(*args)
21
- xhtml1_entities.decode(*args)
22
- end
23
-
24
- private
25
-
26
- def xhtml1_entities
27
- @xhtml1_entities ||= new('xhtml1')
28
- end
29
-
30
- end
31
- end
@@ -1,35 +0,0 @@
1
- # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__) + '/../lib')
3
- require 'test/unit'
4
- require 'htmlentities'
5
-
6
- $KCODE = 'u' unless "1.9".respond_to?(:encoding)
7
-
8
- #
9
- # Test that version 3.x functionality still works
10
- #
11
- class HTMLEntities::LegacyTest < Test::Unit::TestCase
12
-
13
- def test_should_decode_via_legacy_interface
14
- assert_decode('&', '&amp;')
15
- assert_decode('±', '&plusmn;')
16
- assert_decode('“', '&#8220;')
17
- assert_decode('—', '&#x2014;')
18
- end
19
-
20
- def test_should_encode_via_legacy_interface
21
- assert_encode('&amp;', '&', :basic)
22
- assert_encode('&eth;', 'ð', :named)
23
- assert_encode('&#8230;', '…', :decimal)
24
- assert_encode('&#x2212;', '−', :hexadecimal)
25
- end
26
-
27
- def assert_encode(expected, *encode_args)
28
- assert_equal expected, HTMLEntities.encode_entities(*encode_args)
29
- end
30
-
31
- def assert_decode(expected, *decode_args)
32
- assert_equal expected, HTMLEntities.decode_entities(*decode_args)
33
- end
34
-
35
- end
@@ -1,4 +0,0 @@
1
- # encoding: UTF-8
2
- Dir[File.dirname(__FILE__)+'/*_test.rb'].each do |test|
3
- require "./#{test}"
4
- end