htmlentities 4.2.4 → 4.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +13 -0
- data/lib/htmlentities.rb +3 -4
- data/lib/htmlentities/decoder.rb +11 -1
- data/lib/htmlentities/encoder.rb +20 -10
- data/lib/htmlentities/version.rb +2 -2
- data/test/common.rb +6 -0
- data/test/decoding_test.rb +101 -0
- data/test/encoding_test.rb +106 -0
- data/test/entities_test.rb +2 -204
- data/test/expanded_test.rb +2 -5
- data/test/html4_test.rb +1 -5
- data/test/roundtrip_test.rb +17 -43
- data/test/ruby_1_8_test.rb +18 -0
- data/test/ruby_1_9_test.rb +70 -0
- data/test/xhtml1_test.rb +1 -6
- metadata +26 -18
- data/README.rdoc +0 -44
- data/lib/htmlentities/legacy.rb +0 -31
- data/test/legacy_test.rb +0 -35
- data/test/test_all.rb +0 -4
data/History.txt
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
== 4.3.0 (2011-03-29)
|
2
|
+
* Use Ruby 1.9's encoding support where available.
|
3
|
+
* Deprecated HTMLEntities.encode_entities/decode_entities interface is now
|
4
|
+
removed.
|
5
|
+
|
6
|
+
== 4.2.4 (2011-01-30)
|
7
|
+
* Fix issue where double-escaped entities were not correctly escaped. Bug
|
8
|
+
reported by Christian Kruse.
|
9
|
+
|
10
|
+
== 4.2.3 (2011-01-07)
|
11
|
+
* Additional entities from Junya Ishihara.
|
12
|
+
* Performance improvements.
|
13
|
+
|
1
14
|
== 4.2.1 (2010-04-05)
|
2
15
|
* Fixed error on Ruby 1.8.x when $KCODE was not set to "UTF8". Thanks to
|
3
16
|
Benoit Larroque for the bug report.
|
data/lib/htmlentities.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'htmlentities/legacy'
|
3
2
|
require 'htmlentities/flavors'
|
4
3
|
require 'htmlentities/encoder'
|
5
4
|
require 'htmlentities/decoder'
|
@@ -61,9 +60,9 @@ class HTMLEntities
|
|
61
60
|
# If no instructions are specified, :basic will be used.
|
62
61
|
#
|
63
62
|
# Examples:
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
63
|
+
# encode(str) - XML-safe
|
64
|
+
# encode(str, :basic, :decimal) - XML-safe and 7-bit clean
|
65
|
+
# encode(str, :basic, :named, :decimal) - 7-bit clean, with all
|
67
66
|
# non-ASCII characters replaced with their named entity where possible, and
|
68
67
|
# decimal equivalents otherwise.
|
69
68
|
#
|
data/lib/htmlentities/decoder.rb
CHANGED
@@ -7,7 +7,7 @@ class HTMLEntities
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def decode(source)
|
10
|
-
source.
|
10
|
+
prepare(source).gsub(@entity_regexp) {
|
11
11
|
if $1 && codepoint = @map[$1]
|
12
12
|
[codepoint].pack('U')
|
13
13
|
elsif $2
|
@@ -21,6 +21,16 @@ class HTMLEntities
|
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
|
+
if "1.9".respond_to?(:encoding)
|
25
|
+
def prepare(string) #:nodoc:
|
26
|
+
string.to_s.encode(Encoding::UTF_8)
|
27
|
+
end
|
28
|
+
else
|
29
|
+
def prepare(string) #:nodoc:
|
30
|
+
string.to_s
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
24
34
|
def entity_regexp
|
25
35
|
key_lengths = @map.keys.map{ |k| k.length }
|
26
36
|
entity_name_pattern =
|
data/lib/htmlentities/encoder.rb
CHANGED
@@ -13,13 +13,23 @@ class HTMLEntities
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def encode(source)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
string
|
16
|
+
prepare(source).
|
17
|
+
gsub(basic_entity_regexp){ encode_basic($&) }.
|
18
|
+
gsub(extended_entity_regexp){ encode_extended($&) }
|
20
19
|
end
|
21
20
|
|
22
21
|
private
|
22
|
+
|
23
|
+
if "1.9".respond_to?(:encoding)
|
24
|
+
def prepare(string) #:nodoc:
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
26
|
+
end
|
27
|
+
else
|
28
|
+
def prepare(string) #:nodoc:
|
29
|
+
string.to_s
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
23
33
|
def basic_entity_regexp
|
24
34
|
@basic_entity_regexp ||= (
|
25
35
|
case @flavor
|
@@ -33,15 +43,15 @@ class HTMLEntities
|
|
33
43
|
|
34
44
|
def extended_entity_regexp
|
35
45
|
@extended_entity_regexp ||= (
|
36
|
-
|
46
|
+
options = [nil]
|
37
47
|
if encoding_aware?
|
38
|
-
|
48
|
+
pattern = '[^\u{20}-\u{7E}]'
|
39
49
|
else
|
40
|
-
|
41
|
-
|
50
|
+
pattern = '[^\x20-\x7E]'
|
51
|
+
options << "U"
|
42
52
|
end
|
43
|
-
|
44
|
-
Regexp.new(
|
53
|
+
pattern << "|'" if @flavor == 'html4'
|
54
|
+
Regexp.new(pattern, *options)
|
45
55
|
)
|
46
56
|
end
|
47
57
|
|
data/lib/htmlentities/version.rb
CHANGED
data/test/common.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
class HTMLEntities::DecodingTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_decode(expected, input)
|
11
|
+
@entities.each do |coder|
|
12
|
+
assert_equal expected, coder.decode(input)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_decode_basic_entities
|
17
|
+
assert_decode '&', '&amp;'
|
18
|
+
assert_decode '<', '&lt;'
|
19
|
+
assert_decode '"', '&quot;'
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_should_decode_extended_named_entities
|
23
|
+
assert_decode '±', '&plusmn;'
|
24
|
+
assert_decode 'ð', '&eth;'
|
25
|
+
assert_decode 'Œ', '&OElig;'
|
26
|
+
assert_decode 'œ', '&oelig;'
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_should_decode_decimal_entities
|
30
|
+
assert_decode '“', '&#8220;'
|
31
|
+
assert_decode '…', '&#8230;'
|
32
|
+
assert_decode ' ', ' '
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_should_decode_hexadecimal_entities
|
36
|
+
assert_decode '−', '&#x2212;'
|
37
|
+
assert_decode '—', '&#x2014;'
|
38
|
+
assert_decode '`', '`'
|
39
|
+
assert_decode '`', '`'
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_should_not_mutate_string_being_decoded
|
43
|
+
original = "<£"
|
44
|
+
input = original.dup
|
45
|
+
HTMLEntities.new.decode(input)
|
46
|
+
|
47
|
+
assert_equal original, input
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_should_decode_text_with_mix_of_entities
|
51
|
+
# Just a random headline - I needed something with accented letters.
|
52
|
+
assert_decode(
|
53
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
54
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France'
|
55
|
+
)
|
56
|
+
assert_decode(
|
57
|
+
'"bientôt" & 文字',
|
58
|
+
'"bientôt" & 文字'
|
59
|
+
)
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_should_decode_empty_string
|
63
|
+
assert_decode '', ''
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_should_skip_unknown_entity
|
67
|
+
assert_decode '&bogus;', '&bogus;'
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_should_decode_double_encoded_entity_once
|
71
|
+
assert_decode '&amp;', '&amp;amp;'
|
72
|
+
end
|
73
|
+
|
74
|
+
# Faults found and patched by Moonwolf
|
75
|
+
def test_should_decode_full_hexadecimal_range
|
76
|
+
(0..127).each do |codepoint|
|
77
|
+
assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Reported by Dallas DeVries and Johan Duflost
|
82
|
+
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
|
83
|
+
assert_decode [178].pack('U'), '²'
|
84
|
+
assert_decode [8226].pack('U'), '•'
|
85
|
+
assert_decode [948].pack('U'), 'δ'
|
86
|
+
end
|
87
|
+
|
88
|
+
# Reported by ckruse
|
89
|
+
def test_should_decode_only_first_element_in_masked_entities
|
90
|
+
input = '&amp;#3346;'
|
91
|
+
expected = '&#3346;'
|
92
|
+
assert_decode expected, input
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_should_ducktype_parameter_to_string_before_encoding
|
96
|
+
obj = Object.new
|
97
|
+
def obj.to_s; "foo"; end
|
98
|
+
assert_decode "foo", obj
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
class HTMLEntities::EncodingTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_encode(expected, input, *args)
|
11
|
+
@entities.each do |coder|
|
12
|
+
assert_equal expected, coder.encode(input, *args)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_encode_basic_entities
|
17
|
+
assert_encode '&amp;', '&', :basic
|
18
|
+
assert_encode '&quot;', '"'
|
19
|
+
assert_encode '&lt;', '<', :basic
|
20
|
+
assert_encode '&lt;', '<'
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_should_encode_basic_entities_to_decimal
|
24
|
+
assert_encode '&#38;', '&', :decimal
|
25
|
+
assert_encode '&#34;', '"', :decimal
|
26
|
+
assert_encode '&#60;', '<', :decimal
|
27
|
+
assert_encode '&#62;', '>', :decimal
|
28
|
+
assert_encode '&#39;', "'", :decimal
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_should_encode_basic_entities_to_hexadecimal
|
32
|
+
assert_encode '&#x26;', '&', :hexadecimal
|
33
|
+
assert_encode '&#x22;', '"', :hexadecimal
|
34
|
+
assert_encode '&#x3c;', '<', :hexadecimal
|
35
|
+
assert_encode '&#x3e;', '>', :hexadecimal
|
36
|
+
assert_encode '&#x27;', "'", :hexadecimal
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_should_encode_extended_named_entities
|
40
|
+
assert_encode '&plusmn;', '±', :named
|
41
|
+
assert_encode '&eth;', 'ð', :named
|
42
|
+
assert_encode '&OElig;', 'Œ', :named
|
43
|
+
assert_encode '&oelig;', 'œ', :named
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_should_encode_decimal_entities
|
47
|
+
assert_encode '&#8220;', '“', :decimal
|
48
|
+
assert_encode '&#8230;', '…', :decimal
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_should_encode_hexadecimal_entities
|
52
|
+
assert_encode '&#x2212;', '−', :hexadecimal
|
53
|
+
assert_encode '&#x2014;', '—', :hexadecimal
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_should_encode_text_using_mix_of_entities
|
57
|
+
assert_encode(
|
58
|
+
'&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
|
59
|
+
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
60
|
+
)
|
61
|
+
assert_encode(
|
62
|
+
'&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
|
63
|
+
'"bientôt" & 文字', :basic, :named, :decimal
|
64
|
+
)
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
68
|
+
assert_encode(
|
69
|
+
'&quot;bient&ocirc;t&quot; &amp; &#x6587;&#x5b57;',
|
70
|
+
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
71
|
+
)
|
72
|
+
assert_encode(
|
73
|
+
'&quot;bient&ocirc;t&quot; &amp; &#25991;&#23383;',
|
74
|
+
'"bientôt" & 文字', :decimal, :named, :basic
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_should_detect_illegal_encoding_command
|
79
|
+
assert_raise HTMLEntities::InstructionError do
|
80
|
+
HTMLEntities.new.encode('foo', :bar, :baz)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_should_not_encode_normal_ASCII
|
85
|
+
assert_encode '`', '`'
|
86
|
+
assert_encode ' ', ' '
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_should_double_encode_existing_entity
|
90
|
+
assert_encode '&amp;amp;', '&amp;'
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_should_not_mutate_string_being_encoded
|
94
|
+
original = "<£"
|
95
|
+
input = original.dup
|
96
|
+
HTMLEntities.new.encode(input, :basic, :decimal)
|
97
|
+
|
98
|
+
assert_equal original, input
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_should_ducktype_parameter_to_string_before_encoding
|
102
|
+
obj = Object.new
|
103
|
+
def obj.to_s; "foo"; end
|
104
|
+
assert_encode "foo", obj
|
105
|
+
end
|
106
|
+
end
|
data/test/entities_test.rb
CHANGED
@@ -1,30 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
9
5
|
|
10
|
-
attr_reader :xhtml1_entities, :html4_entities
|
11
|
-
|
12
|
-
def setup
|
13
|
-
@xhtml1_entities = HTMLEntities.new('xhtml1')
|
14
|
-
@html4_entities = HTMLEntities.new('html4')
|
15
|
-
end
|
16
|
-
|
17
|
-
class PseudoString
|
18
|
-
def initialize(string)
|
19
|
-
@string = string
|
20
|
-
end
|
21
|
-
def to_s
|
22
|
-
@string
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
6
|
def test_should_raise_exception_when_unknown_flavor_specified
|
27
|
-
assert_raises
|
7
|
+
assert_raises HTMLEntities::UnknownFlavor do
|
28
8
|
HTMLEntities.new('foo')
|
29
9
|
end
|
30
10
|
end
|
@@ -41,186 +21,4 @@ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
|
41
21
|
end
|
42
22
|
end
|
43
23
|
|
44
|
-
def test_should_decode_basic_entities
|
45
|
-
assert_decode('&', '&')
|
46
|
-
assert_decode('<', '<')
|
47
|
-
assert_decode('"', '"')
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_should_encode_basic_entities
|
51
|
-
assert_encode('&', '&', :basic)
|
52
|
-
assert_encode('"', '"')
|
53
|
-
assert_encode('<', '<', :basic)
|
54
|
-
assert_encode('<', '<')
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_should_encode_basic_entities_to_decimal
|
58
|
-
assert_encode('&', '&', :decimal)
|
59
|
-
assert_encode('"', '"', :decimal)
|
60
|
-
assert_encode('<', '<', :decimal)
|
61
|
-
assert_encode('>', '>', :decimal)
|
62
|
-
assert_encode(''', "'", :decimal)
|
63
|
-
end
|
64
|
-
|
65
|
-
def test_should_encode_basic_entities_to_hexadecimal
|
66
|
-
assert_encode('&', '&', :hexadecimal)
|
67
|
-
assert_encode('"', '"', :hexadecimal)
|
68
|
-
assert_encode('<', '<', :hexadecimal)
|
69
|
-
assert_encode('>', '>', :hexadecimal)
|
70
|
-
assert_encode(''', "'", :hexadecimal)
|
71
|
-
end
|
72
|
-
|
73
|
-
def test_should_decode_extended_named_entities
|
74
|
-
assert_decode('±', '±')
|
75
|
-
assert_decode('ð', 'ð')
|
76
|
-
assert_decode('Œ', 'Œ')
|
77
|
-
assert_decode('œ', 'œ')
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_should_encode_extended_named_entities
|
81
|
-
assert_encode('±', '±', :named)
|
82
|
-
assert_encode('ð', 'ð', :named)
|
83
|
-
assert_encode('Œ', 'Œ', :named)
|
84
|
-
assert_encode('œ', 'œ', :named)
|
85
|
-
end
|
86
|
-
|
87
|
-
def test_should_decode_decimal_entities
|
88
|
-
assert_decode('“', '“')
|
89
|
-
assert_decode('…', '…')
|
90
|
-
assert_decode(' ', ' ')
|
91
|
-
end
|
92
|
-
|
93
|
-
def test_should_encode_decimal_entities
|
94
|
-
assert_encode('“', '“', :decimal)
|
95
|
-
assert_encode('…', '…', :decimal)
|
96
|
-
end
|
97
|
-
|
98
|
-
def test_should_decode_hexadecimal_entities
|
99
|
-
assert_decode('−', '−')
|
100
|
-
assert_decode('—', '—')
|
101
|
-
assert_decode('`', '`')
|
102
|
-
assert_decode('`', '`')
|
103
|
-
end
|
104
|
-
|
105
|
-
def test_should_encode_hexadecimal_entities
|
106
|
-
assert_encode('−', '−', :hexadecimal)
|
107
|
-
assert_encode('—', '—', :hexadecimal)
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_should_decode_text_with_mix_of_entities
|
111
|
-
# Just a random headline - I needed something with accented letters.
|
112
|
-
assert_decode(
|
113
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
114
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France'
|
115
|
-
)
|
116
|
-
assert_decode(
|
117
|
-
'"bientôt" & 文字',
|
118
|
-
'"bientôt" & 文字'
|
119
|
-
)
|
120
|
-
end
|
121
|
-
|
122
|
-
def test_should_encode_text_using_mix_of_entities
|
123
|
-
assert_encode(
|
124
|
-
'"bientôt" & 文字',
|
125
|
-
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
126
|
-
)
|
127
|
-
assert_encode(
|
128
|
-
'"bientôt" & 文字',
|
129
|
-
'"bientôt" & 文字', :basic, :named, :decimal
|
130
|
-
)
|
131
|
-
end
|
132
|
-
|
133
|
-
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
134
|
-
assert_encode(
|
135
|
-
'"bientôt" & 文字',
|
136
|
-
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
137
|
-
)
|
138
|
-
assert_encode(
|
139
|
-
'"bientôt" & 文字',
|
140
|
-
'"bientôt" & 文字', :decimal, :named, :basic
|
141
|
-
)
|
142
|
-
end
|
143
|
-
|
144
|
-
def test_should_detect_illegal_encoding_command
|
145
|
-
assert_raise(HTMLEntities::InstructionError) {
|
146
|
-
HTMLEntities.encode_entities('foo', :bar, :baz)
|
147
|
-
}
|
148
|
-
end
|
149
|
-
|
150
|
-
def test_should_decode_empty_string
|
151
|
-
assert_decode('', '')
|
152
|
-
end
|
153
|
-
|
154
|
-
def test_should_skip_unknown_entity
|
155
|
-
assert_decode('&bogus;', '&bogus;')
|
156
|
-
end
|
157
|
-
|
158
|
-
def test_should_decode_double_encoded_entity_once
|
159
|
-
assert_decode('&', '&amp;')
|
160
|
-
end
|
161
|
-
|
162
|
-
def test_should_not_encode_normal_ASCII
|
163
|
-
assert_encode('`', '`')
|
164
|
-
assert_encode(' ', ' ')
|
165
|
-
end
|
166
|
-
|
167
|
-
def test_should_double_encode_existing_entity
|
168
|
-
assert_encode('&amp;', '&')
|
169
|
-
end
|
170
|
-
|
171
|
-
# Faults found and patched by Moonwolf
|
172
|
-
def test_should_decode_full_hexadecimal_range
|
173
|
-
(0..127).each do |codepoint|
|
174
|
-
assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
# Reported by Dallas DeVries and Johan Duflost
|
179
|
-
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
|
180
|
-
assert_decode([178].pack('U'), '²')
|
181
|
-
assert_decode([8226].pack('U'), '•')
|
182
|
-
assert_decode([948].pack('U'), 'δ')
|
183
|
-
end
|
184
|
-
|
185
|
-
if RUBY_VERSION =~ /^1\.8\./
|
186
|
-
# Reported by Benoit Larroque
|
187
|
-
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
188
|
-
kcode = $KCODE
|
189
|
-
$KCODE = "n"
|
190
|
-
coder = HTMLEntities.new;
|
191
|
-
text = [8212].pack('U')
|
192
|
-
assert_equal "—", coder.encode(text, :decimal)
|
193
|
-
$KCODE = kcode
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# Reported by ckruse
|
198
|
-
def test_should_decode_only_first_element_in_masked_entities
|
199
|
-
input = '&#3346;'
|
200
|
-
expected = 'ഒ'
|
201
|
-
assert_decode expected, input
|
202
|
-
end
|
203
|
-
|
204
|
-
def test_should_ducktype_parameter_to_string_before_encoding
|
205
|
-
pseudo_string = PseudoString.new('foo')
|
206
|
-
assert_decode('foo', pseudo_string)
|
207
|
-
end
|
208
|
-
|
209
|
-
def test_should_ducktype_parameter_to_string_before_decoding
|
210
|
-
pseudo_string = PseudoString.new('foo')
|
211
|
-
assert_encode('foo', pseudo_string)
|
212
|
-
end
|
213
|
-
|
214
|
-
def assert_decode(expected, input)
|
215
|
-
[xhtml1_entities, html4_entities].each do |coder|
|
216
|
-
assert_equal(expected, coder.decode(input))
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
def assert_encode(expected, input, *args)
|
221
|
-
[xhtml1_entities, html4_entities].each do |coder|
|
222
|
-
assert_equal(expected, coder.encode(input, *args))
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|
226
24
|
end
|
data/test/expanded_test.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::ExpandedTest < Test::Unit::TestCase
|
9
5
|
|
@@ -109,4 +105,5 @@ class HTMLEntities::ExpandedTest < Test::Unit::TestCase
|
|
109
105
|
assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
|
110
106
|
end
|
111
107
|
end
|
108
|
+
|
112
109
|
end
|
data/test/html4_test.rb
CHANGED
data/test/roundtrip_test.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
2
|
+
require File.expand_path("../common", __FILE__)
|
5
3
|
|
6
4
|
class HTMLEntities::RoundtripTest < Test::Unit::TestCase
|
7
5
|
|
@@ -13,74 +11,50 @@ class HTMLEntities::RoundtripTest < Test::Unit::TestCase
|
|
13
11
|
end
|
14
12
|
|
15
13
|
def test_should_roundtrip_xhtml1_entities_via_named_encoding
|
16
|
-
each_mapping
|
17
|
-
assert_equal(
|
18
|
-
string,
|
19
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
|
20
|
-
)
|
14
|
+
each_mapping 'xhtml1' do |name, string|
|
15
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
|
21
16
|
end
|
22
17
|
end
|
23
18
|
|
24
19
|
def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
|
25
|
-
each_mapping
|
26
|
-
assert_equal(
|
27
|
-
string,
|
28
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
|
29
|
-
)
|
20
|
+
each_mapping 'xhtml1' do |name, string|
|
21
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
|
30
22
|
end
|
31
23
|
end
|
32
24
|
|
33
25
|
def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
|
34
|
-
each_mapping
|
35
|
-
assert_equal(
|
36
|
-
string,
|
37
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
|
38
|
-
)
|
26
|
+
each_mapping 'xhtml1' do |name, string|
|
27
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
|
39
28
|
end
|
40
29
|
end
|
41
30
|
|
42
31
|
def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
|
43
|
-
each_mapping
|
44
|
-
assert_equal(
|
45
|
-
string,
|
46
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
|
47
|
-
)
|
32
|
+
each_mapping 'xhtml1' do |name, string|
|
33
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
|
48
34
|
end
|
49
35
|
end
|
50
36
|
|
51
37
|
def test_should_roundtrip_html4_entities_via_named_encoding
|
52
|
-
each_mapping
|
53
|
-
assert_equal(
|
54
|
-
string,
|
55
|
-
html4_entities.decode(html4_entities.encode(string, :named))
|
56
|
-
)
|
38
|
+
each_mapping 'html4' do |name, string|
|
39
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :named))
|
57
40
|
end
|
58
41
|
end
|
59
42
|
|
60
43
|
def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
|
61
|
-
each_mapping
|
62
|
-
assert_equal(
|
63
|
-
string,
|
64
|
-
html4_entities.decode(html4_entities.encode(string, :basic, :named))
|
65
|
-
)
|
44
|
+
each_mapping 'html4' do |name, string|
|
45
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named))
|
66
46
|
end
|
67
47
|
end
|
68
48
|
|
69
49
|
def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
|
70
|
-
each_mapping
|
71
|
-
assert_equal(
|
72
|
-
string,
|
73
|
-
html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
|
74
|
-
)
|
50
|
+
each_mapping 'html4' do |name, string|
|
51
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
|
75
52
|
end
|
76
53
|
end
|
77
54
|
|
78
55
|
def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
|
79
|
-
each_mapping
|
80
|
-
assert_equal(
|
81
|
-
string,
|
82
|
-
html4_entities.decode(html4_entities.encode(string, :hexadecimal))
|
83
|
-
)
|
56
|
+
each_mapping 'html4' do |name, string|
|
57
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :hexadecimal))
|
84
58
|
end
|
85
59
|
end
|
86
60
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
unless ENCODING_AWARE_RUBY
|
5
|
+
class HTMLEntities::Ruby18Test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Reported by Benoit Larroque
|
8
|
+
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
9
|
+
kcode = $KCODE
|
10
|
+
$KCODE = "n"
|
11
|
+
coder = HTMLEntities.new
|
12
|
+
text = [8212].pack('U')
|
13
|
+
assert_equal "&#8212;", coder.encode(text, :decimal)
|
14
|
+
$KCODE = kcode
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
if ENCODING_AWARE_RUBY
|
5
|
+
class HTMLEntities::Ruby19Test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_should_encode_ascii_to_ascii
|
8
|
+
s = "<elan>".encode(Encoding::US_ASCII)
|
9
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
10
|
+
|
11
|
+
t = HTMLEntities.new.encode(s)
|
12
|
+
assert_equal "&lt;elan&gt;", t
|
13
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_encode_utf8_to_utf8_if_needed
|
17
|
+
s = "<élan>"
|
18
|
+
assert_equal Encoding::UTF_8, s.encoding
|
19
|
+
|
20
|
+
t = HTMLEntities.new.encode(s)
|
21
|
+
assert_equal "&lt;élan&gt;", t
|
22
|
+
assert_equal Encoding::UTF_8, t.encoding
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_should_encode_utf8_to_ascii_if_possible
|
26
|
+
s = "<elan>"
|
27
|
+
assert_equal Encoding::UTF_8, s.encoding
|
28
|
+
|
29
|
+
t = HTMLEntities.new.encode(s)
|
30
|
+
assert_equal "&lt;elan&gt;", t
|
31
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_should_encode_other_encoding_to_utf8
|
35
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
36
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
37
|
+
|
38
|
+
t = HTMLEntities.new.encode(s)
|
39
|
+
assert_equal "&lt;élan&gt;", t
|
40
|
+
assert_equal Encoding::UTF_8, t.encoding
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_should_decode_ascii_to_utf8
|
44
|
+
s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
|
45
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
46
|
+
|
47
|
+
t = HTMLEntities.new.decode(s)
|
48
|
+
assert_equal "<élan>", t
|
49
|
+
assert_equal Encoding::UTF_8, t.encoding
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_should_decode_utf8_to_utf8
|
53
|
+
s = "&lt;&eacute;lan&gt;".encode(Encoding::UTF_8)
|
54
|
+
assert_equal Encoding::UTF_8, s.encoding
|
55
|
+
|
56
|
+
t = HTMLEntities.new.decode(s)
|
57
|
+
assert_equal "<élan>", t
|
58
|
+
assert_equal Encoding::UTF_8, t.encoding
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_should_decode_other_encoding_to_utf8
|
62
|
+
s = "&lt;&eacute;lan&gt;".encode(Encoding::ISO_8859_1)
|
63
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
64
|
+
|
65
|
+
t = HTMLEntities.new.decode(s)
|
66
|
+
assert_equal "<élan>", t
|
67
|
+
assert_equal Encoding::UTF_8, t.encoding
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/test/xhtml1_test.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::XHTML1Test < Test::Unit::TestCase
|
9
5
|
|
@@ -25,5 +21,4 @@ class HTMLEntities::XHTML1Test < Test::Unit::TestCase
|
|
25
21
|
assert_equal "&b.Theta;", html_entities.decode("&b.Theta;")
|
26
22
|
end
|
27
23
|
|
28
|
-
|
29
24
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlentities
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 51
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 4
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 4.2.4
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 4.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Paul Battley
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-29 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -26,30 +26,30 @@ executables: []
|
|
26
26
|
extensions: []
|
27
27
|
|
28
28
|
extra_rdoc_files:
|
29
|
-
- README.rdoc
|
30
29
|
- History.txt
|
31
30
|
- COPYING.txt
|
32
31
|
files:
|
33
|
-
- lib/htmlentities
|
32
|
+
- lib/htmlentities.rb
|
34
33
|
- lib/htmlentities/flavors.rb
|
34
|
+
- lib/htmlentities/version.rb
|
35
35
|
- lib/htmlentities/encoder.rb
|
36
|
-
- lib/htmlentities/legacy.rb
|
37
|
-
- lib/htmlentities/mappings/html4.rb
|
38
36
|
- lib/htmlentities/mappings/expanded.rb
|
39
37
|
- lib/htmlentities/mappings/xhtml1.rb
|
38
|
+
- lib/htmlentities/mappings/html4.rb
|
40
39
|
- lib/htmlentities/decoder.rb
|
41
|
-
-
|
42
|
-
- test/
|
40
|
+
- test/decoding_test.rb
|
41
|
+
- test/ruby_1_8_test.rb
|
43
42
|
- test/xhtml1_test.rb
|
44
|
-
- test/roundtrip_test.rb
|
45
|
-
- test/legacy_test.rb
|
46
|
-
- test/expanded_test.rb
|
47
|
-
- test/test_all.rb
|
48
43
|
- test/html4_test.rb
|
44
|
+
- test/encoding_test.rb
|
45
|
+
- test/expanded_test.rb
|
46
|
+
- test/ruby_1_9_test.rb
|
47
|
+
- test/common.rb
|
48
|
+
- test/roundtrip_test.rb
|
49
|
+
- test/entities_test.rb
|
50
|
+
- perf/performance.rb
|
49
51
|
- perf/profile.rb
|
50
52
|
- perf/benchmark.rb
|
51
|
-
- perf/performance.rb
|
52
|
-
- README.rdoc
|
53
53
|
- History.txt
|
54
54
|
- COPYING.txt
|
55
55
|
has_rdoc: true
|
@@ -87,4 +87,12 @@ signing_key:
|
|
87
87
|
specification_version: 3
|
88
88
|
summary: A module for encoding and decoding (X)HTML entities.
|
89
89
|
test_files:
|
90
|
-
- test/
|
90
|
+
- test/decoding_test.rb
|
91
|
+
- test/ruby_1_8_test.rb
|
92
|
+
- test/xhtml1_test.rb
|
93
|
+
- test/html4_test.rb
|
94
|
+
- test/encoding_test.rb
|
95
|
+
- test/expanded_test.rb
|
96
|
+
- test/ruby_1_9_test.rb
|
97
|
+
- test/roundtrip_test.rb
|
98
|
+
- test/entities_test.rb
|
data/README.rdoc
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
== HTMLEntities
|
2
|
-
|
3
|
-
HTML entity encoding and decoding for Ruby
|
4
|
-
|
5
|
-
The HTMLEntities module facilitates encoding and decoding of
|
6
|
-
(X)HTML entities from/to their corresponding UTF-8 codepoints.
|
7
|
-
|
8
|
-
To install (requires root/admin privileges):
|
9
|
-
|
10
|
-
ruby setup.rb
|
11
|
-
|
12
|
-
Alternatively, you can just use the gem.
|
13
|
-
|
14
|
-
== Licence
|
15
|
-
|
16
|
-
This code is free to use under the terms of the MIT licence:
|
17
|
-
|
18
|
-
Copyright (c) 2005-2009 Paul Battley
|
19
|
-
|
20
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
21
|
-
of this software and associated documentation files (the "Software"), to
|
22
|
-
deal in the Software without restriction, including without limitation the
|
23
|
-
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
24
|
-
sell copies of the Software, and to permit persons to whom the Software is
|
25
|
-
furnished to do so, subject to the following conditions:
|
26
|
-
|
27
|
-
The above copyright notice and this permission notice shall be included in
|
28
|
-
all copies or substantial portions of the Software.
|
29
|
-
|
30
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
31
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
32
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
33
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
34
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
35
|
-
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
36
|
-
IN THE SOFTWARE.
|
37
|
-
|
38
|
-
If you'd like to negotiate a different licence for a specific use, just
|
39
|
-
contact me -- I'll almost certainly permit it.
|
40
|
-
|
41
|
-
== Contact
|
42
|
-
|
43
|
-
Comments are welcome. Send an email to pbattley@gmail.com.
|
44
|
-
|
data/lib/htmlentities/legacy.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
class HTMLEntities
|
2
|
-
class << self
|
3
|
-
|
4
|
-
#
|
5
|
-
# Legacy compatibility class method allowing direct encoding of XHTML1 entities.
|
6
|
-
# See HTMLEntities#encode for description of parameters.
|
7
|
-
#
|
8
|
-
# Deprecated.
|
9
|
-
#
|
10
|
-
def encode_entities(*args)
|
11
|
-
xhtml1_entities.encode(*args)
|
12
|
-
end
|
13
|
-
|
14
|
-
#
|
15
|
-
# Legacy compatibility class method allowing direct decoding of XHTML1 entities.
|
16
|
-
# See HTMLEntities#decode for description of parameters.
|
17
|
-
#
|
18
|
-
# Deprecated.
|
19
|
-
#
|
20
|
-
def decode_entities(*args)
|
21
|
-
xhtml1_entities.decode(*args)
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def xhtml1_entities
|
27
|
-
@xhtml1_entities ||= new('xhtml1')
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
end
|
data/test/legacy_test.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
7
|
-
|
8
|
-
#
|
9
|
-
# Test that version 3.x functionality still works
|
10
|
-
#
|
11
|
-
class HTMLEntities::LegacyTest < Test::Unit::TestCase
|
12
|
-
|
13
|
-
def test_should_decode_via_legacy_interface
|
14
|
-
assert_decode('&', '&')
|
15
|
-
assert_decode('±', '±')
|
16
|
-
assert_decode('“', '“')
|
17
|
-
assert_decode('—', '—')
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_should_encode_via_legacy_interface
|
21
|
-
assert_encode('&', '&', :basic)
|
22
|
-
assert_encode('ð', 'ð', :named)
|
23
|
-
assert_encode('…', '…', :decimal)
|
24
|
-
assert_encode('−', '−', :hexadecimal)
|
25
|
-
end
|
26
|
-
|
27
|
-
def assert_encode(expected, *encode_args)
|
28
|
-
assert_equal expected, HTMLEntities.encode_entities(*encode_args)
|
29
|
-
end
|
30
|
-
|
31
|
-
def assert_decode(expected, *decode_args)
|
32
|
-
assert_equal expected, HTMLEntities.decode_entities(*decode_args)
|
33
|
-
end
|
34
|
-
|
35
|
-
end
|
data/test/test_all.rb
DELETED