htmlentities 4.2.4 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +13 -0
- data/lib/htmlentities.rb +3 -4
- data/lib/htmlentities/decoder.rb +11 -1
- data/lib/htmlentities/encoder.rb +20 -10
- data/lib/htmlentities/version.rb +2 -2
- data/test/common.rb +6 -0
- data/test/decoding_test.rb +101 -0
- data/test/encoding_test.rb +106 -0
- data/test/entities_test.rb +2 -204
- data/test/expanded_test.rb +2 -5
- data/test/html4_test.rb +1 -5
- data/test/roundtrip_test.rb +17 -43
- data/test/ruby_1_8_test.rb +18 -0
- data/test/ruby_1_9_test.rb +70 -0
- data/test/xhtml1_test.rb +1 -6
- metadata +26 -18
- data/README.rdoc +0 -44
- data/lib/htmlentities/legacy.rb +0 -31
- data/test/legacy_test.rb +0 -35
- data/test/test_all.rb +0 -4
data/History.txt
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
== 4.3.0 (2011-03-29)
|
2
|
+
* Use Ruby 1.9's encoding support where available.
|
3
|
+
* Deprecated HTMLEntities.encode_entities/decode_entities interface is now
|
4
|
+
removed.
|
5
|
+
|
6
|
+
== 4.2.4 (2011-01-30)
|
7
|
+
* Fix issue where double-escaped entities were not correctly escaped. Bug
|
8
|
+
reported by Christian Kruse.
|
9
|
+
|
10
|
+
== 4.2.3 (2011-01-07)
|
11
|
+
* Additional entities from Junya Ishihara.
|
12
|
+
* Performance improvements.
|
13
|
+
|
1
14
|
== 4.2.1 (2010-04-05)
|
2
15
|
* Fixed error on Ruby 1.8.x when $KCODE was not set to "UTF8". Thanks to
|
3
16
|
Benoit Larroque for the bug report.
|
data/lib/htmlentities.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'htmlentities/legacy'
|
3
2
|
require 'htmlentities/flavors'
|
4
3
|
require 'htmlentities/encoder'
|
5
4
|
require 'htmlentities/decoder'
|
@@ -61,9 +60,9 @@ class HTMLEntities
|
|
61
60
|
# If no instructions are specified, :basic will be used.
|
62
61
|
#
|
63
62
|
# Examples:
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
63
|
+
# encode(str) - XML-safe
|
64
|
+
# encode(str, :basic, :decimal) - XML-safe and 7-bit clean
|
65
|
+
# encode(str, :basic, :named, :decimal) - 7-bit clean, with all
|
67
66
|
# non-ASCII characters replaced with their named entity where possible, and
|
68
67
|
# decimal equivalents otherwise.
|
69
68
|
#
|
data/lib/htmlentities/decoder.rb
CHANGED
@@ -7,7 +7,7 @@ class HTMLEntities
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def decode(source)
|
10
|
-
source.
|
10
|
+
prepare(source).gsub(@entity_regexp) {
|
11
11
|
if $1 && codepoint = @map[$1]
|
12
12
|
[codepoint].pack('U')
|
13
13
|
elsif $2
|
@@ -21,6 +21,16 @@ class HTMLEntities
|
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
|
+
if "1.9".respond_to?(:encoding)
|
25
|
+
def prepare(string) #:nodoc:
|
26
|
+
string.to_s.encode(Encoding::UTF_8)
|
27
|
+
end
|
28
|
+
else
|
29
|
+
def prepare(string) #:nodoc:
|
30
|
+
string.to_s
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
24
34
|
def entity_regexp
|
25
35
|
key_lengths = @map.keys.map{ |k| k.length }
|
26
36
|
entity_name_pattern =
|
data/lib/htmlentities/encoder.rb
CHANGED
@@ -13,13 +13,23 @@ class HTMLEntities
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def encode(source)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
string
|
16
|
+
prepare(source).
|
17
|
+
gsub(basic_entity_regexp){ encode_basic($&) }.
|
18
|
+
gsub(extended_entity_regexp){ encode_extended($&) }
|
20
19
|
end
|
21
20
|
|
22
21
|
private
|
22
|
+
|
23
|
+
if "1.9".respond_to?(:encoding)
|
24
|
+
def prepare(string) #:nodoc:
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
26
|
+
end
|
27
|
+
else
|
28
|
+
def prepare(string) #:nodoc:
|
29
|
+
string.to_s
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
23
33
|
def basic_entity_regexp
|
24
34
|
@basic_entity_regexp ||= (
|
25
35
|
case @flavor
|
@@ -33,15 +43,15 @@ class HTMLEntities
|
|
33
43
|
|
34
44
|
def extended_entity_regexp
|
35
45
|
@extended_entity_regexp ||= (
|
36
|
-
|
46
|
+
options = [nil]
|
37
47
|
if encoding_aware?
|
38
|
-
|
48
|
+
pattern = '[^\u{20}-\u{7E}]'
|
39
49
|
else
|
40
|
-
|
41
|
-
|
50
|
+
pattern = '[^\x20-\x7E]'
|
51
|
+
options << "U"
|
42
52
|
end
|
43
|
-
|
44
|
-
Regexp.new(
|
53
|
+
pattern << "|'" if @flavor == 'html4'
|
54
|
+
Regexp.new(pattern, *options)
|
45
55
|
)
|
46
56
|
end
|
47
57
|
|
data/lib/htmlentities/version.rb
CHANGED
data/test/common.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
class HTMLEntities::DecodingTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_decode(expected, input)
|
11
|
+
@entities.each do |coder|
|
12
|
+
assert_equal expected, coder.decode(input)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_decode_basic_entities
|
17
|
+
assert_decode '&', '&amp;'
|
18
|
+
assert_decode '<', '&lt;'
|
19
|
+
assert_decode '"', '&quot;'
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_should_decode_extended_named_entities
|
23
|
+
assert_decode '±', '±'
|
24
|
+
assert_decode 'ð', 'ð'
|
25
|
+
assert_decode 'Œ', 'Œ'
|
26
|
+
assert_decode 'œ', 'œ'
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_should_decode_decimal_entities
|
30
|
+
assert_decode '“', '“'
|
31
|
+
assert_decode '…', '…'
|
32
|
+
assert_decode ' ', ' '
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_should_decode_hexadecimal_entities
|
36
|
+
assert_decode '−', '−'
|
37
|
+
assert_decode '—', '—'
|
38
|
+
assert_decode '`', '`'
|
39
|
+
assert_decode '`', '`'
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_should_not_mutate_string_being_decoded
|
43
|
+
original = "<£"
|
44
|
+
input = original.dup
|
45
|
+
HTMLEntities.new.decode(input)
|
46
|
+
|
47
|
+
assert_equal original, input
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_should_decode_text_with_mix_of_entities
|
51
|
+
# Just a random headline - I needed something with accented letters.
|
52
|
+
assert_decode(
|
53
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
54
|
+
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France'
|
55
|
+
)
|
56
|
+
assert_decode(
|
57
|
+
'"bientôt" & 文字',
|
58
|
+
'"bientôt" & 文字'
|
59
|
+
)
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_should_decode_empty_string
|
63
|
+
assert_decode '', ''
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_should_skip_unknown_entity
|
67
|
+
assert_decode '&bogus;', '&bogus;'
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_should_decode_double_encoded_entity_once
|
71
|
+
assert_decode '&amp;', '&amp;amp;'
|
72
|
+
end
|
73
|
+
|
74
|
+
# Faults found and patched by Moonwolf
|
75
|
+
def test_should_decode_full_hexadecimal_range
|
76
|
+
(0..127).each do |codepoint|
|
77
|
+
assert_decode [codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Reported by Dallas DeVries and Johan Duflost
|
82
|
+
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
|
83
|
+
assert_decode [178].pack('U'), '²'
|
84
|
+
assert_decode [8226].pack('U'), '•'
|
85
|
+
assert_decode [948].pack('U'), 'δ'
|
86
|
+
end
|
87
|
+
|
88
|
+
# Reported by ckruse
|
89
|
+
def test_should_decode_only_first_element_in_masked_entities
|
90
|
+
input = '&amp;#3346;'
|
91
|
+
expected = '&#3346;'
|
92
|
+
assert_decode expected, input
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_should_ducktype_parameter_to_string_before_encoding
|
96
|
+
obj = Object.new
|
97
|
+
def obj.to_s; "foo"; end
|
98
|
+
assert_decode "foo", obj
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
class HTMLEntities::EncodingTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@entities = [:xhtml1, :html4, :expanded].map{ |a| HTMLEntities.new(a) }
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_encode(expected, input, *args)
|
11
|
+
@entities.each do |coder|
|
12
|
+
assert_equal expected, coder.encode(input, *args)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_encode_basic_entities
|
17
|
+
assert_encode '&amp;', '&', :basic
|
18
|
+
assert_encode '&quot;', '"'
|
19
|
+
assert_encode '&lt;', '<', :basic
|
20
|
+
assert_encode '&lt;', '<'
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_should_encode_basic_entities_to_decimal
|
24
|
+
assert_encode '&#38;', '&', :decimal
|
25
|
+
assert_encode '&#34;', '"', :decimal
|
26
|
+
assert_encode '&#60;', '<', :decimal
|
27
|
+
assert_encode '&#62;', '>', :decimal
|
28
|
+
assert_encode '&#39;', "'", :decimal
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_should_encode_basic_entities_to_hexadecimal
|
32
|
+
assert_encode '&', '&', :hexadecimal
|
33
|
+
assert_encode '"', '"', :hexadecimal
|
34
|
+
assert_encode '<', '<', :hexadecimal
|
35
|
+
assert_encode '>', '>', :hexadecimal
|
36
|
+
assert_encode '&#x27;', "'", :hexadecimal
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_should_encode_extended_named_entities
|
40
|
+
assert_encode '±', '±', :named
|
41
|
+
assert_encode 'ð', 'ð', :named
|
42
|
+
assert_encode 'Œ', 'Œ', :named
|
43
|
+
assert_encode 'œ', 'œ', :named
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_should_encode_decimal_entities
|
47
|
+
assert_encode '“', '“', :decimal
|
48
|
+
assert_encode '…', '…', :decimal
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_should_encode_hexadecimal_entities
|
52
|
+
assert_encode '−', '−', :hexadecimal
|
53
|
+
assert_encode '—', '—', :hexadecimal
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_should_encode_text_using_mix_of_entities
|
57
|
+
assert_encode(
|
58
|
+
'"bientôt" & 文字',
|
59
|
+
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
60
|
+
)
|
61
|
+
assert_encode(
|
62
|
+
'"bientôt" & 文字',
|
63
|
+
'"bientôt" & 文字', :basic, :named, :decimal
|
64
|
+
)
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
68
|
+
assert_encode(
|
69
|
+
'"bientôt" & 文字',
|
70
|
+
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
71
|
+
)
|
72
|
+
assert_encode(
|
73
|
+
'"bientôt" & 文字',
|
74
|
+
'"bientôt" & 文字', :decimal, :named, :basic
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_should_detect_illegal_encoding_command
|
79
|
+
assert_raise HTMLEntities::InstructionError do
|
80
|
+
HTMLEntities.new.encode('foo', :bar, :baz)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_should_not_encode_normal_ASCII
|
85
|
+
assert_encode '`', '`'
|
86
|
+
assert_encode ' ', ' '
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_should_double_encode_existing_entity
|
90
|
+
assert_encode '&amp;amp;', '&amp;'
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_should_not_mutate_string_being_encoded
|
94
|
+
original = "<£"
|
95
|
+
input = original.dup
|
96
|
+
HTMLEntities.new.encode(input, :basic, :decimal)
|
97
|
+
|
98
|
+
assert_equal original, input
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_should_ducktype_parameter_to_string_before_encoding
|
102
|
+
obj = Object.new
|
103
|
+
def obj.to_s; "foo"; end
|
104
|
+
assert_encode "foo", obj
|
105
|
+
end
|
106
|
+
end
|
data/test/entities_test.rb
CHANGED
@@ -1,30 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
9
5
|
|
10
|
-
attr_reader :xhtml1_entities, :html4_entities
|
11
|
-
|
12
|
-
def setup
|
13
|
-
@xhtml1_entities = HTMLEntities.new('xhtml1')
|
14
|
-
@html4_entities = HTMLEntities.new('html4')
|
15
|
-
end
|
16
|
-
|
17
|
-
class PseudoString
|
18
|
-
def initialize(string)
|
19
|
-
@string = string
|
20
|
-
end
|
21
|
-
def to_s
|
22
|
-
@string
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
6
|
def test_should_raise_exception_when_unknown_flavor_specified
|
27
|
-
assert_raises
|
7
|
+
assert_raises HTMLEntities::UnknownFlavor do
|
28
8
|
HTMLEntities.new('foo')
|
29
9
|
end
|
30
10
|
end
|
@@ -41,186 +21,4 @@ class HTMLEntities::EntitiesTest < Test::Unit::TestCase
|
|
41
21
|
end
|
42
22
|
end
|
43
23
|
|
44
|
-
def test_should_decode_basic_entities
|
45
|
-
assert_decode('&', '&')
|
46
|
-
assert_decode('<', '<')
|
47
|
-
assert_decode('"', '"')
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_should_encode_basic_entities
|
51
|
-
assert_encode('&', '&', :basic)
|
52
|
-
assert_encode('"', '"')
|
53
|
-
assert_encode('<', '<', :basic)
|
54
|
-
assert_encode('<', '<')
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_should_encode_basic_entities_to_decimal
|
58
|
-
assert_encode('&', '&', :decimal)
|
59
|
-
assert_encode('"', '"', :decimal)
|
60
|
-
assert_encode('<', '<', :decimal)
|
61
|
-
assert_encode('>', '>', :decimal)
|
62
|
-
assert_encode(''', "'", :decimal)
|
63
|
-
end
|
64
|
-
|
65
|
-
def test_should_encode_basic_entities_to_hexadecimal
|
66
|
-
assert_encode('&', '&', :hexadecimal)
|
67
|
-
assert_encode('"', '"', :hexadecimal)
|
68
|
-
assert_encode('<', '<', :hexadecimal)
|
69
|
-
assert_encode('>', '>', :hexadecimal)
|
70
|
-
assert_encode(''', "'", :hexadecimal)
|
71
|
-
end
|
72
|
-
|
73
|
-
def test_should_decode_extended_named_entities
|
74
|
-
assert_decode('±', '±')
|
75
|
-
assert_decode('ð', 'ð')
|
76
|
-
assert_decode('Œ', 'Œ')
|
77
|
-
assert_decode('œ', 'œ')
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_should_encode_extended_named_entities
|
81
|
-
assert_encode('±', '±', :named)
|
82
|
-
assert_encode('ð', 'ð', :named)
|
83
|
-
assert_encode('Œ', 'Œ', :named)
|
84
|
-
assert_encode('œ', 'œ', :named)
|
85
|
-
end
|
86
|
-
|
87
|
-
def test_should_decode_decimal_entities
|
88
|
-
assert_decode('“', '“')
|
89
|
-
assert_decode('…', '…')
|
90
|
-
assert_decode(' ', ' ')
|
91
|
-
end
|
92
|
-
|
93
|
-
def test_should_encode_decimal_entities
|
94
|
-
assert_encode('“', '“', :decimal)
|
95
|
-
assert_encode('…', '…', :decimal)
|
96
|
-
end
|
97
|
-
|
98
|
-
def test_should_decode_hexadecimal_entities
|
99
|
-
assert_decode('−', '−')
|
100
|
-
assert_decode('—', '—')
|
101
|
-
assert_decode('`', '`')
|
102
|
-
assert_decode('`', '`')
|
103
|
-
end
|
104
|
-
|
105
|
-
def test_should_encode_hexadecimal_entities
|
106
|
-
assert_encode('−', '−', :hexadecimal)
|
107
|
-
assert_encode('—', '—', :hexadecimal)
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_should_decode_text_with_mix_of_entities
|
111
|
-
# Just a random headline - I needed something with accented letters.
|
112
|
-
assert_decode(
|
113
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France',
|
114
|
-
'Le tabac pourrait bientôt être banni dans tous les lieux publics en France'
|
115
|
-
)
|
116
|
-
assert_decode(
|
117
|
-
'"bientôt" & 文字',
|
118
|
-
'"bientôt" & 文字'
|
119
|
-
)
|
120
|
-
end
|
121
|
-
|
122
|
-
def test_should_encode_text_using_mix_of_entities
|
123
|
-
assert_encode(
|
124
|
-
'"bientôt" & 文字',
|
125
|
-
'"bientôt" & 文字', :basic, :named, :hexadecimal
|
126
|
-
)
|
127
|
-
assert_encode(
|
128
|
-
'"bientôt" & 文字',
|
129
|
-
'"bientôt" & 文字', :basic, :named, :decimal
|
130
|
-
)
|
131
|
-
end
|
132
|
-
|
133
|
-
def test_should_sort_commands_when_encoding_using_mix_of_entities
|
134
|
-
assert_encode(
|
135
|
-
'"bientôt" & 文字',
|
136
|
-
'"bientôt" & 文字', :named, :hexadecimal, :basic
|
137
|
-
)
|
138
|
-
assert_encode(
|
139
|
-
'"bientôt" & 文字',
|
140
|
-
'"bientôt" & 文字', :decimal, :named, :basic
|
141
|
-
)
|
142
|
-
end
|
143
|
-
|
144
|
-
def test_should_detect_illegal_encoding_command
|
145
|
-
assert_raise(HTMLEntities::InstructionError) {
|
146
|
-
HTMLEntities.encode_entities('foo', :bar, :baz)
|
147
|
-
}
|
148
|
-
end
|
149
|
-
|
150
|
-
def test_should_decode_empty_string
|
151
|
-
assert_decode('', '')
|
152
|
-
end
|
153
|
-
|
154
|
-
def test_should_skip_unknown_entity
|
155
|
-
assert_decode('&bogus;', '&bogus;')
|
156
|
-
end
|
157
|
-
|
158
|
-
def test_should_decode_double_encoded_entity_once
|
159
|
-
assert_decode('&', '&amp;')
|
160
|
-
end
|
161
|
-
|
162
|
-
def test_should_not_encode_normal_ASCII
|
163
|
-
assert_encode('`', '`')
|
164
|
-
assert_encode(' ', ' ')
|
165
|
-
end
|
166
|
-
|
167
|
-
def test_should_double_encode_existing_entity
|
168
|
-
assert_encode('&amp;', '&')
|
169
|
-
end
|
170
|
-
|
171
|
-
# Faults found and patched by Moonwolf
|
172
|
-
def test_should_decode_full_hexadecimal_range
|
173
|
-
(0..127).each do |codepoint|
|
174
|
-
assert_decode([codepoint].pack('U'), "&\#x#{codepoint.to_s(16)};")
|
175
|
-
end
|
176
|
-
end
|
177
|
-
|
178
|
-
# Reported by Dallas DeVries and Johan Duflost
|
179
|
-
def test_should_decode_named_entities_reported_as_missing_in_3_0_1
|
180
|
-
assert_decode([178].pack('U'), '²')
|
181
|
-
assert_decode([8226].pack('U'), '•')
|
182
|
-
assert_decode([948].pack('U'), 'δ')
|
183
|
-
end
|
184
|
-
|
185
|
-
if RUBY_VERSION =~ /^1\.8\./
|
186
|
-
# Reported by Benoit Larroque
|
187
|
-
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
188
|
-
kcode = $KCODE
|
189
|
-
$KCODE = "n"
|
190
|
-
coder = HTMLEntities.new;
|
191
|
-
text = [8212].pack('U')
|
192
|
-
assert_equal "—", coder.encode(text, :decimal)
|
193
|
-
$KCODE = kcode
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# Reported by ckruse
|
198
|
-
def test_should_decode_only_first_element_in_masked_entities
|
199
|
-
input = '&#3346;'
|
200
|
-
expected = 'ഒ'
|
201
|
-
assert_decode expected, input
|
202
|
-
end
|
203
|
-
|
204
|
-
def test_should_ducktype_parameter_to_string_before_encoding
|
205
|
-
pseudo_string = PseudoString.new('foo')
|
206
|
-
assert_decode('foo', pseudo_string)
|
207
|
-
end
|
208
|
-
|
209
|
-
def test_should_ducktype_parameter_to_string_before_decoding
|
210
|
-
pseudo_string = PseudoString.new('foo')
|
211
|
-
assert_encode('foo', pseudo_string)
|
212
|
-
end
|
213
|
-
|
214
|
-
def assert_decode(expected, input)
|
215
|
-
[xhtml1_entities, html4_entities].each do |coder|
|
216
|
-
assert_equal(expected, coder.decode(input))
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
def assert_encode(expected, input, *args)
|
221
|
-
[xhtml1_entities, html4_entities].each do |coder|
|
222
|
-
assert_equal(expected, coder.encode(input, *args))
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|
226
24
|
end
|
data/test/expanded_test.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::ExpandedTest < Test::Unit::TestCase
|
9
5
|
|
@@ -109,4 +105,5 @@ class HTMLEntities::ExpandedTest < Test::Unit::TestCase
|
|
109
105
|
assert_not_equal xhtml_encoder.encode(decoded, :named), html_entities.encode(decoded, :named)
|
110
106
|
end
|
111
107
|
end
|
108
|
+
|
112
109
|
end
|
data/test/html4_test.rb
CHANGED
data/test/roundtrip_test.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
2
|
+
require File.expand_path("../common", __FILE__)
|
5
3
|
|
6
4
|
class HTMLEntities::RoundtripTest < Test::Unit::TestCase
|
7
5
|
|
@@ -13,74 +11,50 @@ class HTMLEntities::RoundtripTest < Test::Unit::TestCase
|
|
13
11
|
end
|
14
12
|
|
15
13
|
def test_should_roundtrip_xhtml1_entities_via_named_encoding
|
16
|
-
each_mapping
|
17
|
-
assert_equal(
|
18
|
-
string,
|
19
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
|
20
|
-
)
|
14
|
+
each_mapping 'xhtml1' do |name, string|
|
15
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :named))
|
21
16
|
end
|
22
17
|
end
|
23
18
|
|
24
19
|
def test_should_roundtrip_xhtml1_entities_via_basic_and_named_encoding
|
25
|
-
each_mapping
|
26
|
-
assert_equal(
|
27
|
-
string,
|
28
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
|
29
|
-
)
|
20
|
+
each_mapping 'xhtml1' do |name, string|
|
21
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named))
|
30
22
|
end
|
31
23
|
end
|
32
24
|
|
33
25
|
def test_should_roundtrip_xhtml1_entities_via_basic_named_and_decimal_encoding
|
34
|
-
each_mapping
|
35
|
-
assert_equal(
|
36
|
-
string,
|
37
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
|
38
|
-
)
|
26
|
+
each_mapping 'xhtml1' do |name, string|
|
27
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :basic, :named, :decimal))
|
39
28
|
end
|
40
29
|
end
|
41
30
|
|
42
31
|
def test_should_roundtrip_xhtml1_entities_via_hexadecimal_encoding
|
43
|
-
each_mapping
|
44
|
-
assert_equal(
|
45
|
-
string,
|
46
|
-
xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
|
47
|
-
)
|
32
|
+
each_mapping 'xhtml1' do |name, string|
|
33
|
+
assert_equal string, xhtml1_entities.decode(xhtml1_entities.encode(string, :hexadecimal))
|
48
34
|
end
|
49
35
|
end
|
50
36
|
|
51
37
|
def test_should_roundtrip_html4_entities_via_named_encoding
|
52
|
-
each_mapping
|
53
|
-
assert_equal(
|
54
|
-
string,
|
55
|
-
html4_entities.decode(html4_entities.encode(string, :named))
|
56
|
-
)
|
38
|
+
each_mapping 'html4' do |name, string|
|
39
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :named))
|
57
40
|
end
|
58
41
|
end
|
59
42
|
|
60
43
|
def test_should_roundtrip_html4_entities_via_basic_and_named_encoding
|
61
|
-
each_mapping
|
62
|
-
assert_equal(
|
63
|
-
string,
|
64
|
-
html4_entities.decode(html4_entities.encode(string, :basic, :named))
|
65
|
-
)
|
44
|
+
each_mapping 'html4' do |name, string|
|
45
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named))
|
66
46
|
end
|
67
47
|
end
|
68
48
|
|
69
49
|
def test_should_roundtrip_html4_entities_via_basic_named_and_decimal_encoding
|
70
|
-
each_mapping
|
71
|
-
assert_equal(
|
72
|
-
string,
|
73
|
-
html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
|
74
|
-
)
|
50
|
+
each_mapping 'html4' do |name, string|
|
51
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :basic, :named, :decimal))
|
75
52
|
end
|
76
53
|
end
|
77
54
|
|
78
55
|
def test_should_roundtrip_html4_entities_via_hexadecimal_encoding
|
79
|
-
each_mapping
|
80
|
-
assert_equal(
|
81
|
-
string,
|
82
|
-
html4_entities.decode(html4_entities.encode(string, :hexadecimal))
|
83
|
-
)
|
56
|
+
each_mapping 'html4' do |name, string|
|
57
|
+
assert_equal string, html4_entities.decode(html4_entities.encode(string, :hexadecimal))
|
84
58
|
end
|
85
59
|
end
|
86
60
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
unless ENCODING_AWARE_RUBY
|
5
|
+
class HTMLEntities::Ruby18Test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Reported by Benoit Larroque
|
8
|
+
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
9
|
+
kcode = $KCODE
|
10
|
+
$KCODE = "n"
|
11
|
+
coder = HTMLEntities.new
|
12
|
+
text = [8212].pack('U')
|
13
|
+
assert_equal "—", coder.encode(text, :decimal)
|
14
|
+
$KCODE = kcode
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.expand_path("../common", __FILE__)
|
3
|
+
|
4
|
+
if ENCODING_AWARE_RUBY
|
5
|
+
class HTMLEntities::Ruby19Test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_should_encode_ascii_to_ascii
|
8
|
+
s = "<elan>".encode(Encoding::US_ASCII)
|
9
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
10
|
+
|
11
|
+
t = HTMLEntities.new.encode(s)
|
12
|
+
assert_equal "<elan>", t
|
13
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_should_encode_utf8_to_utf8_if_needed
|
17
|
+
s = "<élan>"
|
18
|
+
assert_equal Encoding::UTF_8, s.encoding
|
19
|
+
|
20
|
+
t = HTMLEntities.new.encode(s)
|
21
|
+
assert_equal "<élan>", t
|
22
|
+
assert_equal Encoding::UTF_8, t.encoding
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_should_encode_utf8_to_ascii_if_possible
|
26
|
+
s = "<elan>"
|
27
|
+
assert_equal Encoding::UTF_8, s.encoding
|
28
|
+
|
29
|
+
t = HTMLEntities.new.encode(s)
|
30
|
+
assert_equal "<elan>", t
|
31
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_should_encode_other_encoding_to_utf8
|
35
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
36
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
37
|
+
|
38
|
+
t = HTMLEntities.new.encode(s)
|
39
|
+
assert_equal "<élan>", t
|
40
|
+
assert_equal Encoding::UTF_8, t.encoding
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_should_decode_ascii_to_utf8
|
44
|
+
s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
|
45
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
46
|
+
|
47
|
+
t = HTMLEntities.new.decode(s)
|
48
|
+
assert_equal "<élan>", t
|
49
|
+
assert_equal Encoding::UTF_8, t.encoding
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_should_decode_utf8_to_utf8
|
53
|
+
s = "<élan>".encode(Encoding::UTF_8)
|
54
|
+
assert_equal Encoding::UTF_8, s.encoding
|
55
|
+
|
56
|
+
t = HTMLEntities.new.decode(s)
|
57
|
+
assert_equal "<élan>", t
|
58
|
+
assert_equal Encoding::UTF_8, t.encoding
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_should_decode_other_encoding_to_utf8
|
62
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
63
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
64
|
+
|
65
|
+
t = HTMLEntities.new.decode(s)
|
66
|
+
assert_equal "<élan>", t
|
67
|
+
assert_equal Encoding::UTF_8, t.encoding
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/test/xhtml1_test.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
2
|
+
require File.expand_path("../common", __FILE__)
|
7
3
|
|
8
4
|
class HTMLEntities::XHTML1Test < Test::Unit::TestCase
|
9
5
|
|
@@ -25,5 +21,4 @@ class HTMLEntities::XHTML1Test < Test::Unit::TestCase
|
|
25
21
|
assert_equal "&b.Theta;", html_entities.decode("&b.Theta;")
|
26
22
|
end
|
27
23
|
|
28
|
-
|
29
24
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlentities
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 51
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 4
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 4.2.4
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 4.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Paul Battley
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-29 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -26,30 +26,30 @@ executables: []
|
|
26
26
|
extensions: []
|
27
27
|
|
28
28
|
extra_rdoc_files:
|
29
|
-
- README.rdoc
|
30
29
|
- History.txt
|
31
30
|
- COPYING.txt
|
32
31
|
files:
|
33
|
-
- lib/htmlentities
|
32
|
+
- lib/htmlentities.rb
|
34
33
|
- lib/htmlentities/flavors.rb
|
34
|
+
- lib/htmlentities/version.rb
|
35
35
|
- lib/htmlentities/encoder.rb
|
36
|
-
- lib/htmlentities/legacy.rb
|
37
|
-
- lib/htmlentities/mappings/html4.rb
|
38
36
|
- lib/htmlentities/mappings/expanded.rb
|
39
37
|
- lib/htmlentities/mappings/xhtml1.rb
|
38
|
+
- lib/htmlentities/mappings/html4.rb
|
40
39
|
- lib/htmlentities/decoder.rb
|
41
|
-
-
|
42
|
-
- test/
|
40
|
+
- test/decoding_test.rb
|
41
|
+
- test/ruby_1_8_test.rb
|
43
42
|
- test/xhtml1_test.rb
|
44
|
-
- test/roundtrip_test.rb
|
45
|
-
- test/legacy_test.rb
|
46
|
-
- test/expanded_test.rb
|
47
|
-
- test/test_all.rb
|
48
43
|
- test/html4_test.rb
|
44
|
+
- test/encoding_test.rb
|
45
|
+
- test/expanded_test.rb
|
46
|
+
- test/ruby_1_9_test.rb
|
47
|
+
- test/common.rb
|
48
|
+
- test/roundtrip_test.rb
|
49
|
+
- test/entities_test.rb
|
50
|
+
- perf/performance.rb
|
49
51
|
- perf/profile.rb
|
50
52
|
- perf/benchmark.rb
|
51
|
-
- perf/performance.rb
|
52
|
-
- README.rdoc
|
53
53
|
- History.txt
|
54
54
|
- COPYING.txt
|
55
55
|
has_rdoc: true
|
@@ -87,4 +87,12 @@ signing_key:
|
|
87
87
|
specification_version: 3
|
88
88
|
summary: A module for encoding and decoding (X)HTML entities.
|
89
89
|
test_files:
|
90
|
-
- test/
|
90
|
+
- test/decoding_test.rb
|
91
|
+
- test/ruby_1_8_test.rb
|
92
|
+
- test/xhtml1_test.rb
|
93
|
+
- test/html4_test.rb
|
94
|
+
- test/encoding_test.rb
|
95
|
+
- test/expanded_test.rb
|
96
|
+
- test/ruby_1_9_test.rb
|
97
|
+
- test/roundtrip_test.rb
|
98
|
+
- test/entities_test.rb
|
data/README.rdoc
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
== HTMLEntities
|
2
|
-
|
3
|
-
HTML entity encoding and decoding for Ruby
|
4
|
-
|
5
|
-
The HTMLEntities module facilitates encoding and decoding of
|
6
|
-
(X)HTML entities from/to their corresponding UTF-8 codepoints.
|
7
|
-
|
8
|
-
To install (requires root/admin privileges):
|
9
|
-
|
10
|
-
ruby setup.rb
|
11
|
-
|
12
|
-
Alternatively, you can just use the gem.
|
13
|
-
|
14
|
-
== Licence
|
15
|
-
|
16
|
-
This code is free to use under the terms of the MIT licence:
|
17
|
-
|
18
|
-
Copyright (c) 2005-2009 Paul Battley
|
19
|
-
|
20
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
21
|
-
of this software and associated documentation files (the "Software"), to
|
22
|
-
deal in the Software without restriction, including without limitation the
|
23
|
-
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
24
|
-
sell copies of the Software, and to permit persons to whom the Software is
|
25
|
-
furnished to do so, subject to the following conditions:
|
26
|
-
|
27
|
-
The above copyright notice and this permission notice shall be included in
|
28
|
-
all copies or substantial portions of the Software.
|
29
|
-
|
30
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
31
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
32
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
33
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
34
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
35
|
-
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
36
|
-
IN THE SOFTWARE.
|
37
|
-
|
38
|
-
If you'd like to negotiate a different licence for a specific use, just
|
39
|
-
contact me -- I'll almost certainly permit it.
|
40
|
-
|
41
|
-
== Contact
|
42
|
-
|
43
|
-
Comments are welcome. Send an email to pbattley@gmail.com.
|
44
|
-
|
data/lib/htmlentities/legacy.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
class HTMLEntities
|
2
|
-
class << self
|
3
|
-
|
4
|
-
#
|
5
|
-
# Legacy compatibility class method allowing direct encoding of XHTML1 entities.
|
6
|
-
# See HTMLEntities#encode for description of parameters.
|
7
|
-
#
|
8
|
-
# Deprecated.
|
9
|
-
#
|
10
|
-
def encode_entities(*args)
|
11
|
-
xhtml1_entities.encode(*args)
|
12
|
-
end
|
13
|
-
|
14
|
-
#
|
15
|
-
# Legacy compatibility class method allowing direct decoding of XHTML1 entities.
|
16
|
-
# See HTMLEntities#decode for description of parameters.
|
17
|
-
#
|
18
|
-
# Deprecated.
|
19
|
-
#
|
20
|
-
def decode_entities(*args)
|
21
|
-
xhtml1_entities.decode(*args)
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def xhtml1_entities
|
27
|
-
@xhtml1_entities ||= new('xhtml1')
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
end
|
data/test/legacy_test.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
3
|
-
require 'test/unit'
|
4
|
-
require 'htmlentities'
|
5
|
-
|
6
|
-
$KCODE = 'u' unless "1.9".respond_to?(:encoding)
|
7
|
-
|
8
|
-
#
|
9
|
-
# Test that version 3.x functionality still works
|
10
|
-
#
|
11
|
-
class HTMLEntities::LegacyTest < Test::Unit::TestCase
|
12
|
-
|
13
|
-
def test_should_decode_via_legacy_interface
|
14
|
-
assert_decode('&', '&')
|
15
|
-
assert_decode('±', '±')
|
16
|
-
assert_decode('“', '“')
|
17
|
-
assert_decode('—', '—')
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_should_encode_via_legacy_interface
|
21
|
-
assert_encode('&', '&', :basic)
|
22
|
-
assert_encode('ð', 'ð', :named)
|
23
|
-
assert_encode('…', '…', :decimal)
|
24
|
-
assert_encode('−', '−', :hexadecimal)
|
25
|
-
end
|
26
|
-
|
27
|
-
def assert_encode(expected, *encode_args)
|
28
|
-
assert_equal expected, HTMLEntities.encode_entities(*encode_args)
|
29
|
-
end
|
30
|
-
|
31
|
-
def assert_decode(expected, *decode_args)
|
32
|
-
assert_equal expected, HTMLEntities.decode_entities(*decode_args)
|
33
|
-
end
|
34
|
-
|
35
|
-
end
|
data/test/test_all.rb
DELETED