htmlentities 4.3.1 → 4.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING.txt +1 -1
- data/lib/htmlentities/decoder.rb +7 -14
- data/lib/htmlentities/encoder.rb +48 -49
- data/lib/htmlentities/version.rb +1 -1
- data/perf/profile.rb +1 -1
- data/test/decoding_test.rb +1 -1
- data/test/encoding_test.rb +1 -1
- data/test/entities_test.rb +1 -1
- data/test/expanded_test.rb +1 -1
- data/test/html4_test.rb +1 -1
- data/test/interoperability_test.rb +1 -1
- data/test/roundtrip_test.rb +1 -1
- data/test/string_encodings_test.rb +68 -0
- data/test/test_helper.rb +3 -0
- data/test/xhtml1_test.rb +1 -1
- metadata +61 -57
- data/test/common.rb +0 -6
- data/test/ruby_1_8_test.rb +0 -18
- data/test/ruby_1_9_test.rb +0 -70
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6afa4d22af6c783ac7932aaeda0e3369bfb0648
|
4
|
+
data.tar.gz: eced509ca635c31969ab270199beb23f1d08e653
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 77876c708e3db5def2ccebfb2ce11fd216e645acc4844b7215d872e162d139c5d0f797721b52b418df9104b08f51fc2565b505e93b5a7a48732ac89f9a53a83e
|
7
|
+
data.tar.gz: 7f37a24c747c9dd56735ae247ff60b9a7b29f10bac0f261709e3d9b3371fc805d00cd91670b9f712759eb571d14b78de106f9b110a139cfe8469ea0b7f776ee4
|
data/COPYING.txt
CHANGED
data/lib/htmlentities/decoder.rb
CHANGED
@@ -21,24 +21,17 @@ class HTMLEntities
|
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
|
-
|
25
|
-
|
26
|
-
string.to_s.encode(Encoding::UTF_8)
|
27
|
-
end
|
28
|
-
else
|
29
|
-
def prepare(string) #:nodoc:
|
30
|
-
string.to_s
|
31
|
-
end
|
24
|
+
def prepare(string) #:nodoc:
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
32
26
|
end
|
33
27
|
|
34
28
|
def entity_regexp
|
35
29
|
key_lengths = @map.keys.map{ |k| k.length }
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
30
|
+
if @flavor == 'expanded'
|
31
|
+
entity_name_pattern = '(?:b\.)?[a-z][a-z0-9]'
|
32
|
+
else
|
33
|
+
entity_name_pattern = '[a-z][a-z0-9]'
|
34
|
+
end
|
42
35
|
/&(?:(#{entity_name_pattern}{#{key_lengths.min - 1},#{key_lengths.max - 1}})|#([0-9]{1,7})|#x([0-9a-f]{1,6}));/i
|
43
36
|
end
|
44
37
|
end
|
data/lib/htmlentities/encoder.rb
CHANGED
@@ -7,69 +7,66 @@ class HTMLEntities
|
|
7
7
|
def initialize(flavor, instructions)
|
8
8
|
@flavor = flavor
|
9
9
|
instructions = [:basic] if instructions.empty?
|
10
|
-
validate_instructions
|
11
|
-
build_basic_entity_encoder
|
12
|
-
build_extended_entity_encoder
|
10
|
+
validate_instructions instructions
|
11
|
+
build_basic_entity_encoder instructions
|
12
|
+
build_extended_entity_encoder instructions
|
13
13
|
end
|
14
14
|
|
15
15
|
def encode(source)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
)
|
16
|
+
minimize_encoding(
|
17
|
+
replace_extended(
|
18
|
+
replace_basic(
|
19
|
+
prepare(source))))
|
21
20
|
end
|
22
21
|
|
23
22
|
private
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def post_process(string)
|
31
|
-
if string.encoding != Encoding::ASCII && string.match(/\A[\x01-\x7F]*\z/)
|
32
|
-
string.encode(Encoding::ASCII)
|
33
|
-
else
|
34
|
-
string
|
35
|
-
end
|
36
|
-
end
|
37
|
-
else
|
38
|
-
def prepare(string) #:nodoc:
|
39
|
-
string.to_s
|
40
|
-
end
|
24
|
+
def prepare(string)
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
26
|
+
end
|
41
27
|
|
42
|
-
|
28
|
+
def minimize_encoding(string)
|
29
|
+
if string.encoding != Encoding::ASCII && contains_only_ascii?(string)
|
30
|
+
string.encode(Encoding::ASCII)
|
31
|
+
else
|
43
32
|
string
|
44
33
|
end
|
45
34
|
end
|
46
35
|
|
36
|
+
def contains_only_ascii?(string)
|
37
|
+
string.match(/\A[\x01-\x7F]*\z/)
|
38
|
+
end
|
39
|
+
|
47
40
|
def basic_entity_regexp
|
48
41
|
@basic_entity_regexp ||= @flavor.match(/^html/) ? /[<>"&]/ : /[<>'"&]/
|
49
42
|
end
|
50
43
|
|
51
44
|
def extended_entity_regexp
|
52
45
|
@extended_entity_regexp ||= (
|
53
|
-
|
54
|
-
if encoding_aware?
|
55
|
-
pattern = '[^\u{20}-\u{7E}]'
|
56
|
-
else
|
57
|
-
pattern = '[^\x20-\x7E]'
|
58
|
-
options << "U"
|
59
|
-
end
|
46
|
+
pattern = '[^\u{20}-\u{7E}]'
|
60
47
|
pattern << "|'" if @flavor == 'html4'
|
61
|
-
Regexp.new(pattern
|
48
|
+
Regexp.new(pattern)
|
62
49
|
)
|
63
50
|
end
|
64
51
|
|
52
|
+
def replace_basic(string)
|
53
|
+
string.gsub(basic_entity_regexp){ |match| encode_basic(match) }
|
54
|
+
end
|
55
|
+
|
56
|
+
def replace_extended(string)
|
57
|
+
string.gsub(extended_entity_regexp){ |match| encode_extended(match) }
|
58
|
+
end
|
59
|
+
|
65
60
|
def validate_instructions(instructions)
|
66
61
|
unknown_instructions = instructions - INSTRUCTIONS
|
67
62
|
if unknown_instructions.any?
|
68
|
-
raise InstructionError,
|
63
|
+
raise InstructionError,
|
64
|
+
"unknown encode_entities command(s): #{unknown_instructions.inspect}"
|
69
65
|
end
|
70
66
|
|
71
|
-
if
|
72
|
-
raise InstructionError,
|
67
|
+
if instructions.include?(:decimal) && instructions.include?(:hexadecimal)
|
68
|
+
raise InstructionError,
|
69
|
+
"hexadecimal and decimal encoding are mutually exclusive"
|
73
70
|
end
|
74
71
|
end
|
75
72
|
|
@@ -81,18 +78,24 @@ class HTMLEntities
|
|
81
78
|
elsif instructions.include?(:hexadecimal)
|
82
79
|
method = :encode_hexadecimal
|
83
80
|
end
|
84
|
-
instance_eval
|
81
|
+
instance_eval <<-END
|
82
|
+
def encode_basic(char)
|
83
|
+
#{method}(char)
|
84
|
+
end
|
85
|
+
END
|
85
86
|
end
|
86
87
|
|
87
88
|
def build_extended_entity_encoder(instructions)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
89
|
+
operations = [:named, :decimal, :hexadecimal] & instructions
|
90
|
+
instance_eval <<-END
|
91
|
+
def encode_extended(char)
|
92
|
+
#{operations.map{ |encoder| %{
|
93
|
+
encoded = encode_#{encoder}(char)
|
94
|
+
return encoded if encoded
|
95
|
+
}}.join("\n")}
|
96
|
+
char
|
97
|
+
end
|
98
|
+
END
|
96
99
|
end
|
97
100
|
|
98
101
|
def encode_named(char)
|
@@ -116,9 +119,5 @@ class HTMLEntities
|
|
116
119
|
uniqmap.invert
|
117
120
|
)
|
118
121
|
end
|
119
|
-
|
120
|
-
def encoding_aware?
|
121
|
-
"1.9".respond_to?(:encoding)
|
122
|
-
end
|
123
122
|
end
|
124
123
|
end
|
data/lib/htmlentities/version.rb
CHANGED
data/perf/profile.rb
CHANGED
data/test/decoding_test.rb
CHANGED
data/test/encoding_test.rb
CHANGED
data/test/entities_test.rb
CHANGED
data/test/expanded_test.rb
CHANGED
data/test/html4_test.rb
CHANGED
data/test/roundtrip_test.rb
CHANGED
data/test/string_encodings_test.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require_relative "./test_helper"
|
3
|
+
|
4
|
+
class HTMLEntities::StringEncodingsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_should_encode_ascii_to_ascii
|
7
|
+
s = "<elan>".encode(Encoding::US_ASCII)
|
8
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
9
|
+
|
10
|
+
t = HTMLEntities.new.encode(s)
|
11
|
+
assert_equal "<elan>", t
|
12
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_should_encode_utf8_to_utf8_if_needed
|
16
|
+
s = "<élan>"
|
17
|
+
assert_equal Encoding::UTF_8, s.encoding
|
18
|
+
|
19
|
+
t = HTMLEntities.new.encode(s)
|
20
|
+
assert_equal "<élan>", t
|
21
|
+
assert_equal Encoding::UTF_8, t.encoding
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_should_encode_utf8_to_ascii_if_possible
|
25
|
+
s = "<elan>"
|
26
|
+
assert_equal Encoding::UTF_8, s.encoding
|
27
|
+
|
28
|
+
t = HTMLEntities.new.encode(s)
|
29
|
+
assert_equal "<elan>", t
|
30
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_should_encode_other_encoding_to_utf8
|
34
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
35
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
36
|
+
|
37
|
+
t = HTMLEntities.new.encode(s)
|
38
|
+
assert_equal "<élan>", t
|
39
|
+
assert_equal Encoding::UTF_8, t.encoding
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_should_decode_ascii_to_utf8
|
43
|
+
s = "<élan>".encode(Encoding::US_ASCII)
|
44
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
45
|
+
|
46
|
+
t = HTMLEntities.new.decode(s)
|
47
|
+
assert_equal "<élan>", t
|
48
|
+
assert_equal Encoding::UTF_8, t.encoding
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_should_decode_utf8_to_utf8
|
52
|
+
s = "<élan>".encode(Encoding::UTF_8)
|
53
|
+
assert_equal Encoding::UTF_8, s.encoding
|
54
|
+
|
55
|
+
t = HTMLEntities.new.decode(s)
|
56
|
+
assert_equal "<élan>", t
|
57
|
+
assert_equal Encoding::UTF_8, t.encoding
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_should_decode_other_encoding_to_utf8
|
61
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
62
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
63
|
+
|
64
|
+
t = HTMLEntities.new.decode(s)
|
65
|
+
assert_equal "<élan>", t
|
66
|
+
assert_equal Encoding::UTF_8, t.encoding
|
67
|
+
end
|
68
|
+
end
|
data/test/test_helper.rb
ADDED
data/test/xhtml1_test.rb
CHANGED
metadata
CHANGED
@@ -1,87 +1,91 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlentities
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 4.3.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 4.3.2
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
6
|
+
authors:
|
8
7
|
- Paul Battley
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: A module for encoding and decoding (X)HTML entities.
|
17
28
|
email: pbattley@gmail.com
|
18
29
|
executables: []
|
19
|
-
|
20
30
|
extensions: []
|
21
|
-
|
22
|
-
extra_rdoc_files:
|
31
|
+
extra_rdoc_files:
|
23
32
|
- History.txt
|
24
33
|
- COPYING.txt
|
25
|
-
files:
|
26
|
-
-
|
34
|
+
files:
|
35
|
+
- COPYING.txt
|
36
|
+
- History.txt
|
37
|
+
- lib/htmlentities.rb
|
27
38
|
- lib/htmlentities/decoder.rb
|
28
|
-
- lib/htmlentities/
|
39
|
+
- lib/htmlentities/encoder.rb
|
40
|
+
- lib/htmlentities/flavors.rb
|
29
41
|
- lib/htmlentities/mappings/expanded.rb
|
30
42
|
- lib/htmlentities/mappings/html4.rb
|
31
|
-
- lib/htmlentities/
|
43
|
+
- lib/htmlentities/mappings/xhtml1.rb
|
32
44
|
- lib/htmlentities/version.rb
|
33
|
-
- lib/htmlentities.rb
|
34
|
-
- test/html4_test.rb
|
35
|
-
- test/ruby_1_8_test.rb
|
36
|
-
- test/roundtrip_test.rb
|
37
|
-
- test/encoding_test.rb
|
38
|
-
- test/common.rb
|
39
|
-
- test/xhtml1_test.rb
|
40
|
-
- test/decoding_test.rb
|
41
|
-
- test/ruby_1_9_test.rb
|
42
|
-
- test/expanded_test.rb
|
43
|
-
- test/entities_test.rb
|
44
|
-
- test/interoperability_test.rb
|
45
45
|
- perf/benchmark.rb
|
46
46
|
- perf/performance.rb
|
47
47
|
- perf/profile.rb
|
48
|
-
-
|
49
|
-
-
|
48
|
+
- test/decoding_test.rb
|
49
|
+
- test/encoding_test.rb
|
50
|
+
- test/entities_test.rb
|
51
|
+
- test/expanded_test.rb
|
52
|
+
- test/html4_test.rb
|
53
|
+
- test/interoperability_test.rb
|
54
|
+
- test/roundtrip_test.rb
|
55
|
+
- test/string_encodings_test.rb
|
56
|
+
- test/test_helper.rb
|
57
|
+
- test/xhtml1_test.rb
|
50
58
|
homepage: https://github.com/threedaymonk/htmlentities
|
51
|
-
licenses:
|
52
|
-
|
59
|
+
licenses:
|
60
|
+
- MIT
|
61
|
+
metadata: {}
|
53
62
|
post_install_message:
|
54
63
|
rdoc_options: []
|
55
|
-
|
56
|
-
require_paths:
|
64
|
+
require_paths:
|
57
65
|
- lib
|
58
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
|
60
|
-
requirements:
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
61
68
|
- - ">="
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
version:
|
64
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
|
66
|
-
requirements:
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
67
73
|
- - ">="
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version:
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
70
76
|
requirements: []
|
71
|
-
|
72
77
|
rubyforge_project:
|
73
|
-
rubygems_version:
|
78
|
+
rubygems_version: 2.2.2
|
74
79
|
signing_key:
|
75
|
-
specification_version:
|
76
|
-
summary:
|
77
|
-
test_files:
|
78
|
-
- test/
|
79
|
-
- test/ruby_1_8_test.rb
|
80
|
-
- test/roundtrip_test.rb
|
80
|
+
specification_version: 4
|
81
|
+
summary: Encode/decode HTML entities
|
82
|
+
test_files:
|
83
|
+
- test/interoperability_test.rb
|
81
84
|
- test/encoding_test.rb
|
85
|
+
- test/string_encodings_test.rb
|
86
|
+
- test/entities_test.rb
|
87
|
+
- test/html4_test.rb
|
82
88
|
- test/xhtml1_test.rb
|
83
|
-
- test/decoding_test.rb
|
84
|
-
- test/ruby_1_9_test.rb
|
85
89
|
- test/expanded_test.rb
|
86
|
-
- test/
|
87
|
-
- test/
|
90
|
+
- test/decoding_test.rb
|
91
|
+
- test/roundtrip_test.rb
|
data/test/common.rb
DELETED
data/test/ruby_1_8_test.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require File.expand_path("../common", __FILE__)
|
3
|
-
|
4
|
-
unless ENCODING_AWARE_RUBY
|
5
|
-
class HTMLEntities::Ruby18Test < Test::Unit::TestCase
|
6
|
-
|
7
|
-
# Reported by Benoit Larroque
|
8
|
-
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
9
|
-
kcode = $KCODE
|
10
|
-
$KCODE = "n"
|
11
|
-
coder = HTMLEntities.new
|
12
|
-
text = [8212].pack('U')
|
13
|
-
assert_equal "—", coder.encode(text, :decimal)
|
14
|
-
$KCODE = kcode
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
end
|
data/test/ruby_1_9_test.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require File.expand_path("../common", __FILE__)
|
3
|
-
|
4
|
-
if ENCODING_AWARE_RUBY
|
5
|
-
class HTMLEntities::Ruby19Test < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_should_encode_ascii_to_ascii
|
8
|
-
s = "<elan>".encode(Encoding::US_ASCII)
|
9
|
-
assert_equal Encoding::US_ASCII, s.encoding
|
10
|
-
|
11
|
-
t = HTMLEntities.new.encode(s)
|
12
|
-
assert_equal "<elan>", t
|
13
|
-
assert_equal Encoding::US_ASCII, t.encoding
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_should_encode_utf8_to_utf8_if_needed
|
17
|
-
s = "<élan>"
|
18
|
-
assert_equal Encoding::UTF_8, s.encoding
|
19
|
-
|
20
|
-
t = HTMLEntities.new.encode(s)
|
21
|
-
assert_equal "<élan>", t
|
22
|
-
assert_equal Encoding::UTF_8, t.encoding
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_should_encode_utf8_to_ascii_if_possible
|
26
|
-
s = "<elan>"
|
27
|
-
assert_equal Encoding::UTF_8, s.encoding
|
28
|
-
|
29
|
-
t = HTMLEntities.new.encode(s)
|
30
|
-
assert_equal "<elan>", t
|
31
|
-
assert_equal Encoding::US_ASCII, t.encoding
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_should_encode_other_encoding_to_utf8
|
35
|
-
s = "<élan>".encode(Encoding::ISO_8859_1)
|
36
|
-
assert_equal Encoding::ISO_8859_1, s.encoding
|
37
|
-
|
38
|
-
t = HTMLEntities.new.encode(s)
|
39
|
-
assert_equal "<élan>", t
|
40
|
-
assert_equal Encoding::UTF_8, t.encoding
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_should_decode_ascii_to_utf8
|
44
|
-
s = "<élan>".encode(Encoding::US_ASCII)
|
45
|
-
assert_equal Encoding::US_ASCII, s.encoding
|
46
|
-
|
47
|
-
t = HTMLEntities.new.decode(s)
|
48
|
-
assert_equal "<élan>", t
|
49
|
-
assert_equal Encoding::UTF_8, t.encoding
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_should_decode_utf8_to_utf8
|
53
|
-
s = "<élan>".encode(Encoding::UTF_8)
|
54
|
-
assert_equal Encoding::UTF_8, s.encoding
|
55
|
-
|
56
|
-
t = HTMLEntities.new.decode(s)
|
57
|
-
assert_equal "<élan>", t
|
58
|
-
assert_equal Encoding::UTF_8, t.encoding
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_should_decode_other_encoding_to_utf8
|
62
|
-
s = "<élan>".encode(Encoding::ISO_8859_1)
|
63
|
-
assert_equal Encoding::ISO_8859_1, s.encoding
|
64
|
-
|
65
|
-
t = HTMLEntities.new.decode(s)
|
66
|
-
assert_equal "<élan>", t
|
67
|
-
assert_equal Encoding::UTF_8, t.encoding
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|