htmlentities 4.3.1 → 4.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING.txt +1 -1
- data/lib/htmlentities/decoder.rb +7 -14
- data/lib/htmlentities/encoder.rb +48 -49
- data/lib/htmlentities/version.rb +1 -1
- data/perf/profile.rb +1 -1
- data/test/decoding_test.rb +1 -1
- data/test/encoding_test.rb +1 -1
- data/test/entities_test.rb +1 -1
- data/test/expanded_test.rb +1 -1
- data/test/html4_test.rb +1 -1
- data/test/interoperability_test.rb +1 -1
- data/test/roundtrip_test.rb +1 -1
- data/test/string_encodings_test.rb +68 -0
- data/test/test_helper.rb +3 -0
- data/test/xhtml1_test.rb +1 -1
- metadata +61 -57
- data/test/common.rb +0 -6
- data/test/ruby_1_8_test.rb +0 -18
- data/test/ruby_1_9_test.rb +0 -70
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6afa4d22af6c783ac7932aaeda0e3369bfb0648
|
4
|
+
data.tar.gz: eced509ca635c31969ab270199beb23f1d08e653
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 77876c708e3db5def2ccebfb2ce11fd216e645acc4844b7215d872e162d139c5d0f797721b52b418df9104b08f51fc2565b505e93b5a7a48732ac89f9a53a83e
|
7
|
+
data.tar.gz: 7f37a24c747c9dd56735ae247ff60b9a7b29f10bac0f261709e3d9b3371fc805d00cd91670b9f712759eb571d14b78de106f9b110a139cfe8469ea0b7f776ee4
|
data/COPYING.txt
CHANGED
data/lib/htmlentities/decoder.rb
CHANGED
@@ -21,24 +21,17 @@ class HTMLEntities
|
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
|
-
|
25
|
-
|
26
|
-
string.to_s.encode(Encoding::UTF_8)
|
27
|
-
end
|
28
|
-
else
|
29
|
-
def prepare(string) #:nodoc:
|
30
|
-
string.to_s
|
31
|
-
end
|
24
|
+
def prepare(string) #:nodoc:
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
32
26
|
end
|
33
27
|
|
34
28
|
def entity_regexp
|
35
29
|
key_lengths = @map.keys.map{ |k| k.length }
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
30
|
+
if @flavor == 'expanded'
|
31
|
+
entity_name_pattern = '(?:b\.)?[a-z][a-z0-9]'
|
32
|
+
else
|
33
|
+
entity_name_pattern = '[a-z][a-z0-9]'
|
34
|
+
end
|
42
35
|
/&(?:(#{entity_name_pattern}{#{key_lengths.min - 1},#{key_lengths.max - 1}})|#([0-9]{1,7})|#x([0-9a-f]{1,6}));/i
|
43
36
|
end
|
44
37
|
end
|
data/lib/htmlentities/encoder.rb
CHANGED
@@ -7,69 +7,66 @@ class HTMLEntities
|
|
7
7
|
def initialize(flavor, instructions)
|
8
8
|
@flavor = flavor
|
9
9
|
instructions = [:basic] if instructions.empty?
|
10
|
-
validate_instructions
|
11
|
-
build_basic_entity_encoder
|
12
|
-
build_extended_entity_encoder
|
10
|
+
validate_instructions instructions
|
11
|
+
build_basic_entity_encoder instructions
|
12
|
+
build_extended_entity_encoder instructions
|
13
13
|
end
|
14
14
|
|
15
15
|
def encode(source)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
)
|
16
|
+
minimize_encoding(
|
17
|
+
replace_extended(
|
18
|
+
replace_basic(
|
19
|
+
prepare(source))))
|
21
20
|
end
|
22
21
|
|
23
22
|
private
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def post_process(string)
|
31
|
-
if string.encoding != Encoding::ASCII && string.match(/\A[\x01-\x7F]*\z/)
|
32
|
-
string.encode(Encoding::ASCII)
|
33
|
-
else
|
34
|
-
string
|
35
|
-
end
|
36
|
-
end
|
37
|
-
else
|
38
|
-
def prepare(string) #:nodoc:
|
39
|
-
string.to_s
|
40
|
-
end
|
24
|
+
def prepare(string)
|
25
|
+
string.to_s.encode(Encoding::UTF_8)
|
26
|
+
end
|
41
27
|
|
42
|
-
|
28
|
+
def minimize_encoding(string)
|
29
|
+
if string.encoding != Encoding::ASCII && contains_only_ascii?(string)
|
30
|
+
string.encode(Encoding::ASCII)
|
31
|
+
else
|
43
32
|
string
|
44
33
|
end
|
45
34
|
end
|
46
35
|
|
36
|
+
def contains_only_ascii?(string)
|
37
|
+
string.match(/\A[\x01-\x7F]*\z/)
|
38
|
+
end
|
39
|
+
|
47
40
|
def basic_entity_regexp
|
48
41
|
@basic_entity_regexp ||= @flavor.match(/^html/) ? /[<>"&]/ : /[<>'"&]/
|
49
42
|
end
|
50
43
|
|
51
44
|
def extended_entity_regexp
|
52
45
|
@extended_entity_regexp ||= (
|
53
|
-
|
54
|
-
if encoding_aware?
|
55
|
-
pattern = '[^\u{20}-\u{7E}]'
|
56
|
-
else
|
57
|
-
pattern = '[^\x20-\x7E]'
|
58
|
-
options << "U"
|
59
|
-
end
|
46
|
+
pattern = '[^\u{20}-\u{7E}]'
|
60
47
|
pattern << "|'" if @flavor == 'html4'
|
61
|
-
Regexp.new(pattern
|
48
|
+
Regexp.new(pattern)
|
62
49
|
)
|
63
50
|
end
|
64
51
|
|
52
|
+
def replace_basic(string)
|
53
|
+
string.gsub(basic_entity_regexp){ |match| encode_basic(match) }
|
54
|
+
end
|
55
|
+
|
56
|
+
def replace_extended(string)
|
57
|
+
string.gsub(extended_entity_regexp){ |match| encode_extended(match) }
|
58
|
+
end
|
59
|
+
|
65
60
|
def validate_instructions(instructions)
|
66
61
|
unknown_instructions = instructions - INSTRUCTIONS
|
67
62
|
if unknown_instructions.any?
|
68
|
-
raise InstructionError,
|
63
|
+
raise InstructionError,
|
64
|
+
"unknown encode_entities command(s): #{unknown_instructions.inspect}"
|
69
65
|
end
|
70
66
|
|
71
|
-
if
|
72
|
-
raise InstructionError,
|
67
|
+
if instructions.include?(:decimal) && instructions.include?(:hexadecimal)
|
68
|
+
raise InstructionError,
|
69
|
+
"hexadecimal and decimal encoding are mutually exclusive"
|
73
70
|
end
|
74
71
|
end
|
75
72
|
|
@@ -81,18 +78,24 @@ class HTMLEntities
|
|
81
78
|
elsif instructions.include?(:hexadecimal)
|
82
79
|
method = :encode_hexadecimal
|
83
80
|
end
|
84
|
-
instance_eval
|
81
|
+
instance_eval <<-END
|
82
|
+
def encode_basic(char)
|
83
|
+
#{method}(char)
|
84
|
+
end
|
85
|
+
END
|
85
86
|
end
|
86
87
|
|
87
88
|
def build_extended_entity_encoder(instructions)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
89
|
+
operations = [:named, :decimal, :hexadecimal] & instructions
|
90
|
+
instance_eval <<-END
|
91
|
+
def encode_extended(char)
|
92
|
+
#{operations.map{ |encoder| %{
|
93
|
+
encoded = encode_#{encoder}(char)
|
94
|
+
return encoded if encoded
|
95
|
+
}}.join("\n")}
|
96
|
+
char
|
97
|
+
end
|
98
|
+
END
|
96
99
|
end
|
97
100
|
|
98
101
|
def encode_named(char)
|
@@ -116,9 +119,5 @@ class HTMLEntities
|
|
116
119
|
uniqmap.invert
|
117
120
|
)
|
118
121
|
end
|
119
|
-
|
120
|
-
def encoding_aware?
|
121
|
-
"1.9".respond_to?(:encoding)
|
122
|
-
end
|
123
122
|
end
|
124
123
|
end
|
data/lib/htmlentities/version.rb
CHANGED
data/perf/profile.rb
CHANGED
data/test/decoding_test.rb
CHANGED
data/test/encoding_test.rb
CHANGED
data/test/entities_test.rb
CHANGED
data/test/expanded_test.rb
CHANGED
data/test/html4_test.rb
CHANGED
data/test/roundtrip_test.rb
CHANGED
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require_relative "./test_helper"
|
3
|
+
|
4
|
+
class HTMLEntities::StringEncodingsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def test_should_encode_ascii_to_ascii
|
7
|
+
s = "<elan>".encode(Encoding::US_ASCII)
|
8
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
9
|
+
|
10
|
+
t = HTMLEntities.new.encode(s)
|
11
|
+
assert_equal "<elan>", t
|
12
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_should_encode_utf8_to_utf8_if_needed
|
16
|
+
s = "<élan>"
|
17
|
+
assert_equal Encoding::UTF_8, s.encoding
|
18
|
+
|
19
|
+
t = HTMLEntities.new.encode(s)
|
20
|
+
assert_equal "<élan>", t
|
21
|
+
assert_equal Encoding::UTF_8, t.encoding
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_should_encode_utf8_to_ascii_if_possible
|
25
|
+
s = "<elan>"
|
26
|
+
assert_equal Encoding::UTF_8, s.encoding
|
27
|
+
|
28
|
+
t = HTMLEntities.new.encode(s)
|
29
|
+
assert_equal "<elan>", t
|
30
|
+
assert_equal Encoding::US_ASCII, t.encoding
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_should_encode_other_encoding_to_utf8
|
34
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
35
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
36
|
+
|
37
|
+
t = HTMLEntities.new.encode(s)
|
38
|
+
assert_equal "<élan>", t
|
39
|
+
assert_equal Encoding::UTF_8, t.encoding
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_should_decode_ascii_to_utf8
|
43
|
+
s = "<élan>".encode(Encoding::US_ASCII)
|
44
|
+
assert_equal Encoding::US_ASCII, s.encoding
|
45
|
+
|
46
|
+
t = HTMLEntities.new.decode(s)
|
47
|
+
assert_equal "<élan>", t
|
48
|
+
assert_equal Encoding::UTF_8, t.encoding
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_should_decode_utf8_to_utf8
|
52
|
+
s = "<élan>".encode(Encoding::UTF_8)
|
53
|
+
assert_equal Encoding::UTF_8, s.encoding
|
54
|
+
|
55
|
+
t = HTMLEntities.new.decode(s)
|
56
|
+
assert_equal "<élan>", t
|
57
|
+
assert_equal Encoding::UTF_8, t.encoding
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_should_decode_other_encoding_to_utf8
|
61
|
+
s = "<élan>".encode(Encoding::ISO_8859_1)
|
62
|
+
assert_equal Encoding::ISO_8859_1, s.encoding
|
63
|
+
|
64
|
+
t = HTMLEntities.new.decode(s)
|
65
|
+
assert_equal "<élan>", t
|
66
|
+
assert_equal Encoding::UTF_8, t.encoding
|
67
|
+
end
|
68
|
+
end
|
data/test/test_helper.rb
ADDED
data/test/xhtml1_test.rb
CHANGED
metadata
CHANGED
@@ -1,87 +1,91 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlentities
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 4.3.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 4.3.2
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
6
|
+
authors:
|
8
7
|
- Paul Battley
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: A module for encoding and decoding (X)HTML entities.
|
17
28
|
email: pbattley@gmail.com
|
18
29
|
executables: []
|
19
|
-
|
20
30
|
extensions: []
|
21
|
-
|
22
|
-
extra_rdoc_files:
|
31
|
+
extra_rdoc_files:
|
23
32
|
- History.txt
|
24
33
|
- COPYING.txt
|
25
|
-
files:
|
26
|
-
-
|
34
|
+
files:
|
35
|
+
- COPYING.txt
|
36
|
+
- History.txt
|
37
|
+
- lib/htmlentities.rb
|
27
38
|
- lib/htmlentities/decoder.rb
|
28
|
-
- lib/htmlentities/
|
39
|
+
- lib/htmlentities/encoder.rb
|
40
|
+
- lib/htmlentities/flavors.rb
|
29
41
|
- lib/htmlentities/mappings/expanded.rb
|
30
42
|
- lib/htmlentities/mappings/html4.rb
|
31
|
-
- lib/htmlentities/
|
43
|
+
- lib/htmlentities/mappings/xhtml1.rb
|
32
44
|
- lib/htmlentities/version.rb
|
33
|
-
- lib/htmlentities.rb
|
34
|
-
- test/html4_test.rb
|
35
|
-
- test/ruby_1_8_test.rb
|
36
|
-
- test/roundtrip_test.rb
|
37
|
-
- test/encoding_test.rb
|
38
|
-
- test/common.rb
|
39
|
-
- test/xhtml1_test.rb
|
40
|
-
- test/decoding_test.rb
|
41
|
-
- test/ruby_1_9_test.rb
|
42
|
-
- test/expanded_test.rb
|
43
|
-
- test/entities_test.rb
|
44
|
-
- test/interoperability_test.rb
|
45
45
|
- perf/benchmark.rb
|
46
46
|
- perf/performance.rb
|
47
47
|
- perf/profile.rb
|
48
|
-
-
|
49
|
-
-
|
48
|
+
- test/decoding_test.rb
|
49
|
+
- test/encoding_test.rb
|
50
|
+
- test/entities_test.rb
|
51
|
+
- test/expanded_test.rb
|
52
|
+
- test/html4_test.rb
|
53
|
+
- test/interoperability_test.rb
|
54
|
+
- test/roundtrip_test.rb
|
55
|
+
- test/string_encodings_test.rb
|
56
|
+
- test/test_helper.rb
|
57
|
+
- test/xhtml1_test.rb
|
50
58
|
homepage: https://github.com/threedaymonk/htmlentities
|
51
|
-
licenses:
|
52
|
-
|
59
|
+
licenses:
|
60
|
+
- MIT
|
61
|
+
metadata: {}
|
53
62
|
post_install_message:
|
54
63
|
rdoc_options: []
|
55
|
-
|
56
|
-
require_paths:
|
64
|
+
require_paths:
|
57
65
|
- lib
|
58
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
|
60
|
-
requirements:
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
61
68
|
- - ">="
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
version:
|
64
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
|
66
|
-
requirements:
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
67
73
|
- - ">="
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version:
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
70
76
|
requirements: []
|
71
|
-
|
72
77
|
rubyforge_project:
|
73
|
-
rubygems_version:
|
78
|
+
rubygems_version: 2.2.2
|
74
79
|
signing_key:
|
75
|
-
specification_version:
|
76
|
-
summary:
|
77
|
-
test_files:
|
78
|
-
- test/
|
79
|
-
- test/ruby_1_8_test.rb
|
80
|
-
- test/roundtrip_test.rb
|
80
|
+
specification_version: 4
|
81
|
+
summary: Encode/decode HTML entities
|
82
|
+
test_files:
|
83
|
+
- test/interoperability_test.rb
|
81
84
|
- test/encoding_test.rb
|
85
|
+
- test/string_encodings_test.rb
|
86
|
+
- test/entities_test.rb
|
87
|
+
- test/html4_test.rb
|
82
88
|
- test/xhtml1_test.rb
|
83
|
-
- test/decoding_test.rb
|
84
|
-
- test/ruby_1_9_test.rb
|
85
89
|
- test/expanded_test.rb
|
86
|
-
- test/
|
87
|
-
- test/
|
90
|
+
- test/decoding_test.rb
|
91
|
+
- test/roundtrip_test.rb
|
data/test/common.rb
DELETED
data/test/ruby_1_8_test.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require File.expand_path("../common", __FILE__)
|
3
|
-
|
4
|
-
unless ENCODING_AWARE_RUBY
|
5
|
-
class HTMLEntities::Ruby18Test < Test::Unit::TestCase
|
6
|
-
|
7
|
-
# Reported by Benoit Larroque
|
8
|
-
def test_should_encode_without_error_when_KCODE_is_not_UTF_8
|
9
|
-
kcode = $KCODE
|
10
|
-
$KCODE = "n"
|
11
|
-
coder = HTMLEntities.new
|
12
|
-
text = [8212].pack('U')
|
13
|
-
assert_equal "—", coder.encode(text, :decimal)
|
14
|
-
$KCODE = kcode
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
end
|
data/test/ruby_1_9_test.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require File.expand_path("../common", __FILE__)
|
3
|
-
|
4
|
-
if ENCODING_AWARE_RUBY
|
5
|
-
class HTMLEntities::Ruby19Test < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_should_encode_ascii_to_ascii
|
8
|
-
s = "<elan>".encode(Encoding::US_ASCII)
|
9
|
-
assert_equal Encoding::US_ASCII, s.encoding
|
10
|
-
|
11
|
-
t = HTMLEntities.new.encode(s)
|
12
|
-
assert_equal "<elan>", t
|
13
|
-
assert_equal Encoding::US_ASCII, t.encoding
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_should_encode_utf8_to_utf8_if_needed
|
17
|
-
s = "<élan>"
|
18
|
-
assert_equal Encoding::UTF_8, s.encoding
|
19
|
-
|
20
|
-
t = HTMLEntities.new.encode(s)
|
21
|
-
assert_equal "<élan>", t
|
22
|
-
assert_equal Encoding::UTF_8, t.encoding
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_should_encode_utf8_to_ascii_if_possible
|
26
|
-
s = "<elan>"
|
27
|
-
assert_equal Encoding::UTF_8, s.encoding
|
28
|
-
|
29
|
-
t = HTMLEntities.new.encode(s)
|
30
|
-
assert_equal "<elan>", t
|
31
|
-
assert_equal Encoding::US_ASCII, t.encoding
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_should_encode_other_encoding_to_utf8
|
35
|
-
s = "<élan>".encode(Encoding::ISO_8859_1)
|
36
|
-
assert_equal Encoding::ISO_8859_1, s.encoding
|
37
|
-
|
38
|
-
t = HTMLEntities.new.encode(s)
|
39
|
-
assert_equal "<élan>", t
|
40
|
-
assert_equal Encoding::UTF_8, t.encoding
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_should_decode_ascii_to_utf8
|
44
|
-
s = "<élan>".encode(Encoding::US_ASCII)
|
45
|
-
assert_equal Encoding::US_ASCII, s.encoding
|
46
|
-
|
47
|
-
t = HTMLEntities.new.decode(s)
|
48
|
-
assert_equal "<élan>", t
|
49
|
-
assert_equal Encoding::UTF_8, t.encoding
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_should_decode_utf8_to_utf8
|
53
|
-
s = "<élan>".encode(Encoding::UTF_8)
|
54
|
-
assert_equal Encoding::UTF_8, s.encoding
|
55
|
-
|
56
|
-
t = HTMLEntities.new.decode(s)
|
57
|
-
assert_equal "<élan>", t
|
58
|
-
assert_equal Encoding::UTF_8, t.encoding
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_should_decode_other_encoding_to_utf8
|
62
|
-
s = "<élan>".encode(Encoding::ISO_8859_1)
|
63
|
-
assert_equal Encoding::ISO_8859_1, s.encoding
|
64
|
-
|
65
|
-
t = HTMLEntities.new.decode(s)
|
66
|
-
assert_equal "<élan>", t
|
67
|
-
assert_equal Encoding::UTF_8, t.encoding
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|