htmlentities 4.3.1 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6afa4d22af6c783ac7932aaeda0e3369bfb0648
4
+ data.tar.gz: eced509ca635c31969ab270199beb23f1d08e653
5
+ SHA512:
6
+ metadata.gz: 77876c708e3db5def2ccebfb2ce11fd216e645acc4844b7215d872e162d139c5d0f797721b52b418df9104b08f51fc2565b505e93b5a7a48732ac89f9a53a83e
7
+ data.tar.gz: 7f37a24c747c9dd56735ae247ff60b9a7b29f10bac0f261709e3d9b3371fc805d00cd91670b9f712759eb571d14b78de106f9b110a139cfe8469ea0b7f776ee4
@@ -1,6 +1,6 @@
1
1
  == Licence (MIT)
2
2
 
3
- Copyright (c) 2005-2009 Paul Battley
3
+ Copyright (c) 2005-2013 Paul Battley
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -21,24 +21,17 @@ class HTMLEntities
21
21
  end
22
22
 
23
23
  private
24
- if "1.9".respond_to?(:encoding)
25
- def prepare(string) #:nodoc:
26
- string.to_s.encode(Encoding::UTF_8)
27
- end
28
- else
29
- def prepare(string) #:nodoc:
30
- string.to_s
31
- end
24
+ def prepare(string) #:nodoc:
25
+ string.to_s.encode(Encoding::UTF_8)
32
26
  end
33
27
 
34
28
  def entity_regexp
35
29
  key_lengths = @map.keys.map{ |k| k.length }
36
- entity_name_pattern =
37
- if @flavor == 'expanded'
38
- '(?:b\.)?[a-z][a-z0-9]'
39
- else
40
- '[a-z][a-z0-9]'
41
- end
30
+ if @flavor == 'expanded'
31
+ entity_name_pattern = '(?:b\.)?[a-z][a-z0-9]'
32
+ else
33
+ entity_name_pattern = '[a-z][a-z0-9]'
34
+ end
42
35
  /&(?:(#{entity_name_pattern}{#{key_lengths.min - 1},#{key_lengths.max - 1}})|#([0-9]{1,7})|#x([0-9a-f]{1,6}));/i
43
36
  end
44
37
  end
@@ -7,69 +7,66 @@ class HTMLEntities
7
7
  def initialize(flavor, instructions)
8
8
  @flavor = flavor
9
9
  instructions = [:basic] if instructions.empty?
10
- validate_instructions(instructions)
11
- build_basic_entity_encoder(instructions)
12
- build_extended_entity_encoder(instructions)
10
+ validate_instructions instructions
11
+ build_basic_entity_encoder instructions
12
+ build_extended_entity_encoder instructions
13
13
  end
14
14
 
15
15
  def encode(source)
16
- post_process(
17
- prepare(source).
18
- gsub(basic_entity_regexp){ |match| encode_basic(match) }.
19
- gsub(extended_entity_regexp){ |match| encode_extended(match) }
20
- )
16
+ minimize_encoding(
17
+ replace_extended(
18
+ replace_basic(
19
+ prepare(source))))
21
20
  end
22
21
 
23
22
  private
24
23
 
25
- if "1.9".respond_to?(:encoding)
26
- def prepare(string) #:nodoc:
27
- string.to_s.encode(Encoding::UTF_8)
28
- end
29
-
30
- def post_process(string)
31
- if string.encoding != Encoding::ASCII && string.match(/\A[\x01-\x7F]*\z/)
32
- string.encode(Encoding::ASCII)
33
- else
34
- string
35
- end
36
- end
37
- else
38
- def prepare(string) #:nodoc:
39
- string.to_s
40
- end
24
+ def prepare(string)
25
+ string.to_s.encode(Encoding::UTF_8)
26
+ end
41
27
 
42
- def post_process(string)
28
+ def minimize_encoding(string)
29
+ if string.encoding != Encoding::ASCII && contains_only_ascii?(string)
30
+ string.encode(Encoding::ASCII)
31
+ else
43
32
  string
44
33
  end
45
34
  end
46
35
 
36
+ def contains_only_ascii?(string)
37
+ string.match(/\A[\x01-\x7F]*\z/)
38
+ end
39
+
47
40
  def basic_entity_regexp
48
41
  @basic_entity_regexp ||= @flavor.match(/^html/) ? /[<>"&]/ : /[<>'"&]/
49
42
  end
50
43
 
51
44
  def extended_entity_regexp
52
45
  @extended_entity_regexp ||= (
53
- options = [nil]
54
- if encoding_aware?
55
- pattern = '[^\u{20}-\u{7E}]'
56
- else
57
- pattern = '[^\x20-\x7E]'
58
- options << "U"
59
- end
46
+ pattern = '[^\u{20}-\u{7E}]'
60
47
  pattern << "|'" if @flavor == 'html4'
61
- Regexp.new(pattern, *options)
48
+ Regexp.new(pattern)
62
49
  )
63
50
  end
64
51
 
52
+ def replace_basic(string)
53
+ string.gsub(basic_entity_regexp){ |match| encode_basic(match) }
54
+ end
55
+
56
+ def replace_extended(string)
57
+ string.gsub(extended_entity_regexp){ |match| encode_extended(match) }
58
+ end
59
+
65
60
  def validate_instructions(instructions)
66
61
  unknown_instructions = instructions - INSTRUCTIONS
67
62
  if unknown_instructions.any?
68
- raise InstructionError, "unknown encode_entities command(s): #{unknown_instructions.inspect}"
63
+ raise InstructionError,
64
+ "unknown encode_entities command(s): #{unknown_instructions.inspect}"
69
65
  end
70
66
 
71
- if (instructions.include?(:decimal) && instructions.include?(:hexadecimal))
72
- raise InstructionError, "hexadecimal and decimal encoding are mutually exclusive"
67
+ if instructions.include?(:decimal) && instructions.include?(:hexadecimal)
68
+ raise InstructionError,
69
+ "hexadecimal and decimal encoding are mutually exclusive"
73
70
  end
74
71
  end
75
72
 
@@ -81,18 +78,24 @@ class HTMLEntities
81
78
  elsif instructions.include?(:hexadecimal)
82
79
  method = :encode_hexadecimal
83
80
  end
84
- instance_eval "def encode_basic(char)\n#{method}(char)\nend"
81
+ instance_eval <<-END
82
+ def encode_basic(char)
83
+ #{method}(char)
84
+ end
85
+ END
85
86
  end
86
87
 
87
88
  def build_extended_entity_encoder(instructions)
88
- definition = "def encode_extended(char)\n"
89
- ([:named, :decimal, :hexadecimal] & instructions).each do |encoder|
90
- definition << "encoded = encode_#{encoder}(char)\n"
91
- definition << "return encoded if encoded\n"
92
- end
93
- definition << "char\n"
94
- definition << "end"
95
- instance_eval definition
89
+ operations = [:named, :decimal, :hexadecimal] & instructions
90
+ instance_eval <<-END
91
+ def encode_extended(char)
92
+ #{operations.map{ |encoder| %{
93
+ encoded = encode_#{encoder}(char)
94
+ return encoded if encoded
95
+ }}.join("\n")}
96
+ char
97
+ end
98
+ END
96
99
  end
97
100
 
98
101
  def encode_named(char)
@@ -116,9 +119,5 @@ class HTMLEntities
116
119
  uniqmap.invert
117
120
  )
118
121
  end
119
-
120
- def encoding_aware?
121
- "1.9".respond_to?(:encoding)
122
- end
123
122
  end
124
123
  end
@@ -2,7 +2,7 @@ class HTMLEntities
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 4
4
4
  MINOR = 3
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  $KCODE = 'u' unless "1.9".respond_to?(:encoding)
3
3
 
4
- require File.join(File.dirname(__FILE__), "performance")
4
+ require File.expand_path("../performance", __FILE__)
5
5
  require "profiler"
6
6
 
7
7
  job = HTMLEntitiesJob.new
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::DecodingTest < Test::Unit::TestCase
5
5
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::EncodingTest < Test::Unit::TestCase
5
5
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::EntitiesTest < Test::Unit::TestCase
5
5
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::ExpandedTest < Test::Unit::TestCase
5
5
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTML4Test < Test::Unit::TestCase
5
5
 
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  if ENV["RUN_INTEROPERABILITY_TESTS"]
5
5
  class HTMLEntities::InteroperabilityTest < Test::Unit::TestCase
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::RoundtripTest < Test::Unit::TestCase
5
5
 
@@ -0,0 +1,68 @@
1
+ # encoding: UTF-8
2
+ require_relative "./test_helper"
3
+
4
+ class HTMLEntities::StringEncodingsTest < Test::Unit::TestCase
5
+
6
+ def test_should_encode_ascii_to_ascii
7
+ s = "<elan>".encode(Encoding::US_ASCII)
8
+ assert_equal Encoding::US_ASCII, s.encoding
9
+
10
+ t = HTMLEntities.new.encode(s)
11
+ assert_equal "&lt;elan&gt;", t
12
+ assert_equal Encoding::US_ASCII, t.encoding
13
+ end
14
+
15
+ def test_should_encode_utf8_to_utf8_if_needed
16
+ s = "<élan>"
17
+ assert_equal Encoding::UTF_8, s.encoding
18
+
19
+ t = HTMLEntities.new.encode(s)
20
+ assert_equal "&lt;élan&gt;", t
21
+ assert_equal Encoding::UTF_8, t.encoding
22
+ end
23
+
24
+ def test_should_encode_utf8_to_ascii_if_possible
25
+ s = "<elan>"
26
+ assert_equal Encoding::UTF_8, s.encoding
27
+
28
+ t = HTMLEntities.new.encode(s)
29
+ assert_equal "&lt;elan&gt;", t
30
+ assert_equal Encoding::US_ASCII, t.encoding
31
+ end
32
+
33
+ def test_should_encode_other_encoding_to_utf8
34
+ s = "<élan>".encode(Encoding::ISO_8859_1)
35
+ assert_equal Encoding::ISO_8859_1, s.encoding
36
+
37
+ t = HTMLEntities.new.encode(s)
38
+ assert_equal "&lt;élan&gt;", t
39
+ assert_equal Encoding::UTF_8, t.encoding
40
+ end
41
+
42
+ def test_should_decode_ascii_to_utf8
43
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
44
+ assert_equal Encoding::US_ASCII, s.encoding
45
+
46
+ t = HTMLEntities.new.decode(s)
47
+ assert_equal "<élan>", t
48
+ assert_equal Encoding::UTF_8, t.encoding
49
+ end
50
+
51
+ def test_should_decode_utf8_to_utf8
52
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::UTF_8)
53
+ assert_equal Encoding::UTF_8, s.encoding
54
+
55
+ t = HTMLEntities.new.decode(s)
56
+ assert_equal "<élan>", t
57
+ assert_equal Encoding::UTF_8, t.encoding
58
+ end
59
+
60
+ def test_should_decode_other_encoding_to_utf8
61
+ s = "&lt;&eacute;lan&gt;".encode(Encoding::ISO_8859_1)
62
+ assert_equal Encoding::ISO_8859_1, s.encoding
63
+
64
+ t = HTMLEntities.new.decode(s)
65
+ assert_equal "<élan>", t
66
+ assert_equal Encoding::UTF_8, t.encoding
67
+ end
68
+ end
@@ -0,0 +1,3 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+ require 'test/unit'
3
+ require 'htmlentities'
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
2
+ require_relative "./test_helper"
3
3
 
4
4
  class HTMLEntities::XHTML1Test < Test::Unit::TestCase
5
5
 
metadata CHANGED
@@ -1,87 +1,91 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: htmlentities
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 4.3.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 4.3.2
6
5
  platform: ruby
7
- authors:
6
+ authors:
8
7
  - Paul Battley
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
-
13
- date: 2011-11-30 00:00:00 Z
14
- dependencies: []
15
-
16
- description:
11
+ date: 2014-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: A module for encoding and decoding (X)HTML entities.
17
28
  email: pbattley@gmail.com
18
29
  executables: []
19
-
20
30
  extensions: []
21
-
22
- extra_rdoc_files:
31
+ extra_rdoc_files:
23
32
  - History.txt
24
33
  - COPYING.txt
25
- files:
26
- - lib/htmlentities/encoder.rb
34
+ files:
35
+ - COPYING.txt
36
+ - History.txt
37
+ - lib/htmlentities.rb
27
38
  - lib/htmlentities/decoder.rb
28
- - lib/htmlentities/mappings/xhtml1.rb
39
+ - lib/htmlentities/encoder.rb
40
+ - lib/htmlentities/flavors.rb
29
41
  - lib/htmlentities/mappings/expanded.rb
30
42
  - lib/htmlentities/mappings/html4.rb
31
- - lib/htmlentities/flavors.rb
43
+ - lib/htmlentities/mappings/xhtml1.rb
32
44
  - lib/htmlentities/version.rb
33
- - lib/htmlentities.rb
34
- - test/html4_test.rb
35
- - test/ruby_1_8_test.rb
36
- - test/roundtrip_test.rb
37
- - test/encoding_test.rb
38
- - test/common.rb
39
- - test/xhtml1_test.rb
40
- - test/decoding_test.rb
41
- - test/ruby_1_9_test.rb
42
- - test/expanded_test.rb
43
- - test/entities_test.rb
44
- - test/interoperability_test.rb
45
45
  - perf/benchmark.rb
46
46
  - perf/performance.rb
47
47
  - perf/profile.rb
48
- - History.txt
49
- - COPYING.txt
48
+ - test/decoding_test.rb
49
+ - test/encoding_test.rb
50
+ - test/entities_test.rb
51
+ - test/expanded_test.rb
52
+ - test/html4_test.rb
53
+ - test/interoperability_test.rb
54
+ - test/roundtrip_test.rb
55
+ - test/string_encodings_test.rb
56
+ - test/test_helper.rb
57
+ - test/xhtml1_test.rb
50
58
  homepage: https://github.com/threedaymonk/htmlentities
51
- licenses: []
52
-
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
53
62
  post_install_message:
54
63
  rdoc_options: []
55
-
56
- require_paths:
64
+ require_paths:
57
65
  - lib
58
- required_ruby_version: !ruby/object:Gem::Requirement
59
- none: false
60
- requirements:
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
61
68
  - - ">="
62
- - !ruby/object:Gem::Version
63
- version: "0"
64
- required_rubygems_version: !ruby/object:Gem::Requirement
65
- none: false
66
- requirements:
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
67
73
  - - ">="
68
- - !ruby/object:Gem::Version
69
- version: "0"
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
70
76
  requirements: []
71
-
72
77
  rubyforge_project:
73
- rubygems_version: 1.8.11
78
+ rubygems_version: 2.2.2
74
79
  signing_key:
75
- specification_version: 3
76
- summary: A module for encoding and decoding (X)HTML entities.
77
- test_files:
78
- - test/html4_test.rb
79
- - test/ruby_1_8_test.rb
80
- - test/roundtrip_test.rb
80
+ specification_version: 4
81
+ summary: Encode/decode HTML entities
82
+ test_files:
83
+ - test/interoperability_test.rb
81
84
  - test/encoding_test.rb
85
+ - test/string_encodings_test.rb
86
+ - test/entities_test.rb
87
+ - test/html4_test.rb
82
88
  - test/xhtml1_test.rb
83
- - test/decoding_test.rb
84
- - test/ruby_1_9_test.rb
85
89
  - test/expanded_test.rb
86
- - test/entities_test.rb
87
- - test/interoperability_test.rb
90
+ - test/decoding_test.rb
91
+ - test/roundtrip_test.rb
@@ -1,6 +0,0 @@
1
- $:.unshift(File.dirname(__FILE__) + '/../lib')
2
- require 'test/unit'
3
- require 'htmlentities'
4
-
5
- ENCODING_AWARE_RUBY = "1.9".respond_to?(:encoding)
6
- $KCODE = 'u' unless ENCODING_AWARE_RUBY
@@ -1,18 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
3
-
4
- unless ENCODING_AWARE_RUBY
5
- class HTMLEntities::Ruby18Test < Test::Unit::TestCase
6
-
7
- # Reported by Benoit Larroque
8
- def test_should_encode_without_error_when_KCODE_is_not_UTF_8
9
- kcode = $KCODE
10
- $KCODE = "n"
11
- coder = HTMLEntities.new
12
- text = [8212].pack('U')
13
- assert_equal "&#8212;", coder.encode(text, :decimal)
14
- $KCODE = kcode
15
- end
16
-
17
- end
18
- end
@@ -1,70 +0,0 @@
1
- # encoding: UTF-8
2
- require File.expand_path("../common", __FILE__)
3
-
4
- if ENCODING_AWARE_RUBY
5
- class HTMLEntities::Ruby19Test < Test::Unit::TestCase
6
-
7
- def test_should_encode_ascii_to_ascii
8
- s = "<elan>".encode(Encoding::US_ASCII)
9
- assert_equal Encoding::US_ASCII, s.encoding
10
-
11
- t = HTMLEntities.new.encode(s)
12
- assert_equal "&lt;elan&gt;", t
13
- assert_equal Encoding::US_ASCII, t.encoding
14
- end
15
-
16
- def test_should_encode_utf8_to_utf8_if_needed
17
- s = "<élan>"
18
- assert_equal Encoding::UTF_8, s.encoding
19
-
20
- t = HTMLEntities.new.encode(s)
21
- assert_equal "&lt;élan&gt;", t
22
- assert_equal Encoding::UTF_8, t.encoding
23
- end
24
-
25
- def test_should_encode_utf8_to_ascii_if_possible
26
- s = "<elan>"
27
- assert_equal Encoding::UTF_8, s.encoding
28
-
29
- t = HTMLEntities.new.encode(s)
30
- assert_equal "&lt;elan&gt;", t
31
- assert_equal Encoding::US_ASCII, t.encoding
32
- end
33
-
34
- def test_should_encode_other_encoding_to_utf8
35
- s = "<élan>".encode(Encoding::ISO_8859_1)
36
- assert_equal Encoding::ISO_8859_1, s.encoding
37
-
38
- t = HTMLEntities.new.encode(s)
39
- assert_equal "&lt;élan&gt;", t
40
- assert_equal Encoding::UTF_8, t.encoding
41
- end
42
-
43
- def test_should_decode_ascii_to_utf8
44
- s = "&lt;&eacute;lan&gt;".encode(Encoding::US_ASCII)
45
- assert_equal Encoding::US_ASCII, s.encoding
46
-
47
- t = HTMLEntities.new.decode(s)
48
- assert_equal "<élan>", t
49
- assert_equal Encoding::UTF_8, t.encoding
50
- end
51
-
52
- def test_should_decode_utf8_to_utf8
53
- s = "&lt;&eacute;lan&gt;".encode(Encoding::UTF_8)
54
- assert_equal Encoding::UTF_8, s.encoding
55
-
56
- t = HTMLEntities.new.decode(s)
57
- assert_equal "<élan>", t
58
- assert_equal Encoding::UTF_8, t.encoding
59
- end
60
-
61
- def test_should_decode_other_encoding_to_utf8
62
- s = "&lt;&eacute;lan&gt;".encode(Encoding::ISO_8859_1)
63
- assert_equal Encoding::ISO_8859_1, s.encoding
64
-
65
- t = HTMLEntities.new.decode(s)
66
- assert_equal "<élan>", t
67
- assert_equal Encoding::UTF_8, t.encoding
68
- end
69
- end
70
- end