guess_html_encoding 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f24b82e186d3e1a58cd2061c7cb1eef2f5b5d1b0
4
+ data.tar.gz: cbfd0284000e074ef621763a36ca2be60cbed218
5
+ SHA512:
6
+ metadata.gz: 4d68030d7c0af216faa1e1dc029c65b6557287a8349aa89ec2a7a98833de4178a838693d2bf3e866b966edd597951e0d31c26f3a4c33daab30c7afa93692b7a5
7
+ data.tar.gz: e2ddc685bae62c4cc6e962dd79a4f69863aef512b556957b2ab91113b492c3b07d7315f880c57179c8a9aea30d0279823983a21bbdcc6cbbc697cfc9ef2ada30
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ guess_html_encoding
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.5
data/Gemfile.lock CHANGED
@@ -1,20 +1,24 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- guess_html_encoding (0.0.8)
4
+ guess_html_encoding (0.0.9)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
8
8
  specs:
9
- diff-lcs (1.1.3)
10
- rspec (2.6.0)
11
- rspec-core (~> 2.6.0)
12
- rspec-expectations (~> 2.6.0)
13
- rspec-mocks (~> 2.6.0)
14
- rspec-core (2.6.4)
15
- rspec-expectations (2.6.0)
16
- diff-lcs (~> 1.1.2)
17
- rspec-mocks (2.6.0)
9
+ diff-lcs (1.2.5)
10
+ rspec (3.1.0)
11
+ rspec-core (~> 3.1.0)
12
+ rspec-expectations (~> 3.1.0)
13
+ rspec-mocks (~> 3.1.0)
14
+ rspec-core (3.1.7)
15
+ rspec-support (~> 3.1.0)
16
+ rspec-expectations (3.1.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.1.0)
19
+ rspec-mocks (3.1.3)
20
+ rspec-support (~> 3.1.0)
21
+ rspec-support (3.1.2)
18
22
 
19
23
  PLATFORMS
20
24
  ruby
@@ -10,7 +10,7 @@ module GuessHtmlEncoding
10
10
  if headers
11
11
  headers = headers.map {|k, v| "#{k}: #{v}" }.join("\n") if headers.is_a?(Hash)
12
12
  headers = headers.dup.force_encoding("ASCII-8BIT")
13
- headers.split("\n").map {|i| i.split(":")}.each do |k,v|
13
+ headers.gsub(/[\r\n]+/, "\n").split("\n").map {|i| i.split(":")}.each do |k,v|
14
14
  if k =~ /Content-Type/i && v =~ /charset=([\w\d-]+);?/i
15
15
  out = $1.upcase
16
16
  break
@@ -42,7 +42,7 @@ module GuessHtmlEncoding
42
42
  # Force an HTML string into a guessed encoding.
43
43
  def self.encode(html, headers = nil)
44
44
  html_copy = html.to_s.dup
45
- encoding = guess(html_copy, (headers || '').gsub(/[\r\n]+/, "\n"))
45
+ encoding = guess(html_copy, headers)
46
46
  html_copy.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
47
47
  if html_copy.valid_encoding?
48
48
  html_copy
@@ -1,3 +1,3 @@
1
1
  module GuessHtmlEncoding
2
- VERSION = "0.0.9"
2
+ VERSION = "0.0.10"
3
3
  end
@@ -6,59 +6,59 @@ describe "GuessHtmlEncoding" do
6
6
  it "can use headers" do
7
7
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
8
8
  "Hello: world\nContent-Type: text/html; charset=LATIN1\nFoo: bar")
9
- guess.should == "ISO-8859-1"
9
+ expect(guess).to eq("ISO-8859-1")
10
10
  end
11
11
 
12
12
  it "accepts headers as a hash as well" do
13
13
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
14
14
  {"Hello" => "world", "Content-Type" => "text/html; charset=LATIN1", "Foo" => "bar"})
15
- guess.should == "ISO-8859-1"
15
+ expect(guess).to eq("ISO-8859-1")
16
16
  end
17
17
 
18
18
  it "accepts meta tags" do
19
19
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=LATIN1"></head><body><div>hi!</div></body></html>')
20
- guess.should == "ISO-8859-1"
20
+ expect(guess).to eq("ISO-8859-1")
21
21
  end
22
22
 
23
23
  it "works okay when there is a semi-colon after the encoding with headers" do
24
24
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
25
25
  "Hello: world\nContent-Type: text/html; charset=utf-8;\nFoo: bar")
26
- guess.should == "UTF-8"
26
+ expect(guess).to eq("UTF-8")
27
27
  end
28
28
 
29
29
  it "works okay when there is a semi-colon after the encoding with meta-tags" do
30
30
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8;"></head><body><div>hi!</div></body></html>')
31
- guess.should == "UTF-8"
31
+ expect(guess).to eq("UTF-8")
32
32
  end
33
33
 
34
34
  it "converts UTF8 to UTF-8" do
35
35
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf8;"></head><body><div>hi!</div></body></html>')
36
- guess.should == "UTF-8"
36
+ expect(guess).to eq("UTF-8")
37
37
  end
38
38
 
39
39
  it "converts CP-1251 to CP1251" do
40
40
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=cp-1251;"></head><body><div>hi!</div></body></html>')
41
- guess.should == "CP1251"
41
+ expect(guess).to eq("CP1251")
42
42
  end
43
43
 
44
44
  it "skips the header content type if it's invalid" do
45
45
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf8;"></head><body><div>hi!</div></body></html>',
46
46
  "Hello: world\nContent-Type: text/html; charset=RU;\nFoo: bar")
47
- guess.should == "UTF-8"
47
+ expect(guess).to eq("UTF-8")
48
48
  end
49
49
 
50
50
  it "translates WIN1251 to WINDOWS-1250" do
51
51
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=WIN1251;"></head><body><div>hi!</div></body></html>')
52
- guess.should == "WINDOWS-1250"
52
+ expect(guess).to eq("WINDOWS-1250")
53
53
  end
54
54
 
55
55
  it "translates GB2312 to GB18030" do
56
56
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=GB2312;"></head><body><div>hi!</div></body></html>')
57
- guess.should == "GB18030"
57
+ expect(guess).to eq("GB18030")
58
58
  end
59
59
 
60
60
  it "should not raise an exception if data is nil" do
61
- GuessHtmlEncoding.guess(nil).should_not raise_error(TypeError)
61
+ expect { GuessHtmlEncoding.guess(nil) }.not_to raise_error
62
62
  end
63
63
  end
64
64
 
@@ -66,97 +66,101 @@ describe "GuessHtmlEncoding" do
66
66
  it "should work on correctly encoded pages" do
67
67
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=utf8;'></head><body><div>hi!♥</div></body></html>"
68
68
  data.force_encoding("ASCII-8BIT")
69
- data.should be_valid_encoding # everything is valid in binary
69
+ expect(data).to be_valid_encoding # everything is valid in binary
70
70
 
71
- GuessHtmlEncoding.guess(data).should == "UTF-8" # because the page says so!
72
- data.force_encoding("UTF-8").should be_valid_encoding # because it really is utf-8
71
+ expect(GuessHtmlEncoding.guess(data)).to eq("UTF-8") # because the page says so!
72
+ expect(data.force_encoding("UTF-8")).to be_valid_encoding # because it really is utf-8
73
73
 
74
74
  encoded = GuessHtmlEncoding.encode(data)
75
- encoded.encoding.to_s.should == "UTF-8"
76
- encoded.should be_valid_encoding
75
+ expect(encoded.encoding.to_s).to eq("UTF-8")
76
+ expect(encoded).to be_valid_encoding
77
77
  end
78
78
 
79
79
  it "should work on incorrectly encoded pages" do
80
80
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=utf8;'></head><body><div>hi!\xc2</div></body></html>"
81
81
  data.force_encoding("ASCII-8BIT")
82
- data.should be_valid_encoding # everything is valid in binary
82
+ expect(data).to be_valid_encoding # everything is valid in binary
83
83
 
84
- GuessHtmlEncoding.guess(data).should == "UTF-8" # because the page says so!
85
- data.force_encoding("UTF-8").should_not be_valid_encoding # because of the bad byte sequence \xc2 which is not valid UTF-8
84
+ expect(GuessHtmlEncoding.guess(data)).to eq("UTF-8") # because the page says so!
85
+ expect(data.force_encoding("UTF-8")).not_to be_valid_encoding # because of the bad byte sequence \xc2 which is not valid UTF-8
86
86
 
87
87
  encoded = GuessHtmlEncoding.encode(data)
88
- encoded.encoding.to_s.should == "UTF-8"
89
- encoded.should be_valid_encoding
88
+ expect(encoded.encoding.to_s).to eq("UTF-8")
89
+ expect(encoded).to be_valid_encoding
90
90
  end
91
91
 
92
92
  it "should work on pages encoded with an unknown encoding by forcing them to utf8" do
93
93
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
94
94
  data.force_encoding("ASCII-8BIT")
95
- data.should be_valid_encoding # everything is valid in binary
95
+ expect(data).to be_valid_encoding # everything is valid in binary
96
96
 
97
- GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
97
+ expect(GuessHtmlEncoding.guess(data)).to eq("X-MAC-ROMAN") # because the page says so!
98
98
 
99
99
  encoded = GuessHtmlEncoding.encode(data)
100
- encoded.encoding.to_s.should == "UTF-8"
101
- encoded.should be_valid_encoding
100
+ expect(encoded.encoding.to_s).to eq("UTF-8")
101
+ expect(encoded).to be_valid_encoding
102
102
 
103
- data.encoding.to_s.should == "ASCII-8BIT"
103
+ expect(data.encoding.to_s).to eq("ASCII-8BIT")
104
104
  end
105
105
 
106
106
  it "should not raise an exception if data is nil" do
107
- GuessHtmlEncoding.encode(nil).should_not raise_error(TypeError)
107
+ expect { GuessHtmlEncoding.encode(nil) }.not_to raise_error
108
108
  end
109
109
 
110
-
111
110
  it "should work on GB18030 (and translate GB2312 into GB18030)" do
112
111
  data = File.read(File.join(File.dirname(__FILE__), "fixtures/gb18030.html"), :encoding => "binary")
113
- GuessHtmlEncoding.encoding_loaded?("GB18030").should be_true
114
- GuessHtmlEncoding.guess(data).should == "GB18030"
115
- GuessHtmlEncoding.encode(data).encoding.to_s.should == "GB18030"
112
+ expect(GuessHtmlEncoding.encoding_loaded?("GB18030")).to be_truthy
113
+ expect(GuessHtmlEncoding.guess(data)).to eq("GB18030")
114
+ expect(GuessHtmlEncoding.encode(data).encoding.to_s).to eq("GB18030")
115
+ end
116
+
117
+ it "should work with headers as a hash" do
118
+ data = File.read(File.join(File.dirname(__FILE__), "fixtures/gb18030.html"), :encoding => "binary")
119
+ expect(lambda { GuessHtmlEncoding.encode(data, {}) }).not_to raise_error
116
120
  end
117
121
  end
118
122
 
119
123
  describe "#encoding_loaded?" do
120
124
  it 'returns true for all loaded encodings' do
121
125
  (Encoding.name_list - ["internal"]).each do |name|
122
- GuessHtmlEncoding.encoding_loaded?(name).should be_true
123
- lambda { Encoding.find(name) }.should_not raise_error
126
+ expect(GuessHtmlEncoding.encoding_loaded?(name)).to be_truthy
127
+ expect { Encoding.find(name) }.not_to raise_error
124
128
  end
125
129
  end
126
130
 
127
131
  it 'returns true for uppercase encodings' do
128
- GuessHtmlEncoding.encoding_loaded?("WINDOWS-1250").should be_true
129
- lambda { Encoding.find("WINDOWS-1250") }.should_not raise_error
132
+ expect(GuessHtmlEncoding.encoding_loaded?("WINDOWS-1250")).to be_truthy
133
+ expect { Encoding.find("WINDOWS-1250") }.not_to raise_error
130
134
  end
131
135
 
132
136
  it 'returns true for lowercase encodings' do
133
- GuessHtmlEncoding.encoding_loaded?("windows-1250").should be_true
134
- lambda { Encoding.find("windows-1250") }.should_not raise_error
137
+ expect(GuessHtmlEncoding.encoding_loaded?("windows-1250")).to be_truthy
138
+ expect { Encoding.find("windows-1250") }.not_to raise_error
135
139
  end
136
140
 
137
141
  it 'returns true for encoding aliases' do
138
142
  Encoding.aliases.keys.each do |key|
139
- GuessHtmlEncoding.encoding_loaded?(key).should be_true
140
- GuessHtmlEncoding.encoding_loaded?(key.upcase).should be_true
141
- lambda { Encoding.find(key) }.should_not raise_error
142
- lambda { Encoding.find(key.upcase) }.should_not raise_error
143
+ expect(GuessHtmlEncoding.encoding_loaded?(key)).to be_truthy
144
+ expect(GuessHtmlEncoding.encoding_loaded?(key.upcase)).to be_truthy
145
+ expect { Encoding.find(key) }.not_to raise_error
146
+ expect { Encoding.find(key.upcase) }.not_to raise_error
143
147
  end
144
148
  end
145
149
 
146
150
  it 'returns false for irregular or unloaded encoding' do
147
- GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
151
+ expect(GuessHtmlEncoding.encoding_loaded?('_WHY')).to be_falsy
148
152
  end
149
153
 
150
154
  it "accepts a simple meta tag" do
151
155
  # Like http://www.taobao.com
152
156
  guess = GuessHtmlEncoding.guess('<html><head><meta charset="gbk" /></head><body><div>hi!</div></body></html>')
153
- guess.should == "GBK"
157
+ expect(guess).to eq("GBK")
154
158
  end
155
159
 
156
160
  it "works as well when there is no double quotation marks with http-equiv in meta-tags" do
157
161
  # Like http://www.frozentux.net/iptables-tutorial/cn/iptables-tutorial-cn-1.1.19.html
158
162
  guess = GuessHtmlEncoding.guess('<html><head><META http-equiv=Content-Type content="text/html; charset=utf-8"></head><body><div>hi!</div></body></html>')
159
- guess.should == "UTF-8"
163
+ expect(guess).to eq("UTF-8")
160
164
  end
161
165
  end
162
166
  end
metadata CHANGED
@@ -1,30 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guess_html_encoding
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
5
- prerelease:
4
+ version: 0.0.10
6
5
  platform: ruby
7
6
  authors:
8
7
  - Andrew Cantino (Iteration Labs, LLC)
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-13 00:00:00.000000000 Z
11
+ date: 2014-12-14 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  description: ''
@@ -34,8 +31,9 @@ executables: []
34
31
  extensions: []
35
32
  extra_rdoc_files: []
36
33
  files:
37
- - .gitignore
38
- - .rvmrc
34
+ - ".gitignore"
35
+ - ".ruby-gemset"
36
+ - ".ruby-version"
39
37
  - Gemfile
40
38
  - Gemfile.lock
41
39
  - LICENSE
@@ -50,27 +48,26 @@ files:
50
48
  - spec/spec_helper.rb
51
49
  homepage: http://github.com/cantino/guess_html_encoding
52
50
  licenses: []
51
+ metadata: {}
53
52
  post_install_message:
54
53
  rdoc_options: []
55
54
  require_paths:
56
55
  - lib
57
56
  required_ruby_version: !ruby/object:Gem::Requirement
58
- none: false
59
57
  requirements:
60
- - - ! '>='
58
+ - - ">="
61
59
  - !ruby/object:Gem::Version
62
60
  version: '0'
63
61
  required_rubygems_version: !ruby/object:Gem::Requirement
64
- none: false
65
62
  requirements:
66
- - - ! '>='
63
+ - - ">="
67
64
  - !ruby/object:Gem::Version
68
65
  version: '0'
69
66
  requirements: []
70
67
  rubyforge_project: guess_html_encoding
71
- rubygems_version: 1.8.23
68
+ rubygems_version: 2.2.2
72
69
  signing_key:
73
- specification_version: 3
70
+ specification_version: 4
74
71
  summary: A small gem that attempts to guess and then force encoding of HTML documents
75
72
  for Ruby 1.9
76
73
  test_files:
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use 1.9.3@guess_html_encoding --create