guess_html_encoding 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f24b82e186d3e1a58cd2061c7cb1eef2f5b5d1b0
4
+ data.tar.gz: cbfd0284000e074ef621763a36ca2be60cbed218
5
+ SHA512:
6
+ metadata.gz: 4d68030d7c0af216faa1e1dc029c65b6557287a8349aa89ec2a7a98833de4178a838693d2bf3e866b966edd597951e0d31c26f3a4c33daab30c7afa93692b7a5
7
+ data.tar.gz: e2ddc685bae62c4cc6e962dd79a4f69863aef512b556957b2ab91113b492c3b07d7315f880c57179c8a9aea30d0279823983a21bbdcc6cbbc697cfc9ef2ada30
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ guess_html_encoding
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.5
data/Gemfile.lock CHANGED
@@ -1,20 +1,24 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- guess_html_encoding (0.0.8)
4
+ guess_html_encoding (0.0.9)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
8
8
  specs:
9
- diff-lcs (1.1.3)
10
- rspec (2.6.0)
11
- rspec-core (~> 2.6.0)
12
- rspec-expectations (~> 2.6.0)
13
- rspec-mocks (~> 2.6.0)
14
- rspec-core (2.6.4)
15
- rspec-expectations (2.6.0)
16
- diff-lcs (~> 1.1.2)
17
- rspec-mocks (2.6.0)
9
+ diff-lcs (1.2.5)
10
+ rspec (3.1.0)
11
+ rspec-core (~> 3.1.0)
12
+ rspec-expectations (~> 3.1.0)
13
+ rspec-mocks (~> 3.1.0)
14
+ rspec-core (3.1.7)
15
+ rspec-support (~> 3.1.0)
16
+ rspec-expectations (3.1.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.1.0)
19
+ rspec-mocks (3.1.3)
20
+ rspec-support (~> 3.1.0)
21
+ rspec-support (3.1.2)
18
22
 
19
23
  PLATFORMS
20
24
  ruby
@@ -10,7 +10,7 @@ module GuessHtmlEncoding
10
10
  if headers
11
11
  headers = headers.map {|k, v| "#{k}: #{v}" }.join("\n") if headers.is_a?(Hash)
12
12
  headers = headers.dup.force_encoding("ASCII-8BIT")
13
- headers.split("\n").map {|i| i.split(":")}.each do |k,v|
13
+ headers.gsub(/[\r\n]+/, "\n").split("\n").map {|i| i.split(":")}.each do |k,v|
14
14
  if k =~ /Content-Type/i && v =~ /charset=([\w\d-]+);?/i
15
15
  out = $1.upcase
16
16
  break
@@ -42,7 +42,7 @@ module GuessHtmlEncoding
42
42
  # Force an HTML string into a guessed encoding.
43
43
  def self.encode(html, headers = nil)
44
44
  html_copy = html.to_s.dup
45
- encoding = guess(html_copy, (headers || '').gsub(/[\r\n]+/, "\n"))
45
+ encoding = guess(html_copy, headers)
46
46
  html_copy.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
47
47
  if html_copy.valid_encoding?
48
48
  html_copy
@@ -1,3 +1,3 @@
1
1
  module GuessHtmlEncoding
2
- VERSION = "0.0.9"
2
+ VERSION = "0.0.10"
3
3
  end
@@ -6,59 +6,59 @@ describe "GuessHtmlEncoding" do
6
6
  it "can use headers" do
7
7
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
8
8
  "Hello: world\nContent-Type: text/html; charset=LATIN1\nFoo: bar")
9
- guess.should == "ISO-8859-1"
9
+ expect(guess).to eq("ISO-8859-1")
10
10
  end
11
11
 
12
12
  it "accepts headers as a hash as well" do
13
13
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
14
14
  {"Hello" => "world", "Content-Type" => "text/html; charset=LATIN1", "Foo" => "bar"})
15
- guess.should == "ISO-8859-1"
15
+ expect(guess).to eq("ISO-8859-1")
16
16
  end
17
17
 
18
18
  it "accepts meta tags" do
19
19
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=LATIN1"></head><body><div>hi!</div></body></html>')
20
- guess.should == "ISO-8859-1"
20
+ expect(guess).to eq("ISO-8859-1")
21
21
  end
22
22
 
23
23
  it "works okay when there is a semi-colon after the encoding with headers" do
24
24
  guess = GuessHtmlEncoding.guess("<html><body><div>hi!</div></body></html>",
25
25
  "Hello: world\nContent-Type: text/html; charset=utf-8;\nFoo: bar")
26
- guess.should == "UTF-8"
26
+ expect(guess).to eq("UTF-8")
27
27
  end
28
28
 
29
29
  it "works okay when there is a semi-colon after the encoding with meta-tags" do
30
30
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8;"></head><body><div>hi!</div></body></html>')
31
- guess.should == "UTF-8"
31
+ expect(guess).to eq("UTF-8")
32
32
  end
33
33
 
34
34
  it "converts UTF8 to UTF-8" do
35
35
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf8;"></head><body><div>hi!</div></body></html>')
36
- guess.should == "UTF-8"
36
+ expect(guess).to eq("UTF-8")
37
37
  end
38
38
 
39
39
  it "converts CP-1251 to CP1251" do
40
40
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=cp-1251;"></head><body><div>hi!</div></body></html>')
41
- guess.should == "CP1251"
41
+ expect(guess).to eq("CP1251")
42
42
  end
43
43
 
44
44
  it "skips the header content type if it's invalid" do
45
45
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=utf8;"></head><body><div>hi!</div></body></html>',
46
46
  "Hello: world\nContent-Type: text/html; charset=RU;\nFoo: bar")
47
- guess.should == "UTF-8"
47
+ expect(guess).to eq("UTF-8")
48
48
  end
49
49
 
50
50
  it "translates WIN1251 to WINDOWS-1250" do
51
51
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=WIN1251;"></head><body><div>hi!</div></body></html>')
52
- guess.should == "WINDOWS-1250"
52
+ expect(guess).to eq("WINDOWS-1250")
53
53
  end
54
54
 
55
55
  it "translates GB2312 to GB18030" do
56
56
  guess = GuessHtmlEncoding.guess('<html><head><meta http-equiv="content-type" content="text/html; charset=GB2312;"></head><body><div>hi!</div></body></html>')
57
- guess.should == "GB18030"
57
+ expect(guess).to eq("GB18030")
58
58
  end
59
59
 
60
60
  it "should not raise an exception if data is nil" do
61
- GuessHtmlEncoding.guess(nil).should_not raise_error(TypeError)
61
+ expect { GuessHtmlEncoding.guess(nil) }.not_to raise_error
62
62
  end
63
63
  end
64
64
 
@@ -66,97 +66,101 @@ describe "GuessHtmlEncoding" do
66
66
  it "should work on correctly encoded pages" do
67
67
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=utf8;'></head><body><div>hi!♥</div></body></html>"
68
68
  data.force_encoding("ASCII-8BIT")
69
- data.should be_valid_encoding # everything is valid in binary
69
+ expect(data).to be_valid_encoding # everything is valid in binary
70
70
 
71
- GuessHtmlEncoding.guess(data).should == "UTF-8" # because the page says so!
72
- data.force_encoding("UTF-8").should be_valid_encoding # because it really is utf-8
71
+ expect(GuessHtmlEncoding.guess(data)).to eq("UTF-8") # because the page says so!
72
+ expect(data.force_encoding("UTF-8")).to be_valid_encoding # because it really is utf-8
73
73
 
74
74
  encoded = GuessHtmlEncoding.encode(data)
75
- encoded.encoding.to_s.should == "UTF-8"
76
- encoded.should be_valid_encoding
75
+ expect(encoded.encoding.to_s).to eq("UTF-8")
76
+ expect(encoded).to be_valid_encoding
77
77
  end
78
78
 
79
79
  it "should work on incorrectly encoded pages" do
80
80
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=utf8;'></head><body><div>hi!\xc2</div></body></html>"
81
81
  data.force_encoding("ASCII-8BIT")
82
- data.should be_valid_encoding # everything is valid in binary
82
+ expect(data).to be_valid_encoding # everything is valid in binary
83
83
 
84
- GuessHtmlEncoding.guess(data).should == "UTF-8" # because the page says so!
85
- data.force_encoding("UTF-8").should_not be_valid_encoding # because of the bad byte sequence \xc2 which is not valid UTF-8
84
+ expect(GuessHtmlEncoding.guess(data)).to eq("UTF-8") # because the page says so!
85
+ expect(data.force_encoding("UTF-8")).not_to be_valid_encoding # because of the bad byte sequence \xc2 which is not valid UTF-8
86
86
 
87
87
  encoded = GuessHtmlEncoding.encode(data)
88
- encoded.encoding.to_s.should == "UTF-8"
89
- encoded.should be_valid_encoding
88
+ expect(encoded.encoding.to_s).to eq("UTF-8")
89
+ expect(encoded).to be_valid_encoding
90
90
  end
91
91
 
92
92
  it "should work on pages encoded with an unknown encoding by forcing them to utf8" do
93
93
  data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
94
94
  data.force_encoding("ASCII-8BIT")
95
- data.should be_valid_encoding # everything is valid in binary
95
+ expect(data).to be_valid_encoding # everything is valid in binary
96
96
 
97
- GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
97
+ expect(GuessHtmlEncoding.guess(data)).to eq("X-MAC-ROMAN") # because the page says so!
98
98
 
99
99
  encoded = GuessHtmlEncoding.encode(data)
100
- encoded.encoding.to_s.should == "UTF-8"
101
- encoded.should be_valid_encoding
100
+ expect(encoded.encoding.to_s).to eq("UTF-8")
101
+ expect(encoded).to be_valid_encoding
102
102
 
103
- data.encoding.to_s.should == "ASCII-8BIT"
103
+ expect(data.encoding.to_s).to eq("ASCII-8BIT")
104
104
  end
105
105
 
106
106
  it "should not raise an exception if data is nil" do
107
- GuessHtmlEncoding.encode(nil).should_not raise_error(TypeError)
107
+ expect { GuessHtmlEncoding.encode(nil) }.not_to raise_error
108
108
  end
109
109
 
110
-
111
110
  it "should work on GB18030 (and translate GB2312 into GB18030)" do
112
111
  data = File.read(File.join(File.dirname(__FILE__), "fixtures/gb18030.html"), :encoding => "binary")
113
- GuessHtmlEncoding.encoding_loaded?("GB18030").should be_true
114
- GuessHtmlEncoding.guess(data).should == "GB18030"
115
- GuessHtmlEncoding.encode(data).encoding.to_s.should == "GB18030"
112
+ expect(GuessHtmlEncoding.encoding_loaded?("GB18030")).to be_truthy
113
+ expect(GuessHtmlEncoding.guess(data)).to eq("GB18030")
114
+ expect(GuessHtmlEncoding.encode(data).encoding.to_s).to eq("GB18030")
115
+ end
116
+
117
+ it "should work with headers as a hash" do
118
+ data = File.read(File.join(File.dirname(__FILE__), "fixtures/gb18030.html"), :encoding => "binary")
119
+ expect(lambda { GuessHtmlEncoding.encode(data, {}) }).not_to raise_error
116
120
  end
117
121
  end
118
122
 
119
123
  describe "#encoding_loaded?" do
120
124
  it 'returns true for all loaded encodings' do
121
125
  (Encoding.name_list - ["internal"]).each do |name|
122
- GuessHtmlEncoding.encoding_loaded?(name).should be_true
123
- lambda { Encoding.find(name) }.should_not raise_error
126
+ expect(GuessHtmlEncoding.encoding_loaded?(name)).to be_truthy
127
+ expect { Encoding.find(name) }.not_to raise_error
124
128
  end
125
129
  end
126
130
 
127
131
  it 'returns true for uppercase encodings' do
128
- GuessHtmlEncoding.encoding_loaded?("WINDOWS-1250").should be_true
129
- lambda { Encoding.find("WINDOWS-1250") }.should_not raise_error
132
+ expect(GuessHtmlEncoding.encoding_loaded?("WINDOWS-1250")).to be_truthy
133
+ expect { Encoding.find("WINDOWS-1250") }.not_to raise_error
130
134
  end
131
135
 
132
136
  it 'returns true for lowercase encodings' do
133
- GuessHtmlEncoding.encoding_loaded?("windows-1250").should be_true
134
- lambda { Encoding.find("windows-1250") }.should_not raise_error
137
+ expect(GuessHtmlEncoding.encoding_loaded?("windows-1250")).to be_truthy
138
+ expect { Encoding.find("windows-1250") }.not_to raise_error
135
139
  end
136
140
 
137
141
  it 'returns true for encoding aliases' do
138
142
  Encoding.aliases.keys.each do |key|
139
- GuessHtmlEncoding.encoding_loaded?(key).should be_true
140
- GuessHtmlEncoding.encoding_loaded?(key.upcase).should be_true
141
- lambda { Encoding.find(key) }.should_not raise_error
142
- lambda { Encoding.find(key.upcase) }.should_not raise_error
143
+ expect(GuessHtmlEncoding.encoding_loaded?(key)).to be_truthy
144
+ expect(GuessHtmlEncoding.encoding_loaded?(key.upcase)).to be_truthy
145
+ expect { Encoding.find(key) }.not_to raise_error
146
+ expect { Encoding.find(key.upcase) }.not_to raise_error
143
147
  end
144
148
  end
145
149
 
146
150
  it 'returns false for irregular or unloaded encoding' do
147
- GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
151
+ expect(GuessHtmlEncoding.encoding_loaded?('_WHY')).to be_falsy
148
152
  end
149
153
 
150
154
  it "accepts a simple meta tag" do
151
155
  # Like http://www.taobao.com
152
156
  guess = GuessHtmlEncoding.guess('<html><head><meta charset="gbk" /></head><body><div>hi!</div></body></html>')
153
- guess.should == "GBK"
157
+ expect(guess).to eq("GBK")
154
158
  end
155
159
 
156
160
  it "works as well when there is no double quotation marks with http-equiv in meta-tags" do
157
161
  # Like http://www.frozentux.net/iptables-tutorial/cn/iptables-tutorial-cn-1.1.19.html
158
162
  guess = GuessHtmlEncoding.guess('<html><head><META http-equiv=Content-Type content="text/html; charset=utf-8"></head><body><div>hi!</div></body></html>')
159
- guess.should == "UTF-8"
163
+ expect(guess).to eq("UTF-8")
160
164
  end
161
165
  end
162
166
  end
metadata CHANGED
@@ -1,30 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guess_html_encoding
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
5
- prerelease:
4
+ version: 0.0.10
6
5
  platform: ruby
7
6
  authors:
8
7
  - Andrew Cantino (Iteration Labs, LLC)
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-13 00:00:00.000000000 Z
11
+ date: 2014-12-14 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  description: ''
@@ -34,8 +31,9 @@ executables: []
34
31
  extensions: []
35
32
  extra_rdoc_files: []
36
33
  files:
37
- - .gitignore
38
- - .rvmrc
34
+ - ".gitignore"
35
+ - ".ruby-gemset"
36
+ - ".ruby-version"
39
37
  - Gemfile
40
38
  - Gemfile.lock
41
39
  - LICENSE
@@ -50,27 +48,26 @@ files:
50
48
  - spec/spec_helper.rb
51
49
  homepage: http://github.com/cantino/guess_html_encoding
52
50
  licenses: []
51
+ metadata: {}
53
52
  post_install_message:
54
53
  rdoc_options: []
55
54
  require_paths:
56
55
  - lib
57
56
  required_ruby_version: !ruby/object:Gem::Requirement
58
- none: false
59
57
  requirements:
60
- - - ! '>='
58
+ - - ">="
61
59
  - !ruby/object:Gem::Version
62
60
  version: '0'
63
61
  required_rubygems_version: !ruby/object:Gem::Requirement
64
- none: false
65
62
  requirements:
66
- - - ! '>='
63
+ - - ">="
67
64
  - !ruby/object:Gem::Version
68
65
  version: '0'
69
66
  requirements: []
70
67
  rubyforge_project: guess_html_encoding
71
- rubygems_version: 1.8.23
68
+ rubygems_version: 2.2.2
72
69
  signing_key:
73
- specification_version: 3
70
+ specification_version: 4
74
71
  summary: A small gem that attempts to guess and then force encoding of HTML documents
75
72
  for Ruby 1.9
76
73
  test_files:
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use 1.9.3@guess_html_encoding --create