guess_html_encoding 0.0.7 → 0.0.8

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- guess_html_encoding (0.0.6)
4
+ guess_html_encoding (0.0.8)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
@@ -19,9 +19,12 @@ module GuessHtmlEncoding
19
19
  end
20
20
 
21
21
  if out.nil? || out.empty? || !encoding_loaded?(out)
22
- if html =~ /<meta[^>]*HTTP-EQUIV=["']Content-Type["'][^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
23
- out = $1.upcase
22
+ if html =~ /<meta[^>]*HTTP-EQUIV=["']?Content-Type["']?[^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
23
+ out = $1
24
+ elsif html =~ /<meta\s+charset=["']([\w\d-]+)?/i
25
+ out = $1
24
26
  end
27
+ out.upcase! unless out.nil?
25
28
  end
26
29
 
27
30
  # Translate encodings with other names.
@@ -1,3 +1,3 @@
1
1
  module GuessHtmlEncoding
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
@@ -146,5 +146,17 @@ describe "GuessHtmlEncoding" do
146
146
  it 'returns false for irregular or unloaded encoding' do
147
147
  GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
148
148
  end
149
+
150
+ it "accepts a simple meta tag" do
151
+ # Like http://www.taobao.com
152
+ guess = GuessHtmlEncoding.guess('<html><head><meta charset="gbk" /></head><body><div>hi!</div></body></html>')
153
+ guess.should == "GBK"
154
+ end
155
+
156
+ it "works as well when there is no double quotation marks with http-equiv in meta-tags" do
157
+ # Like http://www.frozentux.net/iptables-tutorial/cn/iptables-tutorial-cn-1.1.19.html
158
+ guess = GuessHtmlEncoding.guess('<html><head><META http-equiv=Content-Type content="text/html; charset=utf-8"></head><body><div>hi!</div></body></html>')
159
+ guess.should == "UTF-8"
160
+ end
149
161
  end
150
162
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guess_html_encoding
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-01 00:00:00.000000000 Z
12
+ date: 2013-01-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec