guess_html_encoding 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -1,3 +1,3 @@
1
1
  module GuessHtmlEncoding
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -18,7 +18,7 @@ module GuessHtmlEncoding
18
18
  end
19
19
  end
20
20
 
21
- if out.nil? || out.empty? || !Encoding.name_list.include?(out)
21
+ if out.nil? || out.empty? || !encoding_loaded?(out)
22
22
  if html =~ /<meta[^>]*HTTP-EQUIV=["']Content-Type["'][^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
23
23
  out = $1.upcase
24
24
  end
@@ -38,11 +38,16 @@ module GuessHtmlEncoding
38
38
  # Force an HTML string into a guessed encoding.
39
39
  def self.encode(html, headers = nil)
40
40
  encoding = guess(html, (headers || '').gsub(/[\r\n]+/, "\n"))
41
- html.force_encoding(encoding ? encoding : "UTF-8")
41
+ html.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
42
42
  if html.valid_encoding?
43
43
  html
44
44
  else
45
45
  html.force_encoding('ASCII-8BIT').encode('UTF-8', :undef => :replace, :invalid => :replace)
46
46
  end
47
47
  end
48
+
49
+ # Is this encoding loaded?
50
+ def self.encoding_loaded?(encoding)
51
+ Encoding.name_list.include? encoding
52
+ end
48
53
  end
@@ -75,5 +75,28 @@ describe "GuessHtmlEncoding" do
75
75
  encoded.encoding.to_s.should == "UTF-8"
76
76
  encoded.should be_valid_encoding
77
77
  end
78
+
79
+ it "should work on pages encoded with an unloaded encoding" do
80
+ data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
81
+ data.force_encoding("ASCII-8BIT")
82
+ data.should be_valid_encoding # everything is valid in binary
83
+
84
+ GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
85
+
86
+ encoded = GuessHtmlEncoding.encode(data)
87
+ encoded.encoding.to_s.should == "UTF-8"
88
+ encoded.should be_valid_encoding
89
+ end
90
+ end
91
+
92
+ describe "#encoding_loaded?" do
93
+ it 'returns true for all loaded encodings' do
94
+ Encoding.name_list.each do |name|
95
+ GuessHtmlEncoding.encoding_loaded?(name).should be_true
96
+ end
97
+ end
98
+ it 'returns false for irregular or unloaded encoding' do
99
+ GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
100
+ end
78
101
  end
79
- end
102
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guess_html_encoding
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-26 00:00:00.000000000Z
12
+ date: 2012-03-13 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70345601879200 !ruby/object:Gem::Requirement
16
+ requirement: &70293930 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70345601879200
24
+ version_requirements: *70293930
25
25
  description: This gem helps guess the encoding of an HTML page.
26
26
  email:
27
27
  - andrew@iterationlabs.com
@@ -33,7 +33,7 @@ files:
33
33
  - .rvmrc
34
34
  - Gemfile
35
35
  - Gemfile.lock
36
- - README
36
+ - README.markdown
37
37
  - Rakefile
38
38
  - guess_html_encoding.gemspec
39
39
  - lib/guess_html_encoding.rb
@@ -61,11 +61,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  version: '0'
62
62
  requirements: []
63
63
  rubyforge_project: guess_html_encoding
64
- rubygems_version: 1.8.6
64
+ rubygems_version: 1.8.16
65
65
  signing_key:
66
66
  specification_version: 3
67
67
  summary: This gem helps guess the encoding of an HTML page.
68
- test_files:
69
- - spec/guess_html_encoding_spec.rb
70
- - spec/spec.opts
71
- - spec/spec_helper.rb
68
+ test_files: []