guess_html_encoding 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
File without changes
@@ -1,3 +1,3 @@
1
1
  module GuessHtmlEncoding
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -18,7 +18,7 @@ module GuessHtmlEncoding
18
18
  end
19
19
  end
20
20
 
21
- if out.nil? || out.empty? || !Encoding.name_list.include?(out)
21
+ if out.nil? || out.empty? || !encoding_loaded?(out)
22
22
  if html =~ /<meta[^>]*HTTP-EQUIV=["']Content-Type["'][^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
23
23
  out = $1.upcase
24
24
  end
@@ -38,11 +38,16 @@ module GuessHtmlEncoding
38
38
  # Force an HTML string into a guessed encoding.
39
39
  def self.encode(html, headers = nil)
40
40
  encoding = guess(html, (headers || '').gsub(/[\r\n]+/, "\n"))
41
- html.force_encoding(encoding ? encoding : "UTF-8")
41
+ html.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
42
42
  if html.valid_encoding?
43
43
  html
44
44
  else
45
45
  html.force_encoding('ASCII-8BIT').encode('UTF-8', :undef => :replace, :invalid => :replace)
46
46
  end
47
47
  end
48
+
49
+ # Is this encoding loaded?
50
+ def self.encoding_loaded?(encoding)
51
+ Encoding.name_list.include? encoding
52
+ end
48
53
  end
@@ -75,5 +75,28 @@ describe "GuessHtmlEncoding" do
75
75
  encoded.encoding.to_s.should == "UTF-8"
76
76
  encoded.should be_valid_encoding
77
77
  end
78
+
79
+ it "should work on pages encoded with an unloaded encoding" do
80
+ data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
81
+ data.force_encoding("ASCII-8BIT")
82
+ data.should be_valid_encoding # everything is valid in binary
83
+
84
+ GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
85
+
86
+ encoded = GuessHtmlEncoding.encode(data)
87
+ encoded.encoding.to_s.should == "UTF-8"
88
+ encoded.should be_valid_encoding
89
+ end
90
+ end
91
+
92
+ describe "#encoding_loaded?" do
93
+ it 'returns true for all loaded encodings' do
94
+ Encoding.name_list.each do |name|
95
+ GuessHtmlEncoding.encoding_loaded?(name).should be_true
96
+ end
97
+ end
98
+ it 'returns false for irregular or unloaded encoding' do
99
+ GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
100
+ end
78
101
  end
79
- end
102
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guess_html_encoding
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-26 00:00:00.000000000Z
12
+ date: 2012-03-13 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70345601879200 !ruby/object:Gem::Requirement
16
+ requirement: &70293930 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70345601879200
24
+ version_requirements: *70293930
25
25
  description: This gem helps guess the encoding of an HTML page.
26
26
  email:
27
27
  - andrew@iterationlabs.com
@@ -33,7 +33,7 @@ files:
33
33
  - .rvmrc
34
34
  - Gemfile
35
35
  - Gemfile.lock
36
- - README
36
+ - README.markdown
37
37
  - Rakefile
38
38
  - guess_html_encoding.gemspec
39
39
  - lib/guess_html_encoding.rb
@@ -61,11 +61,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  version: '0'
62
62
  requirements: []
63
63
  rubyforge_project: guess_html_encoding
64
- rubygems_version: 1.8.6
64
+ rubygems_version: 1.8.16
65
65
  signing_key:
66
66
  specification_version: 3
67
67
  summary: This gem helps guess the encoding of an HTML page.
68
- test_files:
69
- - spec/guess_html_encoding_spec.rb
70
- - spec/spec.opts
71
- - spec/spec_helper.rb
68
+ test_files: []