guess_html_encoding 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
data/{README → README.markdown}
RENAMED
File without changes
|
data/lib/guess_html_encoding.rb
CHANGED
@@ -18,7 +18,7 @@ module GuessHtmlEncoding
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
if out.nil? || out.empty? || !
|
21
|
+
if out.nil? || out.empty? || !encoding_loaded?(out)
|
22
22
|
if html =~ /<meta[^>]*HTTP-EQUIV=["']Content-Type["'][^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
|
23
23
|
out = $1.upcase
|
24
24
|
end
|
@@ -38,11 +38,16 @@ module GuessHtmlEncoding
|
|
38
38
|
# Force an HTML string into a guessed encoding.
|
39
39
|
def self.encode(html, headers = nil)
|
40
40
|
encoding = guess(html, (headers || '').gsub(/[\r\n]+/, "\n"))
|
41
|
-
html.force_encoding(encoding ? encoding : "UTF-8")
|
41
|
+
html.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
|
42
42
|
if html.valid_encoding?
|
43
43
|
html
|
44
44
|
else
|
45
45
|
html.force_encoding('ASCII-8BIT').encode('UTF-8', :undef => :replace, :invalid => :replace)
|
46
46
|
end
|
47
47
|
end
|
48
|
+
|
49
|
+
# Is this encoding loaded?
|
50
|
+
def self.encoding_loaded?(encoding)
|
51
|
+
Encoding.name_list.include? encoding
|
52
|
+
end
|
48
53
|
end
|
@@ -75,5 +75,28 @@ describe "GuessHtmlEncoding" do
|
|
75
75
|
encoded.encoding.to_s.should == "UTF-8"
|
76
76
|
encoded.should be_valid_encoding
|
77
77
|
end
|
78
|
+
|
79
|
+
it "should work on pages encoded with an unloaded encoding" do
|
80
|
+
data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
|
81
|
+
data.force_encoding("ASCII-8BIT")
|
82
|
+
data.should be_valid_encoding # everything is valid in binary
|
83
|
+
|
84
|
+
GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
|
85
|
+
|
86
|
+
encoded = GuessHtmlEncoding.encode(data)
|
87
|
+
encoded.encoding.to_s.should == "UTF-8"
|
88
|
+
encoded.should be_valid_encoding
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
describe "#encoding_loaded?" do
|
93
|
+
it 'returns true for all loaded encodings' do
|
94
|
+
Encoding.name_list.each do |name|
|
95
|
+
GuessHtmlEncoding.encoding_loaded?(name).should be_true
|
96
|
+
end
|
97
|
+
end
|
98
|
+
it 'returns false for irregular or unloaded encoding' do
|
99
|
+
GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
|
100
|
+
end
|
78
101
|
end
|
79
|
-
end
|
102
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: guess_html_encoding
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-03-13 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70293930 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70293930
|
25
25
|
description: This gem helps guess the encoding of an HTML page.
|
26
26
|
email:
|
27
27
|
- andrew@iterationlabs.com
|
@@ -33,7 +33,7 @@ files:
|
|
33
33
|
- .rvmrc
|
34
34
|
- Gemfile
|
35
35
|
- Gemfile.lock
|
36
|
-
- README
|
36
|
+
- README.markdown
|
37
37
|
- Rakefile
|
38
38
|
- guess_html_encoding.gemspec
|
39
39
|
- lib/guess_html_encoding.rb
|
@@ -61,11 +61,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
63
|
rubyforge_project: guess_html_encoding
|
64
|
-
rubygems_version: 1.8.
|
64
|
+
rubygems_version: 1.8.16
|
65
65
|
signing_key:
|
66
66
|
specification_version: 3
|
67
67
|
summary: This gem helps guess the encoding of an HTML page.
|
68
|
-
test_files:
|
69
|
-
- spec/guess_html_encoding_spec.rb
|
70
|
-
- spec/spec.opts
|
71
|
-
- spec/spec_helper.rb
|
68
|
+
test_files: []
|