guess_html_encoding 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/{README → README.markdown}
RENAMED
File without changes
|
data/lib/guess_html_encoding.rb
CHANGED
@@ -18,7 +18,7 @@ module GuessHtmlEncoding
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
if out.nil? || out.empty? || !
|
21
|
+
if out.nil? || out.empty? || !encoding_loaded?(out)
|
22
22
|
if html =~ /<meta[^>]*HTTP-EQUIV=["']Content-Type["'][^>]*content=["']([^'"]*)["']/i && $1 =~ /charset=([\w\d-]+);?/i
|
23
23
|
out = $1.upcase
|
24
24
|
end
|
@@ -38,11 +38,16 @@ module GuessHtmlEncoding
|
|
38
38
|
# Force an HTML string into a guessed encoding.
|
39
39
|
def self.encode(html, headers = nil)
|
40
40
|
encoding = guess(html, (headers || '').gsub(/[\r\n]+/, "\n"))
|
41
|
-
html.force_encoding(encoding ? encoding : "UTF-8")
|
41
|
+
html.force_encoding(encoding_loaded?(encoding) ? encoding : "UTF-8")
|
42
42
|
if html.valid_encoding?
|
43
43
|
html
|
44
44
|
else
|
45
45
|
html.force_encoding('ASCII-8BIT').encode('UTF-8', :undef => :replace, :invalid => :replace)
|
46
46
|
end
|
47
47
|
end
|
48
|
+
|
49
|
+
# Is this encoding loaded?
|
50
|
+
def self.encoding_loaded?(encoding)
|
51
|
+
Encoding.name_list.include? encoding
|
52
|
+
end
|
48
53
|
end
|
@@ -75,5 +75,28 @@ describe "GuessHtmlEncoding" do
|
|
75
75
|
encoded.encoding.to_s.should == "UTF-8"
|
76
76
|
encoded.should be_valid_encoding
|
77
77
|
end
|
78
|
+
|
79
|
+
it "should work on pages encoded with an unloaded encoding" do
|
80
|
+
data = "<html><head><meta http-equiv='content-type' content='text/html; charset=x-mac-roman;'></head><body><div>hi!</div></body></html>"
|
81
|
+
data.force_encoding("ASCII-8BIT")
|
82
|
+
data.should be_valid_encoding # everything is valid in binary
|
83
|
+
|
84
|
+
GuessHtmlEncoding.guess(data).should == "X-MAC-ROMAN" # because the page says so!
|
85
|
+
|
86
|
+
encoded = GuessHtmlEncoding.encode(data)
|
87
|
+
encoded.encoding.to_s.should == "UTF-8"
|
88
|
+
encoded.should be_valid_encoding
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
describe "#encoding_loaded?" do
|
93
|
+
it 'returns true for all loaded encodings' do
|
94
|
+
Encoding.name_list.each do |name|
|
95
|
+
GuessHtmlEncoding.encoding_loaded?(name).should be_true
|
96
|
+
end
|
97
|
+
end
|
98
|
+
it 'returns false for irregular or unloaded encoding' do
|
99
|
+
GuessHtmlEncoding.encoding_loaded?('_WHY').should be_false
|
100
|
+
end
|
78
101
|
end
|
79
|
-
end
|
102
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: guess_html_encoding
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-03-13 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70293930 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70293930
|
25
25
|
description: This gem helps guess the encoding of an HTML page.
|
26
26
|
email:
|
27
27
|
- andrew@iterationlabs.com
|
@@ -33,7 +33,7 @@ files:
|
|
33
33
|
- .rvmrc
|
34
34
|
- Gemfile
|
35
35
|
- Gemfile.lock
|
36
|
-
- README
|
36
|
+
- README.markdown
|
37
37
|
- Rakefile
|
38
38
|
- guess_html_encoding.gemspec
|
39
39
|
- lib/guess_html_encoding.rb
|
@@ -61,11 +61,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
63
|
rubyforge_project: guess_html_encoding
|
64
|
-
rubygems_version: 1.8.
|
64
|
+
rubygems_version: 1.8.16
|
65
65
|
signing_key:
|
66
66
|
specification_version: 3
|
67
67
|
summary: This gem helps guess the encoding of an HTML page.
|
68
|
-
test_files:
|
69
|
-
- spec/guess_html_encoding_spec.rb
|
70
|
-
- spec/spec.opts
|
71
|
-
- spec/spec_helper.rb
|
68
|
+
test_files: []
|