iconoclasm 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/iconoclasm/downloader.rb +7 -9
- data/lib/iconoclasm/errors.rb +11 -3
- data/lib/iconoclasm/extractor.rb +21 -16
- data/lib/iconoclasm/favicon.rb +4 -4
- data/spec/iconoclasm/downloader_spec.rb +7 -16
- data/spec/iconoclasm/extractor_spec.rb +3 -3
- data/spec/iconoclasm/favicon_spec.rb +4 -4
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.0
|
1
|
+
1.0.1
|
data/lib/iconoclasm/downloader.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module Iconoclasm
|
4
4
|
module Downloader
|
@@ -13,18 +13,16 @@ module Iconoclasm
|
|
13
13
|
@@user_agent
|
14
14
|
end
|
15
15
|
|
16
|
+
def user_agent
|
17
|
+
@@user_agent
|
18
|
+
end
|
19
|
+
|
16
20
|
def get(url)
|
17
|
-
|
18
|
-
curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
|
19
|
-
curl.follow_location = true
|
20
|
-
end
|
21
|
+
Typhoeus::Request.get(url, :user_agent => user_agent, :follow_location => true)
|
21
22
|
end
|
22
23
|
|
23
24
|
def head(url)
|
24
|
-
|
25
|
-
curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
|
26
|
-
end
|
25
|
+
Typhoeus::Request.head(url, :user_agent => user_agent)
|
27
26
|
end
|
28
|
-
|
29
27
|
end
|
30
28
|
end
|
data/lib/iconoclasm/errors.rb
CHANGED
@@ -30,12 +30,20 @@ module Iconoclasm
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def http_error_reason
|
33
|
-
@response.respond_to?(:header_str) ?
|
34
|
-
end
|
33
|
+
@response.respond_to?(:header_str) ? error_reason : @response
|
34
|
+
end
|
35
35
|
|
36
36
|
def http_error_message
|
37
37
|
"#{@code}: #{http_error_reason}"
|
38
|
-
end
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def error_reason
|
43
|
+
first_line = @response.header_str.split('\n').first.chomp
|
44
|
+
first_line.match(/\d{3}\s(.*)$/)
|
45
|
+
$1
|
46
|
+
end
|
39
47
|
end
|
40
48
|
|
41
49
|
class RTFMError < Iconoclasm::Error
|
data/lib/iconoclasm/extractor.rb
CHANGED
@@ -10,42 +10,41 @@ module Iconoclasm
|
|
10
10
|
|
11
11
|
def extract_favicon_from(url, content = nil)
|
12
12
|
catch(:done) do
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
raise Iconoclasm::MissingFavicon.new(base_url)
|
13
|
+
extract_favicon_from_head_of(url, content)
|
14
|
+
extract_favicon_from_naive_guess(base_url_of(url))
|
15
|
+
raise Iconoclasm::MissingFavicon.new(url)
|
17
16
|
end
|
18
17
|
end
|
19
18
|
|
20
19
|
private
|
21
20
|
|
22
|
-
def extract_favicon_from_head_of(
|
23
|
-
if document = document_from(
|
21
|
+
def extract_favicon_from_head_of(url, content = nil)
|
22
|
+
if document = document_from(url, content)
|
24
23
|
favicon_links = find_favicon_links_in(document)
|
25
24
|
throw(:done, {
|
26
|
-
:url => href_of(favicon_links.first),
|
25
|
+
:url => href_of(favicon_links.first, :base_url => base_url_of(url)),
|
27
26
|
:content_type => type_of(favicon_links.first)
|
28
27
|
}) unless favicon_links.empty?
|
29
28
|
end
|
30
29
|
end
|
31
30
|
|
32
|
-
def document_from(
|
31
|
+
def document_from(url, content = nil)
|
33
32
|
if content
|
34
33
|
Nokogiri::XML(content)
|
35
34
|
else
|
36
|
-
response = get(
|
37
|
-
Nokogiri::XML(response.
|
35
|
+
response = get(url)
|
36
|
+
Nokogiri::XML(response.body) if response.code == 200
|
38
37
|
end
|
39
38
|
end
|
40
39
|
|
41
40
|
def extract_favicon_from_naive_guess(base_url)
|
42
41
|
naive_url = "#{base_url}/favicon.ico"
|
43
42
|
response = get(naive_url)
|
44
|
-
headers = Iconoclasm::Headers.new(response.
|
45
|
-
if response.
|
43
|
+
headers = Iconoclasm::Headers.new(response.headers)
|
44
|
+
if response.code == 200
|
46
45
|
throw(:done, {
|
47
46
|
:url => naive_url,
|
48
|
-
:content_length =>
|
47
|
+
:content_length => headers.content_length,
|
49
48
|
:content_type => headers.content_type,
|
50
49
|
:data => response.body_str
|
51
50
|
})
|
@@ -53,7 +52,7 @@ module Iconoclasm
|
|
53
52
|
end
|
54
53
|
|
55
54
|
def find_favicon_links_in(document)
|
56
|
-
document.
|
55
|
+
document.css('link:favicon_link', Class.new {
|
57
56
|
def favicon_link(node_set)
|
58
57
|
node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i }
|
59
58
|
end
|
@@ -65,9 +64,15 @@ module Iconoclasm
|
|
65
64
|
"#{uri.scheme}://#{uri.host}"
|
66
65
|
end
|
67
66
|
|
68
|
-
def href_of(node)
|
67
|
+
def href_of(node, options = {})
|
69
68
|
href = normal_node_attributes(node)['href']
|
70
|
-
|
69
|
+
if href
|
70
|
+
relative?(href.value) ? "#{options[:base_url]}#{href.value}" : href.value
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def relative?(href)
|
75
|
+
href =~ /^[\.\/]/
|
71
76
|
end
|
72
77
|
|
73
78
|
def type_of(node)
|
data/lib/iconoclasm/favicon.rb
CHANGED
@@ -14,8 +14,8 @@ module Iconoclasm
|
|
14
14
|
@data = attributes[:data]
|
15
15
|
@name = attributes[:name] || parse_name_from(@url)
|
16
16
|
headers = attributes[:headers]
|
17
|
-
@content_type = attributes[:content_type]
|
18
|
-
@size = attributes[:content_length]
|
17
|
+
@content_type = attributes[:content_type] ? attributes[:content_type] : headers ? headers.content_type : nil
|
18
|
+
@size = attributes[:content_length] ? attributes[:content_length] : headers ? headers.content_length : nil
|
19
19
|
@save_path = nil
|
20
20
|
end
|
21
21
|
|
@@ -58,8 +58,8 @@ module Iconoclasm
|
|
58
58
|
|
59
59
|
def fetch_data
|
60
60
|
response = get(url)
|
61
|
-
if response.
|
62
|
-
response.
|
61
|
+
if response.code == 200
|
62
|
+
response.body
|
63
63
|
else
|
64
64
|
raise Iconoclasm::HTTPError.new(url, response)
|
65
65
|
end
|
data/spec/iconoclasm/downloader_spec.rb
CHANGED
@@ -10,39 +10,30 @@ describe Iconoclasm::Downloader do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
describe "GETting a url" do
|
13
|
-
it "should GET the url using
|
14
|
-
|
13
|
+
it "should GET the url using Typheous" do
|
14
|
+
Typhoeus::Request.expects(:get).with(@url, instance_of(Hash))
|
15
15
|
@thing.get(@url)
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should set the user agent to the default user agent" do
|
19
|
-
|
20
|
-
headers = mock('headers')
|
21
|
-
Curl::Easy.stubs(:http_get).yields(@curl)
|
22
|
-
@curl.expects(:headers).returns(headers)
|
23
|
-
headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
|
19
|
+
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
24
20
|
@thing.get(@url)
|
25
21
|
end
|
26
22
|
|
27
23
|
it "should follow redirects" do
|
28
|
-
|
29
|
-
Curl::Easy.stubs(:http_get).yields(@curl)
|
30
|
-
@curl.expects(:follow_location=).with(true)
|
24
|
+
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
|
31
25
|
@thing.get(@url)
|
32
26
|
end
|
33
27
|
end
|
34
28
|
|
35
29
|
describe "HEADing a url" do
|
36
|
-
it "should HEAD the url using
|
37
|
-
|
30
|
+
it "should HEAD the url using Typhoeus" do
|
31
|
+
Typhoeus::Request.expects(:head).with(@url, instance_of(Hash))
|
38
32
|
@thing.head(@url)
|
39
33
|
end
|
40
34
|
|
41
35
|
it "should set the user agent to the default user agent" do
|
42
|
-
|
43
|
-
Curl::Easy.stubs(:http_head).yields(@curl)
|
44
|
-
@curl.expects(:headers).returns(headers)
|
45
|
-
headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
|
36
|
+
Typhoeus::Request.expects(:head).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
46
37
|
@thing.head(@url)
|
47
38
|
end
|
48
39
|
end
|
data/spec/iconoclasm/extractor_spec.rb
CHANGED
@@ -20,7 +20,7 @@ describe Iconoclasm::Extractor do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should try to find the favicon path in the head of the content" do
|
23
|
-
@thing.expects(:extract_favicon_from_head_of).with(@
|
23
|
+
@thing.expects(:extract_favicon_from_head_of).with(@url, nil).throws(:done)
|
24
24
|
@thing.extract_favicon_from(@url)
|
25
25
|
end
|
26
26
|
|
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
|
|
69
69
|
|
70
70
|
describe "when content isn't already provided" do
|
71
71
|
before do
|
72
|
-
@response = mock('http response', :
|
72
|
+
@response = mock('http response', :code => 200, :body => "")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "should go get the content" do
|
@@ -100,7 +100,7 @@ describe Iconoclasm::Extractor do
|
|
100
100
|
before do
|
101
101
|
@href = 'http://www.website.com/images/favicon.ico'
|
102
102
|
@type = 'image/vnd.microsoft.icon'
|
103
|
-
@thing.expects(:href_of).with(@link).returns(@href)
|
103
|
+
@thing.expects(:href_of).with(@link, instance_of(Hash)).returns(@href)
|
104
104
|
@thing.expects(:type_of).with(@link).returns(@type)
|
105
105
|
@hash = catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }
|
106
106
|
end
|
data/spec/iconoclasm/favicon_spec.rb
CHANGED
@@ -149,7 +149,7 @@ describe Iconoclasm::Favicon do
|
|
149
149
|
|
150
150
|
it "should request the icon image" do
|
151
151
|
@favicon.expects(:get).returns(@response)
|
152
|
-
@response.stubs(:
|
152
|
+
@response.stubs(:code => 200, :body => "IMAGE DATA!")
|
153
153
|
@favicon.fetch_data
|
154
154
|
end
|
155
155
|
|
@@ -157,11 +157,11 @@ describe Iconoclasm::Favicon do
|
|
157
157
|
before do
|
158
158
|
@favicon.stubs(:get).returns(@response)
|
159
159
|
@data = "THIS IS ALSO TOTALLY SOME IMAGE DATA HAR HAR HAR!"
|
160
|
-
@response.expects(:
|
160
|
+
@response.expects(:code).returns(200)
|
161
161
|
end
|
162
162
|
|
163
163
|
it "should return the content of the request (the binary image data)" do
|
164
|
-
@response.expects(:
|
164
|
+
@response.expects(:body).returns(@data)
|
165
165
|
@favicon.fetch_data.should == @data
|
166
166
|
end
|
167
167
|
end
|
@@ -169,7 +169,7 @@ describe Iconoclasm::Favicon do
|
|
169
169
|
describe "when the HTTP request is not successful" do
|
170
170
|
before do
|
171
171
|
@favicon.stubs(:get).returns(@response)
|
172
|
-
@response.expects(:
|
172
|
+
@response.expects(:code).returns(400)
|
173
173
|
end
|
174
174
|
|
175
175
|
it "should raise an HTTP error" do
|