iconoclasm 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/iconoclasm/downloader.rb +7 -9
- data/lib/iconoclasm/errors.rb +11 -3
- data/lib/iconoclasm/extractor.rb +21 -16
- data/lib/iconoclasm/favicon.rb +4 -4
- data/spec/iconoclasm/downloader_spec.rb +7 -16
- data/spec/iconoclasm/extractor_spec.rb +3 -3
- data/spec/iconoclasm/favicon_spec.rb +4 -4
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.0
|
1
|
+
1.0.1
|
data/lib/iconoclasm/downloader.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module Iconoclasm
|
4
4
|
module Downloader
|
@@ -13,18 +13,16 @@ module Iconoclasm
|
|
13
13
|
@@user_agent
|
14
14
|
end
|
15
15
|
|
16
|
+
def user_agent
|
17
|
+
@@user_agent
|
18
|
+
end
|
19
|
+
|
16
20
|
def get(url)
|
17
|
-
|
18
|
-
curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
|
19
|
-
curl.follow_location = true
|
20
|
-
end
|
21
|
+
Typhoeus::Request.get(url, :user_agent => user_agent, :follow_location => true)
|
21
22
|
end
|
22
23
|
|
23
24
|
def head(url)
|
24
|
-
|
25
|
-
curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
|
26
|
-
end
|
25
|
+
Typhoeus::Request.head(url, :user_agent => user_agent)
|
27
26
|
end
|
28
|
-
|
29
27
|
end
|
30
28
|
end
|
data/lib/iconoclasm/errors.rb
CHANGED
@@ -30,12 +30,20 @@ module Iconoclasm
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def http_error_reason
|
33
|
-
@response.respond_to?(:header_str) ?
|
34
|
-
end
|
33
|
+
@response.respond_to?(:header_str) ? error_reason : @response
|
34
|
+
end
|
35
35
|
|
36
36
|
def http_error_message
|
37
37
|
"#{@code}: #{http_error_reason}"
|
38
|
-
end
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def error_reason
|
43
|
+
first_line = @response.header_str.split('\n').first.chomp
|
44
|
+
first_line.match(/\d{3}\s(.*)$/)
|
45
|
+
$1
|
46
|
+
end
|
39
47
|
end
|
40
48
|
|
41
49
|
class RTFMError < Iconoclasm::Error
|
data/lib/iconoclasm/extractor.rb
CHANGED
@@ -10,42 +10,41 @@ module Iconoclasm
|
|
10
10
|
|
11
11
|
def extract_favicon_from(url, content = nil)
|
12
12
|
catch(:done) do
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
raise Iconoclasm::MissingFavicon.new(base_url)
|
13
|
+
extract_favicon_from_head_of(url, content)
|
14
|
+
extract_favicon_from_naive_guess(base_url_of(url))
|
15
|
+
raise Iconoclasm::MissingFavicon.new(url)
|
17
16
|
end
|
18
17
|
end
|
19
18
|
|
20
19
|
private
|
21
20
|
|
22
|
-
def extract_favicon_from_head_of(
|
23
|
-
if document = document_from(
|
21
|
+
def extract_favicon_from_head_of(url, content = nil)
|
22
|
+
if document = document_from(url, content)
|
24
23
|
favicon_links = find_favicon_links_in(document)
|
25
24
|
throw(:done, {
|
26
|
-
:url => href_of(favicon_links.first),
|
25
|
+
:url => href_of(favicon_links.first, :base_url => base_url_of(url)),
|
27
26
|
:content_type => type_of(favicon_links.first)
|
28
27
|
}) unless favicon_links.empty?
|
29
28
|
end
|
30
29
|
end
|
31
30
|
|
32
|
-
def document_from(
|
31
|
+
def document_from(url, content = nil)
|
33
32
|
if content
|
34
33
|
Nokogiri::XML(content)
|
35
34
|
else
|
36
|
-
response = get(
|
37
|
-
Nokogiri::XML(response.
|
35
|
+
response = get(url)
|
36
|
+
Nokogiri::XML(response.body) if response.code == 200
|
38
37
|
end
|
39
38
|
end
|
40
39
|
|
41
40
|
def extract_favicon_from_naive_guess(base_url)
|
42
41
|
naive_url = "#{base_url}/favicon.ico"
|
43
42
|
response = get(naive_url)
|
44
|
-
headers = Iconoclasm::Headers.new(response.
|
45
|
-
if response.
|
43
|
+
headers = Iconoclasm::Headers.new(response.headers)
|
44
|
+
if response.code == 200
|
46
45
|
throw(:done, {
|
47
46
|
:url => naive_url,
|
48
|
-
:content_length =>
|
47
|
+
:content_length => headers.content_length,
|
49
48
|
:content_type => headers.content_type,
|
50
49
|
:data => response.body_str
|
51
50
|
})
|
@@ -53,7 +52,7 @@ module Iconoclasm
|
|
53
52
|
end
|
54
53
|
|
55
54
|
def find_favicon_links_in(document)
|
56
|
-
document.
|
55
|
+
document.css('link:favicon_link', Class.new {
|
57
56
|
def favicon_link(node_set)
|
58
57
|
node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i }
|
59
58
|
end
|
@@ -65,9 +64,15 @@ module Iconoclasm
|
|
65
64
|
"#{uri.scheme}://#{uri.host}"
|
66
65
|
end
|
67
66
|
|
68
|
-
def href_of(node)
|
67
|
+
def href_of(node, options = {})
|
69
68
|
href = normal_node_attributes(node)['href']
|
70
|
-
|
69
|
+
if href
|
70
|
+
relative?(href.value) ? "#{options[:base_url]}#{href.value}" : href.value
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def relative?(href)
|
75
|
+
href =~ /^[\.\/]/
|
71
76
|
end
|
72
77
|
|
73
78
|
def type_of(node)
|
data/lib/iconoclasm/favicon.rb
CHANGED
@@ -14,8 +14,8 @@ module Iconoclasm
|
|
14
14
|
@data = attributes[:data]
|
15
15
|
@name = attributes[:name] || parse_name_from(@url)
|
16
16
|
headers = attributes[:headers]
|
17
|
-
@content_type = attributes[:content_type]
|
18
|
-
@size = attributes[:content_length]
|
17
|
+
@content_type = attributes[:content_type] ? attributes[:content_type] : headers ? headers.content_type : nil
|
18
|
+
@size = attributes[:content_length] ? attributes[:content_length] : headers ? headers.content_length : nil
|
19
19
|
@save_path = nil
|
20
20
|
end
|
21
21
|
|
@@ -58,8 +58,8 @@ module Iconoclasm
|
|
58
58
|
|
59
59
|
def fetch_data
|
60
60
|
response = get(url)
|
61
|
-
if response.
|
62
|
-
response.
|
61
|
+
if response.code == 200
|
62
|
+
response.body
|
63
63
|
else
|
64
64
|
raise Iconoclasm::HTTPError.new(url, response)
|
65
65
|
end
|
data/spec/iconoclasm/downloader_spec.rb
CHANGED
@@ -10,39 +10,30 @@ describe Iconoclasm::Downloader do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
describe "GETting a url" do
|
13
|
-
it "should GET the url using
|
14
|
-
|
13
|
+
it "should GET the url using Typheous" do
|
14
|
+
Typhoeus::Request.expects(:get).with(@url, instance_of(Hash))
|
15
15
|
@thing.get(@url)
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should set the user agent to the default user agent" do
|
19
|
-
|
20
|
-
headers = mock('headers')
|
21
|
-
Curl::Easy.stubs(:http_get).yields(@curl)
|
22
|
-
@curl.expects(:headers).returns(headers)
|
23
|
-
headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
|
19
|
+
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
24
20
|
@thing.get(@url)
|
25
21
|
end
|
26
22
|
|
27
23
|
it "should follow redirects" do
|
28
|
-
|
29
|
-
Curl::Easy.stubs(:http_get).yields(@curl)
|
30
|
-
@curl.expects(:follow_location=).with(true)
|
24
|
+
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
|
31
25
|
@thing.get(@url)
|
32
26
|
end
|
33
27
|
end
|
34
28
|
|
35
29
|
describe "HEADing a url" do
|
36
|
-
it "should HEAD the url using
|
37
|
-
|
30
|
+
it "should HEAD the url using Typhoeus" do
|
31
|
+
Typhoeus::Request.expects(:head).with(@url, instance_of(Hash))
|
38
32
|
@thing.head(@url)
|
39
33
|
end
|
40
34
|
|
41
35
|
it "should set the user agent to the default user agent" do
|
42
|
-
|
43
|
-
Curl::Easy.stubs(:http_head).yields(@curl)
|
44
|
-
@curl.expects(:headers).returns(headers)
|
45
|
-
headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
|
36
|
+
Typhoeus::Request.expects(:head).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
46
37
|
@thing.head(@url)
|
47
38
|
end
|
48
39
|
end
|
data/spec/iconoclasm/extractor_spec.rb
CHANGED
@@ -20,7 +20,7 @@ describe Iconoclasm::Extractor do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it "should try to find the favicon path in the head of the content" do
|
23
|
-
@thing.expects(:extract_favicon_from_head_of).with(@
|
23
|
+
@thing.expects(:extract_favicon_from_head_of).with(@url, nil).throws(:done)
|
24
24
|
@thing.extract_favicon_from(@url)
|
25
25
|
end
|
26
26
|
|
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
|
|
69
69
|
|
70
70
|
describe "when content isn't already provided" do
|
71
71
|
before do
|
72
|
-
@response = mock('http response', :
|
72
|
+
@response = mock('http response', :code => 200, :body => "")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "should go get the content" do
|
@@ -100,7 +100,7 @@ describe Iconoclasm::Extractor do
|
|
100
100
|
before do
|
101
101
|
@href = 'http://www.website.com/images/favicon.ico'
|
102
102
|
@type = 'image/vnd.microsoft.icon'
|
103
|
-
@thing.expects(:href_of).with(@link).returns(@href)
|
103
|
+
@thing.expects(:href_of).with(@link, instance_of(Hash)).returns(@href)
|
104
104
|
@thing.expects(:type_of).with(@link).returns(@type)
|
105
105
|
@hash = catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }
|
106
106
|
end
|
data/spec/iconoclasm/favicon_spec.rb
CHANGED
@@ -149,7 +149,7 @@ describe Iconoclasm::Favicon do
|
|
149
149
|
|
150
150
|
it "should request the icon image" do
|
151
151
|
@favicon.expects(:get).returns(@response)
|
152
|
-
@response.stubs(:
|
152
|
+
@response.stubs(:code => 200, :body => "IMAGE DATA!")
|
153
153
|
@favicon.fetch_data
|
154
154
|
end
|
155
155
|
|
@@ -157,11 +157,11 @@ describe Iconoclasm::Favicon do
|
|
157
157
|
before do
|
158
158
|
@favicon.stubs(:get).returns(@response)
|
159
159
|
@data = "THIS IS ALSO TOTALLY SOME IMAGE DATA HAR HAR HAR!"
|
160
|
-
@response.expects(:
|
160
|
+
@response.expects(:code).returns(200)
|
161
161
|
end
|
162
162
|
|
163
163
|
it "should return the content of the request (the binary image data)" do
|
164
|
-
@response.expects(:
|
164
|
+
@response.expects(:body).returns(@data)
|
165
165
|
@favicon.fetch_data.should == @data
|
166
166
|
end
|
167
167
|
end
|
@@ -169,7 +169,7 @@ describe Iconoclasm::Favicon do
|
|
169
169
|
describe "when the HTTP request is not successful" do
|
170
170
|
before do
|
171
171
|
@favicon.stubs(:get).returns(@response)
|
172
|
-
@response.expects(:
|
172
|
+
@response.expects(:code).returns(400)
|
173
173
|
end
|
174
174
|
|
175
175
|
it "should raise an HTTP error" do
|