iconoclasm 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.0.1
@@ -1,4 +1,4 @@
1
- require 'curl'
1
+ require 'typhoeus'
2
2
 
3
3
  module Iconoclasm
4
4
  module Downloader
@@ -13,18 +13,16 @@ module Iconoclasm
13
13
  @@user_agent
14
14
  end
15
15
 
16
+ def user_agent
17
+ @@user_agent
18
+ end
19
+
16
20
  def get(url)
17
- Curl::Easy.http_get(url) do |curl|
18
- curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
19
- curl.follow_location = true
20
- end
21
+ Typhoeus::Request.get(url, :user_agent => user_agent, :follow_location => true)
21
22
  end
22
23
 
23
24
  def head(url)
24
- Curl::Easy.http_head(url) do |curl|
25
- curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
26
- end
25
+ Typhoeus::Request.head(url, :user_agent => user_agent)
27
26
  end
28
-
29
27
  end
30
28
  end
@@ -30,12 +30,20 @@ module Iconoclasm
30
30
  end
31
31
 
32
32
  def http_error_reason
33
- @response.respond_to?(:header_str) ? @response.header_str[/(?<=\d{3}\s)(.*)$/].chomp : @response
34
- end
33
+ @response.respond_to?(:header_str) ? error_reason : @response
34
+ end
35
35
 
36
36
  def http_error_message
37
37
  "#{@code}: #{http_error_reason}"
38
- end
38
+ end
39
+
40
+ private
41
+
42
+ def error_reason
43
+ first_line = @response.header_str.split('\n').first.chomp
44
+ first_line.match(/\d{3}\s(.*)$/)
45
+ $1
46
+ end
39
47
  end
40
48
 
41
49
  class RTFMError < Iconoclasm::Error
@@ -10,42 +10,41 @@ module Iconoclasm
10
10
 
11
11
  def extract_favicon_from(url, content = nil)
12
12
  catch(:done) do
13
- base_url = base_url_of(url)
14
- extract_favicon_from_head_of(base_url, content)
15
- extract_favicon_from_naive_guess(base_url)
16
- raise Iconoclasm::MissingFavicon.new(base_url)
13
+ extract_favicon_from_head_of(url, content)
14
+ extract_favicon_from_naive_guess(base_url_of(url))
15
+ raise Iconoclasm::MissingFavicon.new(url)
17
16
  end
18
17
  end
19
18
 
20
19
  private
21
20
 
22
- def extract_favicon_from_head_of(base_url, content = nil)
23
- if document = document_from(base_url, content)
21
+ def extract_favicon_from_head_of(url, content = nil)
22
+ if document = document_from(url, content)
24
23
  favicon_links = find_favicon_links_in(document)
25
24
  throw(:done, {
26
- :url => href_of(favicon_links.first),
25
+ :url => href_of(favicon_links.first, :base_url => base_url_of(url)),
27
26
  :content_type => type_of(favicon_links.first)
28
27
  }) unless favicon_links.empty?
29
28
  end
30
29
  end
31
30
 
32
- def document_from(base_url, content = nil)
31
+ def document_from(url, content = nil)
33
32
  if content
34
33
  Nokogiri::XML(content)
35
34
  else
36
- response = get(base_url)
37
- Nokogiri::XML(response.body_str) if response.response_code == 200
35
+ response = get(url)
36
+ Nokogiri::XML(response.body) if response.code == 200
38
37
  end
39
38
  end
40
39
 
41
40
  def extract_favicon_from_naive_guess(base_url)
42
41
  naive_url = "#{base_url}/favicon.ico"
43
42
  response = get(naive_url)
44
- headers = Iconoclasm::Headers.new(response.header_str)
45
- if response.response_code == 200
43
+ headers = Iconoclasm::Headers.new(response.headers)
44
+ if response.code == 200
46
45
  throw(:done, {
47
46
  :url => naive_url,
48
- :content_length => header.content_length,
47
+ :content_length => headers.content_length,
49
48
  :content_type => headers.content_type,
50
49
  :data => response.body_str
51
50
  })
@@ -53,7 +52,7 @@ module Iconoclasm
53
52
  end
54
53
 
55
54
  def find_favicon_links_in(document)
56
- document.xpath('//link[favicon_link(.)]', Class.new {
55
+ document.css('link:favicon_link', Class.new {
57
56
  def favicon_link(node_set)
58
57
  node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i }
59
58
  end
@@ -65,9 +64,15 @@ module Iconoclasm
65
64
  "#{uri.scheme}://#{uri.host}"
66
65
  end
67
66
 
68
- def href_of(node)
67
+ def href_of(node, options = {})
69
68
  href = normal_node_attributes(node)['href']
70
- href.value if href
69
+ if href
70
+ relative?(href.value) ? "#{options[:base_url]}#{href.value}" : href.value
71
+ end
72
+ end
73
+
74
+ def relative?(href)
75
+ href =~ /^[\.\/]/
71
76
  end
72
77
 
73
78
  def type_of(node)
@@ -14,8 +14,8 @@ module Iconoclasm
14
14
  @data = attributes[:data]
15
15
  @name = attributes[:name] || parse_name_from(@url)
16
16
  headers = attributes[:headers]
17
- @content_type = attributes[:content_type] || headers ? headers.content_type : nil
18
- @size = attributes[:content_length] || headers ? headers.content_length : nil
17
+ @content_type = attributes[:content_type] ? attributes[:content_type] : headers ? headers.content_type : nil
18
+ @size = attributes[:content_length] ? attributes[:content_length] : headers ? headers.content_length : nil
19
19
  @save_path = nil
20
20
  end
21
21
 
@@ -58,8 +58,8 @@ module Iconoclasm
58
58
 
59
59
  def fetch_data
60
60
  response = get(url)
61
- if response.response_code == 200
62
- response.body_str
61
+ if response.code == 200
62
+ response.body
63
63
  else
64
64
  raise Iconoclasm::HTTPError.new(url, response)
65
65
  end
@@ -10,39 +10,30 @@ describe Iconoclasm::Downloader do
10
10
  end
11
11
 
12
12
  describe "GETting a url" do
13
- it "should GET the url using curl easy" do
14
- Curl::Easy.expects(:http_get).with(@url)
13
+ it "should GET the url using Typheous" do
14
+ Typhoeus::Request.expects(:get).with(@url, instance_of(Hash))
15
15
  @thing.get(@url)
16
16
  end
17
17
 
18
18
  it "should set the user agent to the default user agent" do
19
- @curl.stubs(:follow_location=)
20
- headers = mock('headers')
21
- Curl::Easy.stubs(:http_get).yields(@curl)
22
- @curl.expects(:headers).returns(headers)
23
- headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
19
+ Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
24
20
  @thing.get(@url)
25
21
  end
26
22
 
27
23
  it "should follow redirects" do
28
- @curl.stubs(:headers).returns({})
29
- Curl::Easy.stubs(:http_get).yields(@curl)
30
- @curl.expects(:follow_location=).with(true)
24
+ Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
31
25
  @thing.get(@url)
32
26
  end
33
27
  end
34
28
 
35
29
  describe "HEADing a url" do
36
- it "should HEAD the url using curl easy" do
37
- Curl::Easy.expects(:http_head).with(@url)
30
+ it "should HEAD the url using Typhoeus" do
31
+ Typhoeus::Request.expects(:head).with(@url, instance_of(Hash))
38
32
  @thing.head(@url)
39
33
  end
40
34
 
41
35
  it "should set the user agent to the default user agent" do
42
- headers = mock('headers')
43
- Curl::Easy.stubs(:http_head).yields(@curl)
44
- @curl.expects(:headers).returns(headers)
45
- headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
36
+ Typhoeus::Request.expects(:head).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
46
37
  @thing.head(@url)
47
38
  end
48
39
  end
@@ -20,7 +20,7 @@ describe Iconoclasm::Extractor do
20
20
  end
21
21
 
22
22
  it "should try to find the favicon path in the head of the content" do
23
- @thing.expects(:extract_favicon_from_head_of).with(@base_url, nil).throws(:done)
23
+ @thing.expects(:extract_favicon_from_head_of).with(@url, nil).throws(:done)
24
24
  @thing.extract_favicon_from(@url)
25
25
  end
26
26
 
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
69
69
 
70
70
  describe "when content isn't already provided" do
71
71
  before do
72
- @response = mock('http response', :response_code => 200, :body_str => "")
72
+ @response = mock('http response', :code => 200, :body => "")
73
73
  end
74
74
 
75
75
  it "should go get the content" do
@@ -100,7 +100,7 @@ describe Iconoclasm::Extractor do
100
100
  before do
101
101
  @href = 'http://www.website.com/images/favicon.ico'
102
102
  @type = 'image/vnd.microsoft.icon'
103
- @thing.expects(:href_of).with(@link).returns(@href)
103
+ @thing.expects(:href_of).with(@link, instance_of(Hash)).returns(@href)
104
104
  @thing.expects(:type_of).with(@link).returns(@type)
105
105
  @hash = catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }
106
106
  end
@@ -149,7 +149,7 @@ describe Iconoclasm::Favicon do
149
149
 
150
150
  it "should request the icon image" do
151
151
  @favicon.expects(:get).returns(@response)
152
- @response.stubs(:response_code => 200, :body_str => "IMAGE DATA!")
152
+ @response.stubs(:code => 200, :body => "IMAGE DATA!")
153
153
  @favicon.fetch_data
154
154
  end
155
155
 
@@ -157,11 +157,11 @@ describe Iconoclasm::Favicon do
157
157
  before do
158
158
  @favicon.stubs(:get).returns(@response)
159
159
  @data = "THIS IS ALSO TOTALLY SOME IMAGE DATA HAR HAR HAR!"
160
- @response.expects(:response_code).returns(200)
160
+ @response.expects(:code).returns(200)
161
161
  end
162
162
 
163
163
  it "should return the content of the request (the binary image data)" do
164
- @response.expects(:body_str).returns(@data)
164
+ @response.expects(:body).returns(@data)
165
165
  @favicon.fetch_data.should == @data
166
166
  end
167
167
  end
@@ -169,7 +169,7 @@ describe Iconoclasm::Favicon do
169
169
  describe "when the HTTP request is not successful" do
170
170
  before do
171
171
  @favicon.stubs(:get).returns(@response)
172
- @response.expects(:response_code).returns(400)
172
+ @response.expects(:code).returns(400)
173
173
  end
174
174
 
175
175
  it "should raise an HTTP error" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iconoclasm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sander Hartlage