iconoclasm 1.0.0 → 1.0.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.0.1
@@ -1,4 +1,4 @@
1
- require 'curl'
1
+ require 'typhoeus'
2
2
 
3
3
  module Iconoclasm
4
4
  module Downloader
@@ -13,18 +13,16 @@ module Iconoclasm
13
13
  @@user_agent
14
14
  end
15
15
 
16
+ def user_agent
17
+ @@user_agent
18
+ end
19
+
16
20
  def get(url)
17
- Curl::Easy.http_get(url) do |curl|
18
- curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
19
- curl.follow_location = true
20
- end
21
+ Typhoeus::Request.get(url, :user_agent => user_agent, :follow_location => true)
21
22
  end
22
23
 
23
24
  def head(url)
24
- Curl::Easy.http_head(url) do |curl|
25
- curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
26
- end
25
+ Typhoeus::Request.head(url, :user_agent => user_agent)
27
26
  end
28
-
29
27
  end
30
28
  end
@@ -30,12 +30,20 @@ module Iconoclasm
30
30
  end
31
31
 
32
32
  def http_error_reason
33
- @response.respond_to?(:header_str) ? @response.header_str[/(?<=\d{3}\s)(.*)$/].chomp : @response
34
- end
33
+ @response.respond_to?(:header_str) ? error_reason : @response
34
+ end
35
35
 
36
36
  def http_error_message
37
37
  "#{@code}: #{http_error_reason}"
38
- end
38
+ end
39
+
40
+ private
41
+
42
+ def error_reason
43
+ first_line = @response.header_str.split('\n').first.chomp
44
+ first_line.match(/\d{3}\s(.*)$/)
45
+ $1
46
+ end
39
47
  end
40
48
 
41
49
  class RTFMError < Iconoclasm::Error
@@ -10,42 +10,41 @@ module Iconoclasm
10
10
 
11
11
  def extract_favicon_from(url, content = nil)
12
12
  catch(:done) do
13
- base_url = base_url_of(url)
14
- extract_favicon_from_head_of(base_url, content)
15
- extract_favicon_from_naive_guess(base_url)
16
- raise Iconoclasm::MissingFavicon.new(base_url)
13
+ extract_favicon_from_head_of(url, content)
14
+ extract_favicon_from_naive_guess(base_url_of(url))
15
+ raise Iconoclasm::MissingFavicon.new(url)
17
16
  end
18
17
  end
19
18
 
20
19
  private
21
20
 
22
- def extract_favicon_from_head_of(base_url, content = nil)
23
- if document = document_from(base_url, content)
21
+ def extract_favicon_from_head_of(url, content = nil)
22
+ if document = document_from(url, content)
24
23
  favicon_links = find_favicon_links_in(document)
25
24
  throw(:done, {
26
- :url => href_of(favicon_links.first),
25
+ :url => href_of(favicon_links.first, :base_url => base_url_of(url)),
27
26
  :content_type => type_of(favicon_links.first)
28
27
  }) unless favicon_links.empty?
29
28
  end
30
29
  end
31
30
 
32
- def document_from(base_url, content = nil)
31
+ def document_from(url, content = nil)
33
32
  if content
34
33
  Nokogiri::XML(content)
35
34
  else
36
- response = get(base_url)
37
- Nokogiri::XML(response.body_str) if response.response_code == 200
35
+ response = get(url)
36
+ Nokogiri::XML(response.body) if response.code == 200
38
37
  end
39
38
  end
40
39
 
41
40
  def extract_favicon_from_naive_guess(base_url)
42
41
  naive_url = "#{base_url}/favicon.ico"
43
42
  response = get(naive_url)
44
- headers = Iconoclasm::Headers.new(response.header_str)
45
- if response.response_code == 200
43
+ headers = Iconoclasm::Headers.new(response.headers)
44
+ if response.code == 200
46
45
  throw(:done, {
47
46
  :url => naive_url,
48
- :content_length => header.content_length,
47
+ :content_length => headers.content_length,
49
48
  :content_type => headers.content_type,
50
49
  :data => response.body_str
51
50
  })
@@ -53,7 +52,7 @@ module Iconoclasm
53
52
  end
54
53
 
55
54
  def find_favicon_links_in(document)
56
- document.xpath('//link[favicon_link(.)]', Class.new {
55
+ document.css('link:favicon_link', Class.new {
57
56
  def favicon_link(node_set)
58
57
  node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i }
59
58
  end
@@ -65,9 +64,15 @@ module Iconoclasm
65
64
  "#{uri.scheme}://#{uri.host}"
66
65
  end
67
66
 
68
- def href_of(node)
67
+ def href_of(node, options = {})
69
68
  href = normal_node_attributes(node)['href']
70
- href.value if href
69
+ if href
70
+ relative?(href.value) ? "#{options[:base_url]}#{href.value}" : href.value
71
+ end
72
+ end
73
+
74
+ def relative?(href)
75
+ href =~ /^[\.\/]/
71
76
  end
72
77
 
73
78
  def type_of(node)
@@ -14,8 +14,8 @@ module Iconoclasm
14
14
  @data = attributes[:data]
15
15
  @name = attributes[:name] || parse_name_from(@url)
16
16
  headers = attributes[:headers]
17
- @content_type = attributes[:content_type] || headers ? headers.content_type : nil
18
- @size = attributes[:content_length] || headers ? headers.content_length : nil
17
+ @content_type = attributes[:content_type] ? attributes[:content_type] : headers ? headers.content_type : nil
18
+ @size = attributes[:content_length] ? attributes[:content_length] : headers ? headers.content_length : nil
19
19
  @save_path = nil
20
20
  end
21
21
 
@@ -58,8 +58,8 @@ module Iconoclasm
58
58
 
59
59
  def fetch_data
60
60
  response = get(url)
61
- if response.response_code == 200
62
- response.body_str
61
+ if response.code == 200
62
+ response.body
63
63
  else
64
64
  raise Iconoclasm::HTTPError.new(url, response)
65
65
  end
@@ -10,39 +10,30 @@ describe Iconoclasm::Downloader do
10
10
  end
11
11
 
12
12
  describe "GETting a url" do
13
- it "should GET the url using curl easy" do
14
- Curl::Easy.expects(:http_get).with(@url)
13
+ it "should GET the url using Typheous" do
14
+ Typhoeus::Request.expects(:get).with(@url, instance_of(Hash))
15
15
  @thing.get(@url)
16
16
  end
17
17
 
18
18
  it "should set the user agent to the default user agent" do
19
- @curl.stubs(:follow_location=)
20
- headers = mock('headers')
21
- Curl::Easy.stubs(:http_get).yields(@curl)
22
- @curl.expects(:headers).returns(headers)
23
- headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
19
+ Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
24
20
  @thing.get(@url)
25
21
  end
26
22
 
27
23
  it "should follow redirects" do
28
- @curl.stubs(:headers).returns({})
29
- Curl::Easy.stubs(:http_get).yields(@curl)
30
- @curl.expects(:follow_location=).with(true)
24
+ Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
31
25
  @thing.get(@url)
32
26
  end
33
27
  end
34
28
 
35
29
  describe "HEADing a url" do
36
- it "should HEAD the url using curl easy" do
37
- Curl::Easy.expects(:http_head).with(@url)
30
+ it "should HEAD the url using Typhoeus" do
31
+ Typhoeus::Request.expects(:head).with(@url, instance_of(Hash))
38
32
  @thing.head(@url)
39
33
  end
40
34
 
41
35
  it "should set the user agent to the default user agent" do
42
- headers = mock('headers')
43
- Curl::Easy.stubs(:http_head).yields(@curl)
44
- @curl.expects(:headers).returns(headers)
45
- headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
36
+ Typhoeus::Request.expects(:head).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
46
37
  @thing.head(@url)
47
38
  end
48
39
  end
@@ -20,7 +20,7 @@ describe Iconoclasm::Extractor do
20
20
  end
21
21
 
22
22
  it "should try to find the favicon path in the head of the content" do
23
- @thing.expects(:extract_favicon_from_head_of).with(@base_url, nil).throws(:done)
23
+ @thing.expects(:extract_favicon_from_head_of).with(@url, nil).throws(:done)
24
24
  @thing.extract_favicon_from(@url)
25
25
  end
26
26
 
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
69
69
 
70
70
  describe "when content isn't already provided" do
71
71
  before do
72
- @response = mock('http response', :response_code => 200, :body_str => "")
72
+ @response = mock('http response', :code => 200, :body => "")
73
73
  end
74
74
 
75
75
  it "should go get the content" do
@@ -100,7 +100,7 @@ describe Iconoclasm::Extractor do
100
100
  before do
101
101
  @href = 'http://www.website.com/images/favicon.ico'
102
102
  @type = 'image/vnd.microsoft.icon'
103
- @thing.expects(:href_of).with(@link).returns(@href)
103
+ @thing.expects(:href_of).with(@link, instance_of(Hash)).returns(@href)
104
104
  @thing.expects(:type_of).with(@link).returns(@type)
105
105
  @hash = catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }
106
106
  end
@@ -149,7 +149,7 @@ describe Iconoclasm::Favicon do
149
149
 
150
150
  it "should request the icon image" do
151
151
  @favicon.expects(:get).returns(@response)
152
- @response.stubs(:response_code => 200, :body_str => "IMAGE DATA!")
152
+ @response.stubs(:code => 200, :body => "IMAGE DATA!")
153
153
  @favicon.fetch_data
154
154
  end
155
155
 
@@ -157,11 +157,11 @@ describe Iconoclasm::Favicon do
157
157
  before do
158
158
  @favicon.stubs(:get).returns(@response)
159
159
  @data = "THIS IS ALSO TOTALLY SOME IMAGE DATA HAR HAR HAR!"
160
- @response.expects(:response_code).returns(200)
160
+ @response.expects(:code).returns(200)
161
161
  end
162
162
 
163
163
  it "should return the content of the request (the binary image data)" do
164
- @response.expects(:body_str).returns(@data)
164
+ @response.expects(:body).returns(@data)
165
165
  @favicon.fetch_data.should == @data
166
166
  end
167
167
  end
@@ -169,7 +169,7 @@ describe Iconoclasm::Favicon do
169
169
  describe "when the HTTP request is not successful" do
170
170
  before do
171
171
  @favicon.stubs(:get).returns(@response)
172
- @response.expects(:response_code).returns(400)
172
+ @response.expects(:code).returns(400)
173
173
  end
174
174
 
175
175
  it "should raise an HTTP error" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iconoclasm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sander Hartlage