iconoclasm 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/iconoclasm/downloader.rb +17 -5
- data/lib/iconoclasm/extractor.rb +4 -4
- data/lib/iconoclasm.rb +10 -1
- data/spec/iconoclasm/downloader_spec.rb +31 -17
- data/spec/iconoclasm/extractor_spec.rb +1 -1
- metadata +28 -13
data/lib/iconoclasm/downloader.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
require 'typhoeus'
|
2
|
-
|
3
1
|
module Iconoclasm
|
4
2
|
module Downloader
|
5
3
|
|
6
|
-
@@user_agent =
|
4
|
+
@@user_agent = %Q{Mozilla/5.0 (compatible; Iconoclasm/#{Iconoclasm.version}; +http://github.com/sander6/iconoclasm)}
|
7
5
|
|
8
6
|
def self.user_agent=(agent)
|
9
7
|
@@user_agent = agent
|
@@ -18,11 +16,25 @@ module Iconoclasm
|
|
18
16
|
end
|
19
17
|
|
20
18
|
def get(url)
|
21
|
-
|
19
|
+
c = curl(url)
|
20
|
+
c.http_get
|
21
|
+
c
|
22
22
|
end
|
23
23
|
|
24
24
|
def head(url)
|
25
|
-
|
25
|
+
c = curl(url)
|
26
|
+
c.http_head
|
27
|
+
c
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def curl(url)
|
33
|
+
Curl::Easy.new(url) do |curl|
|
34
|
+
curl.useragent = user_agent
|
35
|
+
curl.follow_location = true
|
36
|
+
curl.timeout = Iconoclasm.timeout || 1000
|
37
|
+
end
|
26
38
|
end
|
27
39
|
end
|
28
40
|
end
|
data/lib/iconoclasm/extractor.rb
CHANGED
@@ -33,20 +33,20 @@ module Iconoclasm
|
|
33
33
|
Nokogiri::XML(content)
|
34
34
|
else
|
35
35
|
response = get(url)
|
36
|
-
Nokogiri::XML(response.
|
36
|
+
Nokogiri::XML(response.body_str) if response.response_code == 200
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
40
|
def extract_favicon_from_naive_guess(base_url)
|
41
41
|
naive_url = "#{base_url}/favicon.ico"
|
42
42
|
response = get(naive_url)
|
43
|
-
headers = Iconoclasm::Headers.new(response.
|
44
|
-
if response.
|
43
|
+
headers = Iconoclasm::Headers.new(response.header_str)
|
44
|
+
if response.response_code == 200
|
45
45
|
throw(:done, {
|
46
46
|
:url => naive_url,
|
47
47
|
:content_length => headers.content_length,
|
48
48
|
:content_type => headers.content_type,
|
49
|
-
:data => response.
|
49
|
+
:data => response.body_str
|
50
50
|
})
|
51
51
|
end
|
52
52
|
end
|
data/lib/iconoclasm.rb
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'curb'
|
3
|
+
|
4
|
+
module Iconoclasm
|
5
|
+
def self.version
|
6
|
+
"1.0.8"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
1
10
|
$:.unshift(File.dirname(__FILE__))
|
2
11
|
require 'iconoclasm/downloader'
|
3
12
|
require 'iconoclasm/errors'
|
@@ -23,4 +32,4 @@ class String
|
|
23
32
|
def lines
|
24
33
|
Enumerable::Enumerator.new(self.split("\n"))
|
25
34
|
end
|
26
|
-
end unless ''.respond_to?(:lines)
|
35
|
+
end unless ''.respond_to?(:lines)
|
data/spec/iconoclasm/downloader_spec.rb
CHANGED
@@ -10,31 +10,45 @@ describe Iconoclasm::Downloader do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
describe "GETting a url" do
|
13
|
-
it "should GET the url using
|
14
|
-
|
13
|
+
it "should GET the url using Curl" do
|
14
|
+
@thing.expects(:curl).with(@url).returns(@curl)
|
15
|
+
@curl.expects(:http_get)
|
15
16
|
@thing.get(@url)
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should set the user agent to the default user agent" do
|
19
|
-
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
20
|
-
@thing.get(@url)
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should follow redirects" do
|
24
|
-
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
|
25
|
-
@thing.get(@url)
|
26
|
-
end
|
17
|
+
end
|
27
18
|
end
|
28
19
|
|
29
20
|
describe "HEADing a url" do
|
30
|
-
it "should HEAD the url using
|
31
|
-
|
21
|
+
it "should HEAD the url using Curl" do
|
22
|
+
@thing.expects(:curl).with(@url).returns(@curl)
|
23
|
+
@curl.expects(:http_head)
|
32
24
|
@thing.head(@url)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "building the Curl object" do
|
29
|
+
before do
|
30
|
+
Curl::Easy.expects(:new).with(@url).yields(@curl)
|
33
31
|
end
|
34
32
|
|
35
33
|
it "should set the user agent to the default user agent" do
|
36
|
-
|
37
|
-
@
|
34
|
+
@curl.stubs(:follow_location=)
|
35
|
+
@curl.stubs(:timeout=)
|
36
|
+
@curl.expects(:useragent=).with(Iconoclasm::Downloader.user_agent)
|
37
|
+
@thing.__send__(:curl, @url)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should follow location" do
|
41
|
+
@curl.expects(:follow_location=).with(true)
|
42
|
+
@curl.stubs(:timeout=)
|
43
|
+
@curl.stubs(:useragent=)
|
44
|
+
@thing.__send__(:curl, @url)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should set the timeout to 1 second" do
|
48
|
+
@curl.stubs(:follow_location=)
|
49
|
+
@curl.expects(:timeout=).with(1000)
|
50
|
+
@curl.stubs(:useragent=)
|
51
|
+
@thing.__send__(:curl, @url)
|
38
52
|
end
|
39
53
|
end
|
40
54
|
end
|
data/spec/iconoclasm/extractor_spec.rb
CHANGED
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
|
|
69
69
|
|
70
70
|
describe "when content isn't already provided" do
|
71
71
|
before do
|
72
|
-
@response = mock('http response', :
|
72
|
+
@response = mock('http response', :response_code => 200, :body_str => "")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "should go get the content" do
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iconoclasm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 8
|
9
|
+
version: 1.0.8
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Sander Hartlage
|
@@ -14,24 +19,30 @@ default_executable:
|
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: addressable
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
20
25
|
requirements:
|
21
26
|
- - ">="
|
22
27
|
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
23
30
|
version: "0"
|
24
|
-
version:
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: typhoeus
|
27
31
|
type: :runtime
|
28
|
-
|
29
|
-
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: curb
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
30
38
|
requirements:
|
31
39
|
- - ">="
|
32
40
|
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
33
43
|
version: "0"
|
34
|
-
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
35
46
|
description: Finds favorites icons for web pages on the world wide internets by checking the HTML head or the standard favicon location. Then, do with them what you will.
|
36
47
|
email: sander.hartlage@gmail.com
|
37
48
|
executables: []
|
@@ -71,21 +82,25 @@ rdoc_options:
|
|
71
82
|
require_paths:
|
72
83
|
- lib
|
73
84
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
74
86
|
requirements:
|
75
87
|
- - ">="
|
76
88
|
- !ruby/object:Gem::Version
|
89
|
+
segments:
|
90
|
+
- 0
|
77
91
|
version: "0"
|
78
|
-
version:
|
79
92
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
80
94
|
requirements:
|
81
95
|
- - ">="
|
82
96
|
- !ruby/object:Gem::Version
|
97
|
+
segments:
|
98
|
+
- 0
|
83
99
|
version: "0"
|
84
|
-
version:
|
85
100
|
requirements: []
|
86
101
|
|
87
102
|
rubyforge_project:
|
88
|
-
rubygems_version: 1.3.
|
103
|
+
rubygems_version: 1.3.7
|
89
104
|
signing_key:
|
90
105
|
specification_version: 3
|
91
106
|
summary: Finds favicons and DESTROYS THEM (well, not really, but it will download and save them)
|