iconoclasm 1.0.7 → 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/iconoclasm/downloader.rb +17 -5
- data/lib/iconoclasm/extractor.rb +4 -4
- data/lib/iconoclasm.rb +10 -1
- data/spec/iconoclasm/downloader_spec.rb +31 -17
- data/spec/iconoclasm/extractor_spec.rb +1 -1
- metadata +28 -13
data/lib/iconoclasm/downloader.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
require 'typhoeus'
|
2
|
-
|
3
1
|
module Iconoclasm
|
4
2
|
module Downloader
|
5
3
|
|
6
|
-
@@user_agent =
|
4
|
+
@@user_agent = %Q{Mozilla/5.0 (compatible; Iconoclasm/#{Iconoclasm.version}; +http://github.com/sander6/iconoclasm)}
|
7
5
|
|
8
6
|
def self.user_agent=(agent)
|
9
7
|
@@user_agent = agent
|
@@ -18,11 +16,25 @@ module Iconoclasm
|
|
18
16
|
end
|
19
17
|
|
20
18
|
def get(url)
|
21
|
-
|
19
|
+
c = curl(url)
|
20
|
+
c.http_get
|
21
|
+
c
|
22
22
|
end
|
23
23
|
|
24
24
|
def head(url)
|
25
|
-
|
25
|
+
c = curl(url)
|
26
|
+
c.http_head
|
27
|
+
c
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def curl(url)
|
33
|
+
Curl::Easy.new(url) do |curl|
|
34
|
+
curl.useragent = user_agent
|
35
|
+
curl.follow_location = true
|
36
|
+
curl.timeout = Iconoclasm.timeout || 1000
|
37
|
+
end
|
26
38
|
end
|
27
39
|
end
|
28
40
|
end
|
data/lib/iconoclasm/extractor.rb
CHANGED
@@ -33,20 +33,20 @@ module Iconoclasm
|
|
33
33
|
Nokogiri::XML(content)
|
34
34
|
else
|
35
35
|
response = get(url)
|
36
|
-
Nokogiri::XML(response.
|
36
|
+
Nokogiri::XML(response.body_str) if response.response_code == 200
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
40
|
def extract_favicon_from_naive_guess(base_url)
|
41
41
|
naive_url = "#{base_url}/favicon.ico"
|
42
42
|
response = get(naive_url)
|
43
|
-
headers = Iconoclasm::Headers.new(response.
|
44
|
-
if response.
|
43
|
+
headers = Iconoclasm::Headers.new(response.header_str)
|
44
|
+
if response.response_code == 200
|
45
45
|
throw(:done, {
|
46
46
|
:url => naive_url,
|
47
47
|
:content_length => headers.content_length,
|
48
48
|
:content_type => headers.content_type,
|
49
|
-
:data => response.
|
49
|
+
:data => response.body_str
|
50
50
|
})
|
51
51
|
end
|
52
52
|
end
|
data/lib/iconoclasm.rb
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'curb'
|
3
|
+
|
4
|
+
module Iconoclasm
|
5
|
+
def self.version
|
6
|
+
"1.0.8"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
1
10
|
$:.unshift(File.dirname(__FILE__))
|
2
11
|
require 'iconoclasm/downloader'
|
3
12
|
require 'iconoclasm/errors'
|
@@ -23,4 +32,4 @@ class String
|
|
23
32
|
def lines
|
24
33
|
Enumerable::Enumerator.new(self.split("\n"))
|
25
34
|
end
|
26
|
-
end unless ''.respond_to?(:lines)
|
35
|
+
end unless ''.respond_to?(:lines)
|
data/spec/iconoclasm/downloader_spec.rb
CHANGED
@@ -10,31 +10,45 @@ describe Iconoclasm::Downloader do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
describe "GETting a url" do
|
13
|
-
it "should GET the url using
|
14
|
-
|
13
|
+
it "should GET the url using Curl" do
|
14
|
+
@thing.expects(:curl).with(@url).returns(@curl)
|
15
|
+
@curl.expects(:http_get)
|
15
16
|
@thing.get(@url)
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should set the user agent to the default user agent" do
|
19
|
-
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:user_agent => Iconoclasm::Downloader.user_agent))
|
20
|
-
@thing.get(@url)
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should follow redirects" do
|
24
|
-
Typhoeus::Request.expects(:get).with(instance_of(String), has_entry(:follow_location => true))
|
25
|
-
@thing.get(@url)
|
26
|
-
end
|
17
|
+
end
|
27
18
|
end
|
28
19
|
|
29
20
|
describe "HEADing a url" do
|
30
|
-
it "should HEAD the url using
|
31
|
-
|
21
|
+
it "should HEAD the url using Curl" do
|
22
|
+
@thing.expects(:curl).with(@url).returns(@curl)
|
23
|
+
@curl.expects(:http_head)
|
32
24
|
@thing.head(@url)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "building the Curl object" do
|
29
|
+
before do
|
30
|
+
Curl::Easy.expects(:new).with(@url).yields(@curl)
|
33
31
|
end
|
34
32
|
|
35
33
|
it "should set the user agent to the default user agent" do
|
36
|
-
|
37
|
-
@
|
34
|
+
@curl.stubs(:follow_location=)
|
35
|
+
@curl.stubs(:timeout=)
|
36
|
+
@curl.expects(:useragent=).with(Iconoclasm::Downloader.user_agent)
|
37
|
+
@thing.__send__(:curl, @url)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should follow location" do
|
41
|
+
@curl.expects(:follow_location=).with(true)
|
42
|
+
@curl.stubs(:timeout=)
|
43
|
+
@curl.stubs(:useragent=)
|
44
|
+
@thing.__send__(:curl, @url)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should set the timeout to 1 second" do
|
48
|
+
@curl.stubs(:follow_location=)
|
49
|
+
@curl.expects(:timeout=).with(1000)
|
50
|
+
@curl.stubs(:useragent=)
|
51
|
+
@thing.__send__(:curl, @url)
|
38
52
|
end
|
39
53
|
end
|
40
54
|
end
|
data/spec/iconoclasm/extractor_spec.rb
CHANGED
@@ -69,7 +69,7 @@ describe Iconoclasm::Extractor do
|
|
69
69
|
|
70
70
|
describe "when content isn't already provided" do
|
71
71
|
before do
|
72
|
-
@response = mock('http response', :
|
72
|
+
@response = mock('http response', :response_code => 200, :body_str => "")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "should go get the content" do
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iconoclasm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 8
|
9
|
+
version: 1.0.8
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Sander Hartlage
|
@@ -14,24 +19,30 @@ default_executable:
|
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: addressable
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
20
25
|
requirements:
|
21
26
|
- - ">="
|
22
27
|
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
23
30
|
version: "0"
|
24
|
-
version:
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: typhoeus
|
27
31
|
type: :runtime
|
28
|
-
|
29
|
-
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: curb
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
30
38
|
requirements:
|
31
39
|
- - ">="
|
32
40
|
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
33
43
|
version: "0"
|
34
|
-
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
35
46
|
description: Finds favorites icons for web pages on the world wide internets by checking the HTML head or the standard favicon location. Then, do with them what you will.
|
36
47
|
email: sander.hartlage@gmail.com
|
37
48
|
executables: []
|
@@ -71,21 +82,25 @@ rdoc_options:
|
|
71
82
|
require_paths:
|
72
83
|
- lib
|
73
84
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
74
86
|
requirements:
|
75
87
|
- - ">="
|
76
88
|
- !ruby/object:Gem::Version
|
89
|
+
segments:
|
90
|
+
- 0
|
77
91
|
version: "0"
|
78
|
-
version:
|
79
92
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
80
94
|
requirements:
|
81
95
|
- - ">="
|
82
96
|
- !ruby/object:Gem::Version
|
97
|
+
segments:
|
98
|
+
- 0
|
83
99
|
version: "0"
|
84
|
-
version:
|
85
100
|
requirements: []
|
86
101
|
|
87
102
|
rubyforge_project:
|
88
|
-
rubygems_version: 1.3.
|
103
|
+
rubygems_version: 1.3.7
|
89
104
|
signing_key:
|
90
105
|
specification_version: 3
|
91
106
|
summary: Finds favicons and DESTROYS THEM (well, not really, but it will download and save them)
|