iconoclasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ *.gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Sander Hartlage
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,24 @@
1
+ # iconoclasm
2
+
3
+ Finds favorite icons (favicons) for web pages on the world wide internets by checking the HTML head or the standard favicon location. Then, do with them what you will.
4
+
5
+ Called "iconoclasm" because there was already a gem on gemcutter called "iconoclast". Boo! Hiss!
6
+
7
+ ### Usage
8
+
9
+ To get the favicon for a page, do:
10
+
11
+ `favicon = Iconoclasm.extract('http://www.website.com')`
12
+
13
+ This will go and do a bunch of GETs (two or three, actually) on the url given. If you've already got the content and want to skip one of the GETs, you can pass the content in as the second argument.
14
+
15
+ `content = get_some_content('www.website.com')`<br/>
16
+ `favicon = Iconoclasm.extract('http://www.website.com', content)`
17
+
18
+ `Iconoclasm.extract` returns an `Iconoclasm::Favicon` instance, from which you can get the URL, content type, size, or access the binary image data. By calling `valid?`, you can check if the favicon is valid based on whatever my standards were when I wrote this (basically, whether or not it's actually an image).
19
+
20
+ You can save the image to a tempfile using `favicon.save`, or more usefully, into a directory of your choosing using `favicon.save('path/to/directory')`; the file is named after the favicon itself. Fun times had by all.
21
+
22
+ ## Copyright
23
+
24
+ Copyright (c) 2009 Sander Hartlage. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
require 'rubygems'
require 'rake'

# Define the gem specification through Jeweler when it is installed.
# If Jeweler cannot be loaded, print an install hint and carry on so the
# remaining tasks in this file are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "iconoclasm"
    gem.summary = %Q{Finds favicons and DESTROYS THEM (well, not really, but it will download and save them)}
    gem.description = %Q{Finds favorites icons for web pages on the world wide internets by checking the HTML head or the standard favicon location. Then, do with them what you will.}
    gem.email = "sander.hartlage@gmail.com"
    gem.homepage = "http://github.com/sander6/iconoclasm"
    gem.authors = ["Sander Hartlage"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov` runs the same files under RCov. When RCov is not installed the
# task is still defined, but aborts with an install hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# NOTE(review): :check_dependencies is not defined in this file; presumably
# it is provided by Jeweler::Tasks above -- confirm.
task :test => :check_dependencies

task :default => :test

# `rake rdoc` builds the API documentation into ./rdoc from the README and
# every Ruby file under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  # The gem is called "iconoclasm" ("iconoclast" is a different gem).
  rdoc.title = "iconoclasm #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,30 @@
1
+ require 'curl'
2
+
3
module Iconoclasm
  # Mixin with thin HTTP helpers built on Curl::Easy.
  #
  # The User-Agent sent with every request is a module-wide setting that can
  # be read and changed through Iconoclasm::Downloader.user_agent and
  # Iconoclasm::Downloader.user_agent=.
  module Downloader
    # Stored as an instance variable of the module itself rather than a
    # @@class variable, so the value is not shared with (or clobbered by)
    # the classes that include this module.
    @user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    class << self
      # The User-Agent header value sent by #get and #head.
      attr_accessor :user_agent
    end

    # Performs an HTTP GET on +url+, following redirects, and returns
    # whatever Curl::Easy.http_get returns.
    def get(url)
      Curl::Easy.http_get(url) do |curl|
        curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
        curl.follow_location = true
      end
    end

    # Performs an HTTP HEAD on +url+ and returns whatever
    # Curl::Easy.http_head returns. Unlike #get, redirect following is not
    # switched on here.
    def head(url)
      Curl::Easy.http_head(url) do |curl|
        curl.headers['User-Agent'] = Iconoclasm::Downloader.user_agent
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Iconoclasm

  # Base class for every error raised by Iconoclasm. Remembers the URL that
  # was being processed so subclasses can mention it in their message.
  class Error < StandardError
    def initialize(url)
      @url = url
    end
  end

  # Raised when no favicon could be located for a URL.
  class MissingFavicon < Iconoclasm::Error
    def message
      "#{@url} doesn't seem to have a favicon"
    end
  end

  # Raised when an HTTP exchange went wrong.
  #
  # +response+ is either an object exposing +response_code+ and +header_str+
  # or a plain String (for example a status line that could not be parsed).
  # +url+ may be nil when the URL is not known at the point of failure.
  class HTTPError < Iconoclasm::Error
    def initialize(url, response)
      super(url)
      @response = response
    end

    def message
      prefix = @url ? "There was a problem getting #{@url} " : ""
      "#{prefix}(#{http_error_reason})"
    end

    # The HTTP status code: taken from the response object when it has one,
    # otherwise the first three-digit run found in the response string
    # (a String, or nil when there is none).
    def code
      @response.respond_to?(:response_code) ? @response.response_code : @response[/\d{3}/]
    end

    # The reason text following the status code on the status line. For
    # plain-string responses the whole string is returned. Returns nil
    # (instead of raising NoMethodError) when the header contains no text
    # after a status code.
    def http_error_reason
      return @response unless @response.respond_to?(:header_str)

      reason = @response.header_str.to_s[/(?<=\d{3}\s)(.*)$/]
      reason ? reason.chomp : nil
    end

    # "<code>: <reason>". Uses #code; the @code instance variable is never
    # assigned, so interpolating it always produced an empty code.
    def http_error_message
      "#{code}: #{http_error_reason}"
    end
  end

  # Raised when the library is used in a way it does not support.
  # Takes a human-readable reason rather than a URL.
  class RTFMError < Iconoclasm::Error
    def initialize(reason)
      @reason = reason
    end

    def message
      "Iconoclasm doesn't work that way (#{@reason})"
    end
  end

  # Raised when a favicon was found but is not acceptable, e.g. because its
  # content type is not an image.
  class InvalidFavicon < Iconoclasm::Error
    def initialize(url, content_type)
      super(url)
      @content_type = content_type
    end

    def message
      "The favicon from #{@url} is invalid (content type is #{@content_type})"
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+
4
+ module Iconoclasm
5
+ module Extractor
6
+
7
+ def self.included(base)
8
+ base.class_eval { include Iconoclasm::Downloader }
9
+ end
10
+
11
+ def extract_favicon_from(url, content = nil)
12
+ catch(:done) do
13
+ base_url = base_url_of(url)
14
+ extract_favicon_from_head_of(base_url, content)
15
+ extract_favicon_from_naive_guess(base_url)
16
+ raise Iconoclasm::MissingFavicon.new(base_url)
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def extract_favicon_from_head_of(base_url, content = nil)
23
+ if document = document_from(base_url, content)
24
+ favicon_links = find_favicon_links_in(document)
25
+ throw(:done, {
26
+ :url => href_of(favicon_links.first),
27
+ :content_type => type_of(favicon_links.first)
28
+ }) unless favicon_links.empty?
29
+ end
30
+ end
31
+
32
+ def document_from(base_url, content = nil)
33
+ if content
34
+ Nokogiri::XML(content)
35
+ else
36
+ response = get(base_url)
37
+ Nokogiri::XML(response.body_str) if response.response_code == 200
38
+ end
39
+ end
40
+
41
+ def extract_favicon_from_naive_guess(base_url)
42
+ naive_url = "#{base_url}/favicon.ico"
43
+ response = get(naive_url)
44
+ headers = Iconoclasm::Headers.new(response.header_str)
45
+ if response.response_code == 200
46
+ throw(:done, {
47
+ :url => naive_url,
48
+ :content_length => header.content_length,
49
+ :content_type => headers.content_type,
50
+ :data => response.body_str
51
+ })
52
+ end
53
+ end
54
+
55
+ def find_favicon_links_in(document)
56
+ document.xpath('//link[favicon_link(.)]', Class.new {
57
+ def favicon_link(node_set)
58
+ node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i }
59
+ end
60
+ }.new)
61
+ end
62
+
63
+ def base_url_of(url)
64
+ uri = URI.parse(url)
65
+ "#{uri.scheme}://#{uri.host}"
66
+ end
67
+
68
+ def href_of(node)
69
+ href = normal_node_attributes(node)['href']
70
+ href.value if href
71
+ end
72
+
73
+ def type_of(node)
74
+ type = normal_node_attributes(node)['type']
75
+ type.value if type
76
+ end
77
+
78
+ def normal_node_attributes(node)
79
+ node.attributes.inject({}) { |hash, (key, value)| hash.merge(key.downcase => value) }
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,104 @@
1
+ require 'tempfile'
2
+ require 'mime/types'
3
+ require 'uri'
4
+
5
module Iconoclasm
  # A favicon: where it lives, what it claims to be, and (lazily) its bytes.
  class Favicon
    include Iconoclasm::Downloader

    # content_type has a hand-written reader below, so it is deliberately
    # not listed here.
    attr_reader :url, :save_path
    attr_accessor :name

    # Recognised +attributes+ keys:
    # :url::            location of the favicon
    # :data::           image bytes, if already downloaded
    # :name::           file name; defaults to the last path segment of :url
    # :headers::        object responding to content_type / content_length
    # :content_type::   explicit content type (wins over :headers)
    # :content_length:: explicit size (wins over :headers)
    def initialize(attributes = {})
      @url  = attributes[:url]
      @data = attributes[:data]
      @name = attributes[:name] || parse_name_from(@url)
      headers = attributes[:headers]
      # The parentheses matter: `a || headers ? x : y` parses as
      # `(a || headers) ? x : y`, which called nil.content_type whenever a
      # content type was supplied without headers.
      @content_type = attributes[:content_type]   || (headers ? headers.content_type   : nil)
      @size         = attributes[:content_length] || (headers ? headers.content_length : nil)
      @save_path = nil
    end

    def inspect
      "#<Iconoclasm::Favicon @url=#{url}, @name=#{name}, @content_type=#{content_type}, @size=#{size}, @save_path=#{save_path || "nil"}>"
    end

    # Size in bytes: the value given at construction, else the length of
    # the data (which may trigger a download).
    def size
      @size ||= data.size
    end
    alias_method :content_length, :size

    # The image bytes, downloaded on first access unless supplied up front.
    def data
      @data ||= fetch_data
    end

    # The content type given at construction, else a guess made from the
    # file name through MIME::Types (nil when nothing matches).
    def content_type
      return @content_type if @content_type

      mime = MIME::Types.of(name).first
      @content_type = mime.content_type if mime
    end

    # True when the favicon is non-empty and its content type starts with
    # "image" or "x-image". Anything else (text/html, nil, unknown types)
    # is treated as invalid.
    def valid?
      # check the file type using filemagic for unknown types, maybe?
      @valid ||= size > 0 && !!(content_type.to_s =~ /^(?:x-)?image/)
    end

    # Downloads the favicon and returns the response body.
    # Raises Iconoclasm::HTTPError unless the response code is 200.
    def fetch_data
      response = get(url)
      raise Iconoclasm::HTTPError.new(url, response) unless response.response_code == 200

      response.body_str
    end

    # Saves the favicon and returns (and remembers in #save_path) the path
    # written to.
    #
    # path_or_storage:: nil saves to a tempfile; a String is taken as the
    #                   directory to save into. Anything else raises
    #                   Iconoclasm::RTFMError.
    # force::           when true, an invalid favicon is saved anyway (with
    #                   a warning) instead of raising
    #                   Iconoclasm::InvalidFavicon.
    def save(path_or_storage = nil, force = false)
      invalid = !valid?
      # The guard used to be `valid? && !force`, which made force = true
      # raise unconditionally and left the warning unreachable.
      raise Iconoclasm::InvalidFavicon.new(url, content_type) if invalid && !force
      warn("Saving an invalid favicon.") if invalid

      @save_path =
        if path_or_storage.nil?
          save_to_tempfile
        elsif path_or_storage.is_a?(String)
          save_to_file(path_or_storage)
        else
          raise Iconoclasm::RTFMError.new("invalid storage type")
        end
    end

    # Writes the data to a Tempfile named after the favicon; returns its path.
    def save_to_tempfile
      tfile = dump_data(Tempfile.new(name))
      @save_path = tfile.path
    end

    # Writes the data to "<path>/<name>" (creating or truncating the file);
    # returns the expanded path.
    def save_to_file(path)
      path = File.expand_path(File.join(path, name))
      dump_data(File.new(path, File::CREAT|File::TRUNC|File::WRONLY))
      @save_path = path
    end

    # Last path segment of +url+, e.g. "favicon.ico".
    def parse_name_from(url)
      URI.parse(url).path.split('/').last
    end

    # Writes the image bytes to +file+ and closes it, even when the write
    # fails. Returns +file+.
    def dump_data(file)
      # Image data is binary; avoid newline translation where the platform
      # would otherwise apply it.
      file.binmode if file.respond_to?(:binmode)
      file.write(data)
      file
    ensure
      file.close
    end
  end
end
@@ -0,0 +1,59 @@
1
module Iconoclasm
  # Parses a raw HTTP response header string into a status line (version,
  # code, message) and a hash of headers.
  #
  # Header names are normalised to lowercase with underscores, so
  # headers['Content-Type'], headers['content_type'] and
  # headers[:content_type] all read the same entry. Purely numeric values
  # are converted to Integers.
  class Headers

    attr_reader :version, :code, :message

    # Raises Iconoclasm::HTTPError when the first line is not an HTTP status
    # line. That includes an empty or nil header string, which previously
    # crashed with NoMethodError.
    def initialize(header_string)
      header_string = header_string.to_s
      parse_http_response(header_string.lines.first.to_s.chomp)
      @header_hash = parse_header_string(header_string)
    end

    def [](header)
      @header_hash[convert_header_key(header.to_s)]
    end

    def content_type
      @content_type ||= self['content_type']
    end
    alias_method :type, :content_type

    def content_length
      @content_length ||= self['content_length']
    end
    alias_method :length, :content_length

    def location
      @location ||= self['location']
    end

    private

    # Builds the normalised header hash; when a header appears more than
    # once, the last occurrence wins.
    def parse_header_string(header_string)
      # The key must not span lines. With a plain [^:]+ the colon-free
      # status line was glued onto the first real header
      # ("HTTP/1.1 200 OK\r\nDate"), making that header unreachable.
      header_string.scan(/^([^:\r\n]+):(.*)$/).each_with_object({}) do |(key, value), hash|
        hash[convert_header_key(key)] = convert_header_value(value)
      end
    end

    def convert_header_key(key)
      key.strip.tr('-', '_').downcase
    end

    def convert_header_value(value)
      value =~ /\A\s*\d+\s*\z/ ? value.to_i : value.strip
    end

    # Extracts version, code and message from the status line. Accepts
    # single-digit versions such as "HTTP/2 200" as well as "HTTP/1.1 200 OK".
    def parse_http_response(response)
      status = response.match(/HTTP\/(\d(?:\.\d)?)\s*(\d{3})\s*(.*)/)
      raise Iconoclasm::HTTPError.new(nil, response) unless status

      @version = status[1]
      @code    = status[2].to_i
      @message = status[3].strip
    end
  end
end
data/lib/iconoclasm.rb ADDED
@@ -0,0 +1,18 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require 'iconoclasm/downloader'
3
+ require 'iconoclasm/errors'
4
+ require 'iconoclasm/extractor'
5
+ require 'iconoclasm/favicon'
6
+ require 'iconoclasm/headers'
7
+
8
module Iconoclasm

  # The extraction helpers are mixed into the module's singleton class with
  # `include` (not `extend`) so that Extractor's `included` hook runs and
  # brings Iconoclasm::Downloader along with it.
  class << self
    include Iconoclasm::Extractor
  end

  # Finds the favicon for +url+ and wraps what was found in an
  # Iconoclasm::Favicon. +content+ is optional, already-downloaded page
  # markup that is handed straight to the extractor.
  def self.extract(url, content = nil)
    favicon_attributes = extract_favicon_from(url, content)
    Iconoclasm::Favicon.new(favicon_attributes)
  end

end
data/spec/helper.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'mocha'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'iconoclasm'
8
+
9
+ Spec::Runner.configure do |config|
10
+ config.mock_with :mocha
11
+ end
12
+
13
+ # It bothers me that mocha doesn't have this built-in.
14
+ # It also bothers me that I forked mocha but didn't add this.
15
+ # We are all at fault.
16
module Mocha
  # Return-value stand-in that throws a symbol (optionally carrying an
  # object) when evaluated, instead of returning a value.
  class SymbolThrower
    def initialize(symbol, object)
      @symbol, @object = symbol, object
    end

    # Throws the symbol, passing the object along only when it is truthy.
    def evaluate
      throw(@symbol) unless @object
      throw(@symbol, @object)
    end
  end

  class Expectation
    # Makes the expected call throw +symbol+ (with +object+, when given).
    # Returns the expectation itself so further calls can be chained.
    def throws(symbol, object = nil)
      thrower = SymbolThrower.new(symbol, object)
      @return_values += ReturnValues.new(thrower)
      self
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../helper')
2
+
3
+ describe Iconoclasm::Downloader do
4
+
5
+ before do
6
+ class Thing; include Iconoclasm::Downloader; end
7
+ @thing = Thing.new
8
+ @url = 'http://www.website.com'
9
+ @curl = mock('curl')
10
+ end
11
+
12
+ describe "GETting a url" do
13
+ it "should GET the url using curl easy" do
14
+ Curl::Easy.expects(:http_get).with(@url)
15
+ @thing.get(@url)
16
+ end
17
+
18
+ it "should set the user agent to the default user agent" do
19
+ @curl.stubs(:follow_location=)
20
+ headers = mock('headers')
21
+ Curl::Easy.stubs(:http_get).yields(@curl)
22
+ @curl.expects(:headers).returns(headers)
23
+ headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
24
+ @thing.get(@url)
25
+ end
26
+
27
+ it "should follow redirects" do
28
+ @curl.stubs(:headers).returns({})
29
+ Curl::Easy.stubs(:http_get).yields(@curl)
30
+ @curl.expects(:follow_location=).with(true)
31
+ @thing.get(@url)
32
+ end
33
+ end
34
+
35
+ describe "HEADing a url" do
36
+ it "should HEAD the url using curl easy" do
37
+ Curl::Easy.expects(:http_head).with(@url)
38
+ @thing.head(@url)
39
+ end
40
+
41
+ it "should set the user agent to the default user agent" do
42
+ headers = mock('headers')
43
+ Curl::Easy.stubs(:http_head).yields(@curl)
44
+ @curl.expects(:headers).returns(headers)
45
+ headers.expects(:[]=).with('User-Agent', Iconoclasm::Downloader.user_agent)
46
+ @thing.head(@url)
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,5 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../helper')
2
+
3
+ describe "Errors" do
4
+
5
+ end
@@ -0,0 +1,121 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../helper')
2
+
3
+ describe Iconoclasm::Extractor do
4
+
5
+ before do
6
+ class Thing; include Iconoclasm::Extractor; end
7
+ @thing = Thing.new
8
+ end
9
+
10
+ describe "requiring the module" do
11
+ it "should also require the Downloader module" do
12
+ Thing.included_modules.should include(Iconoclasm::Downloader)
13
+ end
14
+ end
15
+
16
+ describe "extracting a favicon from a url" do
17
+ before do
18
+ @url = "http://www.website.com/page.html"
19
+ @base_url = "http://www.website.com"
20
+ end
21
+
22
+ it "should try to find the favicon path in the head of the content" do
23
+ @thing.expects(:extract_favicon_from_head_of).with(@base_url, nil).throws(:done)
24
+ @thing.extract_favicon_from(@url)
25
+ end
26
+
27
+ describe "when the favicon path isn't in the head of the content" do
28
+ before do
29
+ @thing.stubs(:extract_favicon_from_head_of)
30
+ end
31
+
32
+ it "should naively guess where the favicon is" do
33
+ @thing.expects(:extract_favicon_from_naive_guess).with(@base_url).throws(:done)
34
+ @thing.extract_favicon_from(@url)
35
+ end
36
+ end
37
+
38
+ describe "when the favicon isn't mentioned in the content or in the default place" do
39
+ before do
40
+ @thing.stubs(:extract_favicon_from_head_of)
41
+ @thing.stubs(:extract_favicon_from_naive_guess)
42
+ end
43
+
44
+ it "should raise an error" do
45
+ lambda { @thing.extract_favicon_from(@url) }.should raise_error(Iconoclasm::MissingFavicon)
46
+ end
47
+ end
48
+ end
49
+
50
+ describe "extracting a favicon from the head of some HTML content" do
51
+ before do
52
+ # stubbing this to make sure we're not calling it accidentally without
53
+ # having to deal with expectations within a catch/throw pile
54
+ @thing.stubs(:extract_favicon_from_naive_guess)
55
+ @url = "http://www.website.com/page.html"
56
+ @base_url = "http://www.website.com"
57
+ @content = <<-HTML
58
+ <html>
59
+ <head>
60
+ <link rel="stylesheet" type="text/css" href="/stylesuponstyles.css" />
61
+ <link rel="shortcut icon" type="image/vnd.microsoft.icon" href="/images/favicon.ico" />
62
+ </head>
63
+ <body>
64
+ <p>This is the most interesting website ever.</p>
65
+ </body>
66
+ </html>
67
+ HTML
68
+ end
69
+
70
+ describe "when content isn't already provided" do
71
+ before do
72
+ @response = mock('http response', :response_code => 200, :body_str => "")
73
+ end
74
+
75
+ it "should go get the content" do
76
+ @thing.expects(:get).returns(@response)
77
+ catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url) }
78
+ end
79
+ end
80
+
81
+ describe "when content is provided" do
82
+ describe "when there are no favicon links in the HTML content" do
83
+ before do
84
+ @thing.stubs(:find_favicon_links_in).returns([])
85
+ end
86
+
87
+ it "should return nil" do
88
+ catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }.should be_nil
89
+ end
90
+ end
91
+
92
+ describe "when there are some favicon links in the HTML content" do
93
+ before do
94
+ @link = stub('favicon link')
95
+ @links = stub('favicon links', :empty? => false, :first => @link)
96
+ @thing.stubs(:find_favicon_links_in).returns(@links)
97
+ end
98
+
99
+ describe "the return value" do
100
+ before do
101
+ @href = 'http://www.website.com/images/favicon.ico'
102
+ @type = 'image/vnd.microsoft.icon'
103
+ @thing.expects(:href_of).with(@link).returns(@href)
104
+ @thing.expects(:type_of).with(@link).returns(@type)
105
+ @hash = catch(:done) { @thing.__send__(:extract_favicon_from_head_of, @url, @content) }
106
+ end
107
+
108
+ it "should contain the href from the first link" do
109
+ @hash.should have_key(:url)
110
+ @hash[:url].should == @href
111
+ end
112
+
113
+ it "should contain the content type from the first link" do
114
+ @hash.should have_key(:content_type)
115
+ @hash[:content_type].should == @type
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,304 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../helper')
2
+
3
+ describe Iconoclasm::Favicon do
4
+
5
+ before do
6
+ @size = 100
7
+ @headers = stub('headers', :content_type => 'image/vnd.microsoft.icon', :content_length => @size)
8
+ @name = 'favicon.ico'
9
+ @url = "http://www.website.com/#{@name}"
10
+ @attributes = {
11
+ :url => @url,
12
+ :headers => @headers
13
+ }
14
+ end
15
+
16
+ describe "initialization" do
17
+ before do
18
+ @favicon = Iconoclasm::Favicon.new(@attributes)
19
+ end
20
+
21
+ it "should set the content type to the content type supplied in the headers" do
22
+ @favicon.content_type.should == @headers.content_type
23
+ end
24
+
25
+ it "should set the size to the content length supplied in the headers" do
26
+ @favicon.size.should == @headers.content_length
27
+ end
28
+
29
+ it "should parse the name from the url supplied in the headers" do
30
+ @favicon.name.should == @name
31
+ end
32
+
33
+ it "should not have a save path" do
34
+ @favicon.save_path.should be_nil
35
+ end
36
+ end
37
+
38
+ describe "accessing the data attribute" do
39
+ before do
40
+ @data = "THIS IS TOTALLY SOME IMAGE DATA!"
41
+ end
42
+
43
+ describe "when the data was supplied on intialization" do
44
+ before do
45
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => @data}))
46
+ end
47
+
48
+ it "should return the supplied data" do
49
+ @favicon.data.should == @data
50
+ end
51
+
52
+ it "should not try to fetch the data from the internets" do
53
+ @favicon.expects(:fetch_data).never
54
+ @favicon.data
55
+ end
56
+ end
57
+
58
+ describe "when data was not supplied on initialization" do
59
+ before do
60
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => nil}))
61
+ end
62
+
63
+ it "should fetch the data from the internets and return it" do
64
+ @favicon.expects(:fetch_data).returns(@data)
65
+ @favicon.data.should == @data
66
+ end
67
+
68
+ it "should not fetch the data from the internets on subsequent calls" do
69
+ @favicon.expects(:fetch_data).once.returns(@data)
70
+ @favicon.data
71
+ @favicon.data
72
+ end
73
+ end
74
+ end
75
+
76
+ describe "accessing the size attribute" do
77
+ before do
78
+ @data = "THIS IS SOME DATA!"
79
+ @size = 100
80
+ end
81
+
82
+ describe "when the size was supplied on initialization" do
83
+ before do
84
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => @data, :content_length => @size}))
85
+ end
86
+
87
+ it "should return the supplied size" do
88
+ @favicon.size.should == @size
89
+ end
90
+
91
+ it "should not check the length of the data" do
92
+ @favicon.data.expects(:size).never
93
+ @favicon.size
94
+ end
95
+ end
96
+
97
+ describe "when the size was not supplied on initialization" do
98
+ before do
99
+ @headers.stubs(:content_length).returns(nil)
100
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => @data, :content_length => nil}))
101
+ @favicon.instance_variable_get(:@size).should be_nil
102
+ end
103
+
104
+ it "should return the size of the data" do
105
+ @favicon.size.should == @data.size
106
+ end
107
+ end
108
+ end
109
+
110
+ describe "accessing the content type attribute" do
111
+ before do
112
+ @content_type = 'image/vnd.microsoft.icon'
113
+ end
114
+
115
+ describe "when the content type was supplied on initialization" do
116
+ before do
117
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:content_type => @content_type}))
118
+ end
119
+
120
+ it "should return the supplied content_type" do
121
+ @favicon.content_type.should == @content_type
122
+ end
123
+
124
+ it "should not try to check it" do
125
+ ::MIME::Types.expects(:of).never
126
+ @favicon.content_type
127
+ end
128
+ end
129
+
130
+ describe "when the content type was not supplied on initialization" do
131
+ before do
132
+ @headers.stubs(:content_type).returns(nil)
133
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:content_type => nil}))
134
+ @mime = mock('mime type', :content_type => @content_type)
135
+ end
136
+
137
+ it "should check the content type of the file name using the mime types library and return the first" do
138
+ ::MIME::Types.expects(:of).with(@favicon.name).returns([@mime])
139
+ @favicon.content_type.should == @content_type
140
+ end
141
+ end
142
+ end
143
+
144
+ describe "fetching the image data" do
145
+ before do
146
+ @favicon = Iconoclasm::Favicon.new(@attributes)
147
+ @response = mock('http response')
148
+ end
149
+
150
+ it "should request the icon image" do
151
+ @favicon.expects(:get).returns(@response)
152
+ @response.stubs(:response_code => 200, :body_str => "IMAGE DATA!")
153
+ @favicon.fetch_data
154
+ end
155
+
156
+ describe "when the HTTP request is successsful" do
157
+ before do
158
+ @favicon.stubs(:get).returns(@response)
159
+ @data = "THIS IS ALSO TOTALLY SOME IMAGE DATA HAR HAR HAR!"
160
+ @response.expects(:response_code).returns(200)
161
+ end
162
+
163
+ it "should return the content of the request (the binary image data)" do
164
+ @response.expects(:body_str).returns(@data)
165
+ @favicon.fetch_data.should == @data
166
+ end
167
+ end
168
+
169
+ describe "when the HTTP request is not successful" do
170
+ before do
171
+ @favicon.stubs(:get).returns(@response)
172
+ @response.expects(:response_code).returns(400)
173
+ end
174
+
175
+ it "should raise an HTTP error" do
176
+ lambda { @favicon.fetch_data }.should raise_error(Iconoclasm::HTTPError)
177
+ end
178
+ end
179
+ end
180
+
181
+ describe "determining the validity of the favicon" do
182
+ before do
183
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => "IMAGE DATA!"}))
184
+ end
185
+
186
+ describe "when the content is zero-length" do
187
+ before do
188
+ @favicon.stubs(:size).returns(0)
189
+ end
190
+
191
+ it "should not be valid" do
192
+ @favicon.should_not be_valid
193
+ end
194
+ end
195
+
196
+ describe "when the content type is a image" do
197
+ before do
198
+ @favicon.stubs(:content_type).returns('image/png')
199
+ end
200
+
201
+ it "should be valid" do
202
+ @favicon.should be_valid
203
+ end
204
+ end
205
+
206
+ describe "when the content type is HTML" do
207
+ before do
208
+ # This will happen when some jerkface puts a webpage where the favicon
209
+ # should be. People on the internet are the worst.
210
+ @favicon.stubs(:content_type).returns('text/html')
211
+ end
212
+
213
+ it "should not be valid" do
214
+ @favicon.should_not be_valid
215
+ end
216
+ end
217
+
218
+ describe "when the content type is nil" do
219
+ before do
220
+ @favicon.stubs(:content_type).returns(nil)
221
+ end
222
+
223
+ it "should not be valid" do
224
+ @favicon.should_not be_valid
225
+ end
226
+ end
227
+
228
+ describe "when the content type is something else" do
229
+ before do
230
+ @favicon.stubs(:content_type).returns('something/else')
231
+ # eventually, maybe, I'll try harder to see if it's a valid image of some sort.
232
+ end
233
+
234
+ it "should not be valid" do
235
+ @favicon.should_not be_valid
236
+ end
237
+ end
238
+ end
239
+
240
+ describe "saving the favicon" do
241
+ before do
242
+ @favicon = Iconoclasm::Favicon.new(@attributes.merge({:data => "IMAGE DATA!"}))
243
+ end
244
+
245
+ describe "to a tempfile" do
246
+ before do
247
+ @path = '/tmp/favicon.ico'
248
+ @tempfile = stub('tempfile!', :path => @path)
249
+ end
250
+
251
+ it "should happen when there are no arguments to save" do
252
+ @favicon.expects(:save_to_tempfile)
253
+ @favicon.save
254
+ end
255
+
256
+ it "should dump its data to a tempfile named after the favicon" do
257
+ Tempfile.expects(:new).with(@favicon.name).returns(@tempfile)
258
+ @favicon.expects(:dump_data).with(@tempfile).returns(@tempfile)
259
+ @favicon.save_to_tempfile
260
+ end
261
+
262
+ it "should set the save_path to the path to the tempfile" do
263
+ Tempfile.stubs(:new).returns(@tempfile)
264
+ @favicon.stubs(:dump_data).returns(@tempfile)
265
+ @favicon.save
266
+ @favicon.save_path.should == @path
267
+ end
268
+ end
269
+
270
+ describe "to a file" do
271
+ before do
272
+ @file = stub('file')
273
+ @path = '/var/stuff/favicons'
274
+ end
275
+
276
+ it "should happen when providing a path to save" do
277
+ @favicon.expects(:save_to_file)
278
+ @favicon.save(@path)
279
+ end
280
+
281
+ it "should dump its data to a file at the given path" do
282
+ File.expects(:new).with("#{@path}/#{@favicon.name}", anything).returns(@file)
283
+ @favicon.expects(:dump_data).with(@file)
284
+ @favicon.save_to_file(@path)
285
+ end
286
+
287
+ it "should set the save_path to the path to the new file" do
288
+ File.stubs(:new).returns(stub_everything)
289
+ @favicon.save_to_file(@path)
290
+ @favicon.save_path.should == "#{@path}/#{@favicon.name}"
291
+ end
292
+ end
293
+
294
+ describe "to some other kind of storage" do
295
+ before do
296
+ @storage = Object.new
297
+ end
298
+
299
+ it "should raise an error" do
300
+ lambda { @favicon.save(@storage) }.should raise_error(Iconoclasm::RTFMError)
301
+ end
302
+ end
303
+ end
304
+ end
@@ -0,0 +1,43 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../helper')
2
+
3
+ describe Iconoclasm::Headers do
4
+
5
+ before do
6
+ @http_response = "HTTP/1.1 200 OK"
7
+ @server = "Apache/2.2.11 (Unix) mod_ssl/2.2.11 OpenSSL/0.9.7e"
8
+ @last_modified = "Wed, 13 Jun 2007 19:15:36 GMT"
9
+ @content_type = "image/x-icon"
10
+ @content_length = 3638
11
+ @header_string = "#{@http_response}\r\nDate: Tue, 22 Dec 2009 21:15:31 GMT\r\nServer: #{@server}\r\nVary: Host,User-Agent\r\nLast-Modified: #{@last_modified}\r\nETag: \"e36-432ce70534600\"\r\nAccept-Ranges: bytes\r\nContent-Length: #{@content_length}\r\nContent-Type: #{@content_type}\r\n\r\n"
12
+ @headers = Iconoclasm::Headers.new(@header_string)
13
+ end
14
+
15
+ describe "parsing the HTTP response" do
16
+
17
+ it "should extract the HTTP version from the headers" do
18
+ @headers.version.should == "1.1"
19
+ end
20
+
21
+ it "should extract the HTTP response code from the headers" do
22
+ @headers.code.should == 200
23
+ end
24
+
25
+ it "should extract the HTTP response message from the headers" do
26
+ @headers.message.should == "OK"
27
+ end
28
+ end
29
+
30
+ describe "hashifying the headers" do
31
+ it "should allow headers to be accessible by name" do
32
+ @headers['Server'].should == @server
33
+ end
34
+
35
+ it "should allow headers to be accessible by their normalized (lowercase and underscored) names" do
36
+ @headers[:last_modified].should == @last_modified
37
+ end
38
+
39
+ it "should convert numeric values to actual numbers" do
40
+ @headers[:content_length].should be_a_kind_of(Numeric)
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,24 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/helper')
2
+
3
+ describe Iconoclasm do
4
+
5
+ describe "#extract" do
6
+ before do
7
+ @url = 'http://www.website.com/some-crappy-blog-post'
8
+ end
9
+
10
+ it "should extract the favicon for the given url" do
11
+ Iconoclasm.expects(:extract_favicon_from).with(@url, nil)
12
+ Iconoclasm::Favicon.stubs(:new)
13
+ Iconoclasm.extract(@url)
14
+ end
15
+
16
+ it "should make a new Favicon instance" do
17
+ favicon = stub('favicon')
18
+ Iconoclasm.stubs(:extract_favicon_from).returns(favicon)
19
+ Iconoclasm::Favicon.expects(:new).with(favicon)
20
+ Iconoclasm.extract(@url)
21
+ end
22
+ end
23
+
24
+ end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iconoclasm
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sander Hartlage
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-28 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Finds favorites icons for web pages on the world wide internets by checking the HTML head or the standard favicon location. Then, do with them what you will.
17
+ email: sander.hartlage@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.markdown
25
+ files:
26
+ - .document
27
+ - .gitignore
28
+ - LICENSE
29
+ - README.markdown
30
+ - Rakefile
31
+ - VERSION
32
+ - lib/iconoclasm.rb
33
+ - lib/iconoclasm/downloader.rb
34
+ - lib/iconoclasm/errors.rb
35
+ - lib/iconoclasm/extractor.rb
36
+ - lib/iconoclasm/favicon.rb
37
+ - lib/iconoclasm/headers.rb
38
+ - spec/helper.rb
39
+ - spec/iconoclasm/downloader_spec.rb
40
+ - spec/iconoclasm/errors_spec.rb
41
+ - spec/iconoclasm/extractor_spec.rb
42
+ - spec/iconoclasm/favicon_spec.rb
43
+ - spec/iconoclasm/headers_spec.rb
44
+ - spec/iconoclasm_spec.rb
45
+ has_rdoc: true
46
+ homepage: http://github.com/sander6/iconoclasm
47
+ licenses: []
48
+
49
+ post_install_message:
50
+ rdoc_options:
51
+ - --charset=UTF-8
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ requirements: []
67
+
68
+ rubyforge_project:
69
+ rubygems_version: 1.3.5
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Finds favicons and DESTROYS THEM (well, not really, but it will download and save them)
73
+ test_files:
74
+ - spec/helper.rb
75
+ - spec/iconoclasm/downloader_spec.rb
76
+ - spec/iconoclasm/errors_spec.rb
77
+ - spec/iconoclasm/extractor_spec.rb
78
+ - spec/iconoclasm/favicon_spec.rb
79
+ - spec/iconoclasm/headers_spec.rb
80
+ - spec/iconoclasm_spec.rb