findart 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +33 -0
- data/README.rdoc +63 -0
- data/Rakefile +30 -0
- data/bin/findart +55 -0
- data/features/development.feature +13 -0
- data/features/findalbumarturls.feature +38 -0
- data/features/fixtures/search-albumartexchange-daft-punk-discovery.html +107 -0
- data/features/registerdscapers.feature +12 -0
- data/features/step_definitions/common_steps.rb +169 -0
- data/features/step_definitions/findalbumarturls_steps.rb +9 -0
- data/features/support/common.rb +29 -0
- data/features/support/env.rb +14 -0
- data/features/support/matchers.rb +11 -0
- data/lib/FindArt.rb +20 -0
- data/lib/FindArt/scraper.rb +76 -0
- data/lib/FindArt/scrapers/albumartexchange.rb +22 -0
- data/lib/FindArt/scrapers/amazon.rb.disabled +47 -0
- data/lib/FindArt/scrapers/discogs.rb +44 -0
- data/lib/FindArt/scrapers/junodownload.rb +31 -0
- data/lib/FindArt/scrapers/walmart.rb +38 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/spec/albumartexchange_spec.rb +26 -0
- data/spec/amazon_spec.rb +34 -0
- data/spec/discogs_spec.rb +32 -0
- data/spec/junodownload_spec.rb +32 -0
- data/spec/scraper_spec.rb +121 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/walmart_spec.rb +26 -0
- data/tasks/rspec.rake +25 -0
- metadata +164 -0

data/features/support/common.rb
ADDED
@@ -0,0 +1,29 @@
module CommonHelpers
  def in_tmp_folder(&block)
    FileUtils.chdir(@tmp_root, &block)
  end

  def in_project_folder(&block)
    project_folder = @active_project_folder || @tmp_root
    FileUtils.chdir(project_folder, &block)
  end

  def in_home_folder(&block)
    FileUtils.chdir(@home_path, &block)
  end

  def force_local_lib_override(project_name = @project_name)
    rakefile = File.read(File.join(project_name, 'Rakefile'))
    File.open(File.join(project_name, 'Rakefile'), "w+") do |f|
      f << "$:.unshift('#{@lib_path}')\n"
      f << rakefile
    end
  end

  def setup_active_project_folder project_name
    @active_project_folder = File.join(@tmp_root, project_name)
    @project_name = project_name
  end
end

World(CommonHelpers)

data/features/support/env.rb
ADDED
@@ -0,0 +1,14 @@
require File.dirname(__FILE__) + "/../../lib/FindArt"

gem 'cucumber'
require 'cucumber'
gem 'rspec'
require 'spec'

Before do
  @tmp_root = File.dirname(__FILE__) + "/../../tmp"
  @home_path = File.expand_path(File.join(@tmp_root, "home"))
  FileUtils.rm_rf @tmp_root
  FileUtils.mkdir_p @home_path
  ENV['HOME'] = @home_path
end

data/features/support/matchers.rb
ADDED
@@ -0,0 +1,11 @@
module Matchers
  def contain(expected)
    simple_matcher("contain #{expected.inspect}") do |given, matcher|
      matcher.failure_message = "expected #{given.inspect} to contain #{expected.inspect}"
      matcher.negative_failure_message = "expected #{given.inspect} not to contain #{expected.inspect}"
      given.index expected
    end
  end
end

World(Matchers)

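For context, a minimal sketch of how the matcher above might be used from a Cucumber step definition; the step text and the @stdout variable are hypothetical and not taken from this package:

Then /^the output should contain "(.*)"$/ do |text|
  # contain(...) builds an RSpec 1.x simple_matcher; it passes when
  # String#index (or Array#index) finds the expected value in the subject.
  @stdout.should contain(text)
end
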
data/lib/FindArt.rb
ADDED
@@ -0,0 +1,20 @@
$:.unshift(File.dirname(__FILE__)) unless
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'rubygems'
require 'mechanize'
require 'hpricot'

#meta programming patch
class Object # http://whytheluckystiff.net/articles/seeingMetaclassesClearly.html
  def meta_def name, &blk
    (class << self; self; end).instance_eval { define_method name, &blk }
  end
end

module FindArt
  VERSION = '0.0.1'
end

require "#{File.dirname(__FILE__)}/FindArt/scraper.rb"
Dir["#{File.dirname(__FILE__)}/FindArt/scrapers/*.rb"].each {|file| require file }

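As an aside, a minimal sketch of what the Object#meta_def patch above enables; the Widget class is made up for illustration and is not part of the gem:

# patch copied from lib/FindArt.rb
class Object
  def meta_def name, &blk
    (class << self; self; end).instance_eval { define_method name, &blk }
  end
end

class Widget; end

# defines a method on Widget's singleton class, i.e. a class-level method
Widget.meta_def(:greeting) { "hello" }
Widget.greeting  # => "hello"

This is the mechanism Scraper#start_url (next file) uses to give each scraper class a class-level url reader.
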

data/lib/FindArt/scraper.rb
ADDED
@@ -0,0 +1,76 @@

# Factory that contains site specific scapers.
# Scrapers are classes that collect urls of album art for a given artist and title.
# Scraper classes can self register them self by calling register_scraper in the class body.
# == Example
#
#   @scraper = Scraper.new
#
#   class TestScraper < Scraper
#     register_scraper :test_scraper
#   end
#
#   @scraper.scrapers
#   {:test_scraper=>TestScraper}
module FindArt
  class Scraper
    @@scrapers = {}


    # class method for registering scrapers
    def self.register_scraper(name)
      @@scrapers[name] = self
    end

    # hash of all registerd scrapers
    def scrapers
      @@scrapers
    end

    def self.registerd_sites
      @@scrapers.map {|scraper_name,klass| URI(klass.url).host}
    end

    # clears all registerd scapers
    def self.unregister_scrapers!
      @@scrapers = {}
    end

    # all registerd scrapers are used to find album art
    # returns an array of urls
    def find_art(artist,title,opts=nil)
      threads = []
      @@scrapers.each do |scraper_name,klass|
        scraper = klass.new
        threads << Thread.new { Thread.current["results"] = scraper.scrape(artist,title,opts) }
      end
      results = []
      begin
        threads.each {|t| t.join; results << t["results"] unless t['results'].nil? }
      rescue
      end
      results
    end

    private

    # helper method for setting the url where the scraper starts its search for album art
    #
    # == Example
    #
    #   class JunoDownload < Scraper
    #     start_url "http://www.junodownload.com/search/"
    #     register_scraper :junodownload
    #   end
    #
    def self.start_url(url)
      class_variable_set "@@url", url
      meta_def :url do
        class_variable_get "@@url"
      end
    end
  end
end

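A minimal usage sketch of the factory above (illustrative only, assuming the gem is on the load path; DummyScraper and its URLs are made up, and unregister_scrapers! is called first so the bundled scrapers are not hit over the network):

require 'rubygems'
require 'FindArt'
require 'cgi'

FindArt::Scraper.unregister_scrapers!

class DummyScraper < FindArt::Scraper
  start_url "http://example.com/search?q="
  register_scraper :dummy

  # a scraper returns a single artwork url (or nil)
  def scrape(artist, title, opts = {})
    "http://example.com/#{CGI.escape("#{artist} #{title}")}.jpg"
  end
end

# find_art runs each registered scraper in its own thread and
# collects the non-nil results into an array
FindArt::Scraper.new.find_art("Daft Punk", "Discovery")
# => ["http://example.com/Daft+Punk+Discovery.jpg"]
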
data/lib/FindArt/scrapers/albumartexchange.rb
ADDED
@@ -0,0 +1,22 @@
module FindArt
  class AlbumArtExchange < Scraper
    start_url "http://www.albumartexchange.com/covers.php?sort=7&q="
    register_scraper :albumartexchange

    def scrape(artist,title,opts={})
      url = nil
      search_url = "#{@@url}#{CGI.escape("#{artist} #{title}")}"
      browser = WWW::Mechanize.new
      browser.get(search_url) do |page|
        doc = Hpricot(page.body)
        element = doc.at("* table tr td a img[@width='150']")
        src = element["src"] if !element.nil? && !element["src"].nil?
        if src
          path = src.split("src=").last
          url = "http://www.albumartexchange.com#{URI.decode(path)}"
        end
      end
      url
    end
  end
end

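The url rebuilding in scrape works on the thumbnail's src attribute. A standalone illustration with a hypothetical src value (the real gallery markup may differ, and URI.decode reflects the Ruby of the gem's era; it was removed in later Rubies):

require 'uri'

# hypothetical thumbnail src as it might appear in the search results page
src  = "/thumbs.php?src=%2Fgallery%2Fimages%2Fpublic%2Fti%2Ftiesto-insear_02.jpg"

path = src.split("src=").last
url  = "http://www.albumartexchange.com#{URI.decode(path)}"
# => "http://www.albumartexchange.com/gallery/images/public/ti/tiesto-insear_02.jpg"
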
data/lib/FindArt/scrapers/amazon.rb.disabled
ADDED
@@ -0,0 +1,47 @@
module FindArt
  class Amazon < Scraper
    start_url "http://webservices.amazon.com/"
    register_scraper :amazon
    @@urls = [
      "http://webservices.amazon.com/",
      "http://webservices.amazon.co.uk/",
      "http://webservices.amazon.co.jp/",
      "http://webservices.amazon.ca/",
      "http://webservices.amazon.fr/",
      "http://webservices.amazon.de/",
    ]


    # Uses the artist webservice to search for album covers.
    # Tries all international amazon sites
    def scrape(artist,title,opts={})
      search_url = "onca/xml?Service=AWSECommerceService&AWSAccessKeyId=0NK019CD48HNEDK3PBG2&Operation=ItemSearch&SearchIndex=Music&ResponseGroup=Small,Images&Keywords=#{CGI.escape("#{artist} #{title}")}"
      @@urls.each do |url|
        art = find_best_art("#{url}#{search_url}")
        return art unless art.nil?
      end
      nil
    end

    private

    # Finds the best quality artwork availible at amazon
    def find_best_art(search_url)
      browser = WWW::Mechanize.new
      search = browser.get(search_url)
      doc = Hpricot(search.body)

      large,medium,small = [],[],[]
      (doc/:item).each do |item|
        large << item.at("largeimage").at("url").innerHTML if item.at("largeimage")
        medium << item.at("mediumimage").at("url").innerHTML if item.at("mediumimage")
        small << item.at("smallimage").at("url").innerHTML if item.at("smallimage")
      end

      url = small.first unless small.empty?
      url = medium.first unless medium.empty?
      url = large.first unless large.empty?
    end

  end
end

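Note that the assignment order at the end of find_best_art means the largest available image wins, since the last non-empty list overwrites url. A standalone illustration with made-up urls:

small  = ["http://example.com/small.jpg"]
medium = ["http://example.com/medium.jpg"]
large  = ["http://example.com/large.jpg"]

url = small.first  unless small.empty?
url = medium.first unless medium.empty?
url = large.first  unless large.empty?   # last assignment wins when present
url  # => "http://example.com/large.jpg"
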
data/lib/FindArt/scrapers/discogs.rb
ADDED
@@ -0,0 +1,44 @@
# module FindArt
# #
# # class Discogs < Scraper
# # start_url "http://www.discogs.com/search?type=all&q="
# # register_scraper :discogs
# #
# # def scrape(artist,title,opts={})
# # url = nil
# # search_url = "#{@@url}#{CGI.escape("#{artist} #{title}")}"
# # browser = WWW::Mechanize.new
# # browser.get(search_url) do |page|
# # doc = Hpricot(page.body)
# #
# # # check if there are multiple results and get the top result
# # element = doc.at("* .search_result a")
# # if !element.nil?
# # # extract and fetch item page
# # item_page = browser.get(element.attributes["href"])
# # doc = Hpricot(item_page.body)
# #
# #
# #
# # end
# #
# # #extract art from product page
# # end
# # url
# # end
# #
# # # Extracts album art url from wallmart product page
# # def extract_art(doc)
# # puts doc
# # # url = nil
# # # element = doc.at("* td[@align='center'] a[href~='/viewimages']")
# # # href = element["href"] if !element.nil? && !element["href"].nil?
# # # if href
# # # match, url = *href.match(/javascript:photo_opener\('(http:\/\/.*.jpg)&/)
# # # end
# # # url
# # end
# #
# # end
#
# end

data/lib/FindArt/scrapers/junodownload.rb
ADDED
@@ -0,0 +1,31 @@
module FindArt
  class JunoDownload < Scraper
    start_url "http://www.junodownload.com/search/"
    register_scraper :junodownload

    def scrape(artist,title,opts={})
      url = nil

      browser = WWW::Mechanize.new
      browser.get(@@url) do |page|
        # go to search form
        search = page.form_with(:action => 'http://www.junodownload.com/search/') do |f|
          f.q = "#{artist} #{title}"
        end.click_button
        doc = Hpricot(search.body)
        element = doc.at('.productcover img[@src^="http://cdn.images.juno.co.uk/75/"]')
        src = nil
        if !element.nil?
          src = element.attributes['src']
          # instead of cliking on the image to find the uri of the larger image we will replace some parts of the url
          # lets hope junodownload keeps this convention
          src.gsub!("/75/","/full/")
          src.gsub!("-TN","-BIG")
          url = src
        end
      end
      url
    end

  end
end

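The thumbnail-to-full-size rewrite relies on Juno Download's url naming convention assumed by the scraper above. A standalone illustration with a hypothetical file name:

# hypothetical 75px thumbnail url matching the selector above
src = "http://cdn.images.juno.co.uk/75/CS123456-02-TN.jpg"

src.gsub!("/75/", "/full/")
src.gsub!("-TN", "-BIG")
src  # => "http://cdn.images.juno.co.uk/full/CS123456-02-BIG.jpg"
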
data/lib/FindArt/scrapers/walmart.rb
ADDED
@@ -0,0 +1,38 @@
module FindArt
  class WalMart < Scraper
    start_url "http://www.walmart.com/search/search-ng.do?earch_sort=2&search_query="
    register_scraper :walmart

    def scrape(artist,title,opts={})
      url = nil
      search_url = "#{@@url}#{CGI.escape("#{artist} #{title}")}"
      browser = WWW::Mechanize.new
      browser.get(search_url) do |page|
        doc = Hpricot(page.body)

        # check if there are multiple results and get the top result
        element = doc.at("* .firstRow a")
        if !element.nil?
          # extract and fetch item page
          item_page = browser.get(element.attributes["href"])
          doc = Hpricot(item_page.body)
        end

        #extract art from product page
        url = extract_art(doc)
      end
      url
    end

    # Extracts album art url from wallmart product page
    def extract_art(doc)
      url = nil
      element = doc.at("* div[@class='LargeItemPhoto150'] a[@href^=javascript]")
      href = element["href"] if !element.nil? && !element["href"].nil?
      if href
        match, url = *href.match(/javascript:photo_opener\('(http:\/\/.*.jpg)&/)
      end
      url
    end
  end
end

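The extract_art regex above pulls the full-size jpg out of a javascript: photo_opener handler. A standalone illustration with a made-up href (the real product-page markup may differ):

# hypothetical href from a product page's photo link
href = "javascript:photo_opener('http://images.example.com/cover_500X500.jpg&width=500')"

match, url = *href.match(/javascript:photo_opener\('(http:\/\/.*.jpg)&/)
url  # => "http://images.example.com/cover_500X500.jpg"
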
data/script/console
ADDED
@@ -0,0 +1,10 @@
#!/usr/bin/env ruby
# File: script/console
irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the cosole
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/FindArt.rb'}"
puts "Loading FindArt gem"
exec "#{irb} #{libs} --simple-prompt"

data/script/destroy
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)

data/script/generate
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)

data/spec/albumartexchange_spec.rb
ADDED
@@ -0,0 +1,26 @@
require File.dirname(__FILE__) + '/spec_helper.rb'

describe AlbumArtExchange do

  it "Should have a start url form where the scaper starts" do
    AlbumArtExchange.class_variables.should include("@@url")
    AlbumArtExchange.url.should == 'http://www.albumartexchange.com/covers.php?sort=7&q='
  end

  it "Should find the album art url for DJ TIESTO - In Search Of Sunrise 6" do
    @album = AlbumArtExchange.new()
    @album.scrape("DJ TIESTO","In Search of Sunrise 6").should == "http://www.albumartexchange.com/gallery/images/public/ti/tiesto-insear_02.jpg"
  end

  it "Should return nil when no album art is found" do
    @album = AlbumArtExchange.new()
    @album.scrape("Irene Moors & De Smurfen","Ga Je Mee Naar Smurfenland").should be(nil)
  end

  it "Should find the album art for the following albums" do
    @album = AlbumArtExchange.new()
    @album.scrape("Hi_Tack","Let's Dance").should == "http://www.albumartexchange.com/gallery/images/public/hi/hi_tac-letsda.jpg"
    @album.scrape("24 Grana","underpop").should == "http://www.albumartexchange.com/gallery/images/public/24/24gran-underp_02.jpg"
    @album.scrape("Green Day","Dookie").should == "http://www.albumartexchange.com/gallery/images/public/gr/greend-dookie_05.jpg"
  end
end

data/spec/amazon_spec.rb
ADDED
@@ -0,0 +1,34 @@
# require File.dirname(__FILE__) + '/spec_helper.rb'
#
# describe Amazon do
#
# it "Should have a start url form where the scaper starts" do
# Amazon.class_variables.should include("@@url")
# Amazon.url.should == 'http://webservices.amazon.com/'
# end
#
# it "Should find the album art url for DJ TIESTO - In Search Of Sunrise 6" do
# @amazon = Amazon.new()
# @amazon.scrape("DJ TIESTO","In Search of Sunrise 6").should == "http://ecx.images-amazon.com/images/I/515ZxHVU5RL.jpg"
# end
#
# it "Should find the album art url for Caféine de Christophe Willem " do
# @amazon = Amazon.new()
# @amazon.scrape("Christophe Willem","Caféine").should == "http://ecx.images-amazon.com/images/I/61sOk67nVkL.jpg"
# end
#
# it "Should find the album art url for Miss Météores de Olivia Ruiz" do
# @amazon = Amazon.new()
# @amazon.scrape("Olivia Ruiz","Miss Météores").should == "http://ecx.images-amazon.com/images/I/51S7recnyQL.jpg"
# end
#
# it "Should find the album art url for 初音ミク ベスト impacts" do
# @amazon = Amazon.new()
# @amazon.scrape(" 初音ミク ベスト","impacts").should == "http://ecx.images-amazon.com/images/I/51P-6djocGL.jpg"
# end
#
# it "Should return nil if no album art is found " do
# @amazon = Amazon.new()
# @amazon.scrape("internets","i did it for teh lulz").should == nil
# end
# end