findart 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
# Cucumber support: block all real HTTP connections so features never
# hit the network (scraper responses must be registered with FakeWeb).
gem "fakeweb"
require "fakeweb"

Before do
  FakeWeb.allow_net_connect = false
end


# Step definition capturing the album the user wants artwork for.
# NOTE(review): the captured +query+ is currently unused — placeholder step.
Given /^that i want to find artwork for the album "([^\"]*)"$/ do |query| end
@@ -0,0 +1,29 @@
1
# Helpers mixed into the Cucumber World for running step code inside the
# sandbox directories created by the feature environment.
module CommonHelpers
  # Runs the block with the scratch directory as working directory.
  def in_tmp_folder(&blk)
    FileUtils.chdir(@tmp_root, &blk)
  end

  # Runs the block inside the active project folder, falling back to the
  # scratch root when no project has been set up yet.
  def in_project_folder(&blk)
    FileUtils.chdir(@active_project_folder || @tmp_root, &blk)
  end

  # Runs the block with the sandboxed home directory as working directory.
  def in_home_folder(&blk)
    FileUtils.chdir(@home_path, &blk)
  end

  # Prepends a $LOAD_PATH override to the project's Rakefile so rake tasks
  # load the local library under test instead of an installed gem.
  def force_local_lib_override(project_name = @project_name)
    rakefile_path = File.join(project_name, 'Rakefile')
    original = File.read(rakefile_path)
    File.open(rakefile_path, "w+") do |io|
      io << "$:.unshift('#{@lib_path}')\n"
      io << original
    end
  end

  # Records +project_name+ as the current project and resolves its folder
  # underneath the scratch root.
  def setup_active_project_folder project_name
    @active_project_folder = File.join(@tmp_root, project_name)
    @project_name = project_name
  end
end

World(CommonHelpers)
@@ -0,0 +1,14 @@
1
# Cucumber environment: loads the FindArt library plus the test frameworks,
# then rebuilds a scratch tmp/home sandbox before every scenario.
require File.dirname(__FILE__) + "/../../lib/FindArt"

gem 'cucumber'
require 'cucumber'
gem 'rspec'
require 'spec'

Before do
  # fresh scratch directory for every scenario
  @tmp_root = File.dirname(__FILE__) + "/../../tmp"
  @home_path = File.expand_path(File.join(@tmp_root, "home"))
  FileUtils.rm_rf @tmp_root
  FileUtils.mkdir_p @home_path
  # point HOME at the sandbox so the code under test cannot touch the real one
  ENV['HOME'] = @home_path
end
@@ -0,0 +1,11 @@
1
# RSpec 1.x style custom matcher, mixed into the Cucumber World:
#   output.should contain("some text")
# Succeeds when +given+ contains +expected+ (delegates to #index, so it
# works for both String substrings and Array membership).
module Matchers
  def contain(expected)
    simple_matcher("contain #{expected.inspect}") do |given, matcher|
      matcher.failure_message = "expected #{given.inspect} to contain #{expected.inspect}"
      matcher.negative_failure_message = "expected #{given.inspect} not to contain #{expected.inspect}"
      # truthy (an index) when found, nil when not — exactly what RSpec needs
      given.index expected
    end
  end
end

World(Matchers)
@@ -0,0 +1,20 @@
1
# Library entry point: sets up the load path, loads dependencies, applies a
# small metaprogramming patch, and pulls in the Scraper base class plus all
# site-specific scrapers.
$:.unshift(File.dirname(__FILE__)) unless
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'rubygems'
require 'mechanize'
require 'hpricot'
# Fix: the scrapers build search urls with CGI.escape, but nothing required
# 'cgi' explicitly — load it here rather than relying on a transitive require.
require 'cgi'

#meta programming patch
class Object # http://whytheluckystiff.net/articles/seeingMetaclassesClearly.html
  # Defines a singleton (metaclass) method +name+ on this object.
  def meta_def name, &blk
    (class << self; self; end).instance_eval { define_method name, &blk }
  end
end

module FindArt
  VERSION = '0.0.1'
end

# Load the Scraper base class first; each scraper file self-registers.
require "#{File.dirname(__FILE__)}/FindArt/scraper.rb"
Dir["#{File.dirname(__FILE__)}/FindArt/scrapers/*.rb"].each {|file| require file }
@@ -0,0 +1,76 @@
1
+
2
# Factory that contains site specific scrapers.
# Scrapers are classes that collect urls of album art for a given artist and title.
# Scraper classes can register themselves by calling register_scraper in the class body.
# == Example
#
#   @scraper = Scraper.new
#
#   class TestScraper < Scraper
#     register_scraper :test_scraper
#   end
#
#   @scraper.scrapers
#   {:test_scraper=>TestScraper}
module FindArt
  class Scraper
    # registry shared by the whole Scraper hierarchy: name => scraper class
    @@scrapers = {}

    # class method for registering scrapers
    def self.register_scraper(name)
      @@scrapers[name] = self
    end

    # hash of all registered scrapers
    def scrapers
      @@scrapers
    end

    # hostnames of the start urls of all registered scrapers
    def self.registerd_sites
      @@scrapers.map {|scraper_name,klass| URI(klass.url).host}
    end

    # clears all registered scrapers
    def self.unregister_scrapers!
      @@scrapers = {}
    end

    # Runs every registered scraper (one thread each) against artist/title
    # and returns an array of the non-nil urls they produced.
    #
    # Fix: the original wrapped the whole join loop in a single bare
    # begin/rescue, so one failing scraper thread silently discarded the
    # results of every scraper joined after it. Failures are now swallowed
    # per-thread, keeping the search best-effort without losing good results.
    def find_art(artist,title,opts=nil)
      threads = @@scrapers.map do |scraper_name, klass|
        scraper = klass.new
        Thread.new { Thread.current["results"] = scraper.scrape(artist, title, opts) }
      end
      results = []
      threads.each do |t|
        begin
          t.join
          results << t["results"] unless t["results"].nil?
        rescue StandardError
          # best-effort: ignore scrapers that raised; the rest still count
        end
      end
      results
    end

    private

    # helper method for setting the url where the scraper starts its search for album art
    #
    # == Example
    #
    #   class JunoDownload < Scraper
    #     start_url "http://www.junodownload.com/search/"
    #     register_scraper :junodownload
    #   end
    #
    # Stores the url in the subclass's @@url and defines a class-level +url+
    # reader via the Object#meta_def patch.
    def self.start_url(url)
      class_variable_set "@@url", url
      meta_def :url do
        class_variable_get "@@url"
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module FindArt
  # Scrapes albumartexchange.com search results for a cover image.
  class AlbumArtExchange < Scraper
    start_url "http://www.albumartexchange.com/covers.php?sort=7&q="
    register_scraper :albumartexchange

    # Searches for "artist title" and returns the url of the first matching
    # 150px gallery thumbnail's full image, or nil when nothing matches.
    def scrape(artist,title,opts={})
      found = nil
      query_url = @@url + CGI.escape("#{artist} #{title}")
      agent = WWW::Mechanize.new
      agent.get(query_url) do |page|
        markup = Hpricot(page.body)
        img = markup.at("* table tr td a img[@width='150']")
        src = img["src"] if !img.nil? && !img["src"].nil?
        if src
          # the thumbnail src embeds the real image path after "src="
          path = src.split("src=").last
          found = "http://www.albumartexchange.com#{URI.decode(path)}"
        end
      end
      found
    end
  end
end
@@ -0,0 +1,47 @@
1
module FindArt
  # Queries the Amazon AWSECommerceService ItemSearch webservice for covers.
  class Amazon < Scraper
    start_url "http://webservices.amazon.com/"
    register_scraper :amazon
    # International AWS endpoints, tried in order until one yields artwork.
    @@urls = [
      "http://webservices.amazon.com/",
      "http://webservices.amazon.co.uk/",
      "http://webservices.amazon.co.jp/",
      "http://webservices.amazon.ca/",
      "http://webservices.amazon.fr/",
      "http://webservices.amazon.de/",
    ]


    # Uses the artist webservice to search for album covers.
    # Tries all international amazon sites; returns the first hit or nil.
    def scrape(artist,title,opts={})
      # NOTE(review): AWS access key is hard-coded here; consider moving it
      # to configuration.
      search_url = "onca/xml?Service=AWSECommerceService&AWSAccessKeyId=0NK019CD48HNEDK3PBG2&Operation=ItemSearch&SearchIndex=Music&ResponseGroup=Small,Images&Keywords=#{CGI.escape("#{artist} #{title}")}"
      @@urls.each do |url|
        art = find_best_art("#{url}#{search_url}")
        return art unless art.nil?
      end
      nil
    end

    private

    # Finds the best quality artwork available at amazon: prefers a large
    # image, then medium, then small; nil when the search had no images.
    def find_best_art(search_url)
      browser = WWW::Mechanize.new
      search = browser.get(search_url)
      doc = Hpricot(search.body)

      large,medium,small = [],[],[]
      (doc/:item).each do |item|
        large << item.at("largeimage").at("url").innerHTML if item.at("largeimage")
        medium << item.at("mediumimage").at("url").innerHTML if item.at("mediumimage")
        small << item.at("smallimage").at("url").innerHTML if item.at("smallimage")
      end

      # Fix: the original ended with `url = large.first unless large.empty?`,
      # whose value is nil whenever `large` is empty — so the method returned
      # nil even when medium/small results existed. Return the best populated
      # tier explicitly instead.
      large.first || medium.first || small.first
    end

  end
end
@@ -0,0 +1,44 @@
1
+ # module FindArt
2
+ # #
3
+ # # class Discogs < Scraper
4
+ # # start_url "http://www.discogs.com/search?type=all&q="
5
+ # # register_scraper :discogs
6
+ # #
7
+ # # def scrape(artist,title,opts={})
8
+ # # url = nil
9
+ # # search_url = "#{@@url}#{CGI.escape("#{artist} #{title}")}"
10
+ # # browser = WWW::Mechanize.new
11
+ # # browser.get(search_url) do |page|
12
+ # # doc = Hpricot(page.body)
13
+ # #
14
+ # # # check if there are multiple results and get the top result
15
+ # # element = doc.at("* .search_result a")
16
+ # # if !element.nil?
17
+ # # # extract and fetch item page
18
+ # # item_page = browser.get(element.attributes["href"])
19
+ # # doc = Hpricot(item_page.body)
20
+ # #
21
+ # #
22
+ # #
23
+ # # end
24
+ # #
25
+ # # #extract art from product page
26
+ # # end
27
+ # # url
28
+ # # end
29
+ # #
30
+ # # # Extracts album art url from wallmart product page
31
+ # # def extract_art(doc)
32
+ # # puts doc
33
+ # # # url = nil
34
+ # # # element = doc.at("* td[@align='center'] a[href~='/viewimages']")
35
+ # # # href = element["href"] if !element.nil? && !element["href"].nil?
36
+ # # # if href
37
+ # # # match, url = *href.match(/javascript:photo_opener\('(http:\/\/.*.jpg)&/)
38
+ # # # end
39
+ # # # url
40
+ # # end
41
+ # #
42
+ # # end
43
+ #
44
+ # end
@@ -0,0 +1,31 @@
1
module FindArt
  # Scrapes junodownload.com search results for a full-size cover image.
  class JunoDownload < Scraper
    start_url "http://www.junodownload.com/search/"
    register_scraper :junodownload

    # Submits the site's search form for "artist title" and returns the url
    # of the full-size cover derived from the result thumbnail, or nil.
    def scrape(artist,title,opts={})
      found = nil

      agent = WWW::Mechanize.new
      agent.get(@@url) do |page|
        # go to search form
        result_page = page.form_with(:action => 'http://www.junodownload.com/search/') do |form|
          form.q = "#{artist} #{title}"
        end.click_button
        markup = Hpricot(result_page.body)
        thumb = markup.at('.productcover img[@src^="http://cdn.images.juno.co.uk/75/"]')
        unless thumb.nil?
          src = thumb.attributes['src']
          # instead of cliking on the image to find the uri of the larger image we will replace some parts of the url
          # lets hope junodownload keeps this convention
          src.gsub!("/75/","/full/")
          src.gsub!("-TN","-BIG")
          found = src
        end
      end
      found
    end

  end
end
@@ -0,0 +1,38 @@
1
module FindArt
  # Scrapes walmart.com search results for a product cover image.
  class WalMart < Scraper
    start_url "http://www.walmart.com/search/search-ng.do?earch_sort=2&search_query="
    register_scraper :walmart

    # Searches for "artist title", follows the top result to its product
    # page, and returns the large cover image url, or nil when none found.
    def scrape(artist,title,opts={})
      url = nil
      search_url = "#{@@url}#{CGI.escape("#{artist} #{title}")}"
      browser = WWW::Mechanize.new
      browser.get(search_url) do |page|
        doc = Hpricot(page.body)

        # check if there are multiple results and get the top result
        element = doc.at("* .firstRow a")
        if !element.nil?
          # extract and fetch item page
          item_page = browser.get(element.attributes["href"])
          doc = Hpricot(item_page.body)
        end

        #extract art from product page
        url = extract_art(doc)
      end
      url
    end

    # Extracts album art url from walmart product page.
    # The url is embedded in a javascript:photo_opener('<url>&...') href.
    def extract_art(doc)
      url = nil
      element = doc.at("* div[@class='LargeItemPhoto150'] a[@href^=javascript]")
      href = element["href"] if !element.nil? && !element["href"].nil?
      if href
        # Fix: escape the dot in ".jpg" — the original /.*.jpg/ allowed any
        # character before "jpg".
        match, url = *href.match(/javascript:photo_opener\('(http:\/\/.*\.jpg)&/)
      end
      url
    end
  end
end
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
# Opens an irb session with the FindArt library preloaded.

# Fix: the platform check used "(:?mswin|mingw)" — an optional-colon typo
# for the non-capturing group "(?:...)". It happened to still match, but the
# intent was clearly a non-capturing group.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/FindArt.rb'}"
puts "Loading FindArt gem"
# replace this process with irb so the session owns the terminal
exec "#{irb} #{libs} --simple-prompt"
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# script/destroy — removes files previously created by a rubigen generator.
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

begin
  require 'rubigen'
rescue LoadError
  # fall back to loading rubigen through rubygems when it is not on $LOAD_PATH
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# drop a leading --help/-h so rubigen prints its own usage message
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# script/generate — runs a rubigen generator for this project.
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))

begin
  require 'rubigen'
rescue LoadError
  # fall back to loading rubigen through rubygems when it is not on $LOAD_PATH
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# drop a leading --help/-h so rubigen prints its own usage message
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,26 @@
1
require File.dirname(__FILE__) + '/spec_helper.rb'

# Live-network specs against albumartexchange.com.
describe AlbumArtExchange do

  before(:each) do
    @album = AlbumArtExchange.new
  end

  it "Should have a start url form where the scaper starts" do
    AlbumArtExchange.class_variables.should include("@@url")
    AlbumArtExchange.url.should == 'http://www.albumartexchange.com/covers.php?sort=7&q='
  end

  it "Should find the album art url for DJ TIESTO - In Search Of Sunrise 6" do
    @album.scrape("DJ TIESTO","In Search of Sunrise 6").should == "http://www.albumartexchange.com/gallery/images/public/ti/tiesto-insear_02.jpg"
  end

  it "Should return nil when no album art is found" do
    @album.scrape("Irene Moors & De Smurfen","Ga Je Mee Naar Smurfenland").should be(nil)
  end

  it "Should find the album art for the following albums" do
    @album.scrape("Hi_Tack","Let's Dance").should == "http://www.albumartexchange.com/gallery/images/public/hi/hi_tac-letsda.jpg"
    @album.scrape("24 Grana","underpop").should == "http://www.albumartexchange.com/gallery/images/public/24/24gran-underp_02.jpg"
    @album.scrape("Green Day","Dookie").should == "http://www.albumartexchange.com/gallery/images/public/gr/greend-dookie_05.jpg"
  end
end
@@ -0,0 +1,34 @@
1
+ # require File.dirname(__FILE__) + '/spec_helper.rb'
2
+ #
3
+ # describe Amazon do
4
+ #
5
+ # it "Should have a start url form where the scaper starts" do
6
+ # Amazon.class_variables.should include("@@url")
7
+ # Amazon.url.should == 'http://webservices.amazon.com/'
8
+ # end
9
+ #
10
+ # it "Should find the album art url for DJ TIESTO - In Search Of Sunrise 6" do
11
+ # @amazon = Amazon.new()
12
+ # @amazon.scrape("DJ TIESTO","In Search of Sunrise 6").should == "http://ecx.images-amazon.com/images/I/515ZxHVU5RL.jpg"
13
+ # end
14
+ #
15
+ # it "Should find the album art url for Caféine de Christophe Willem " do
16
+ # @amazon = Amazon.new()
17
+ # @amazon.scrape("Christophe Willem","Caféine").should == "http://ecx.images-amazon.com/images/I/61sOk67nVkL.jpg"
18
+ # end
19
+ #
20
+ # it "Should find the album art url for Miss Météores de Olivia Ruiz" do
21
+ # @amazon = Amazon.new()
22
+ # @amazon.scrape("Olivia Ruiz","Miss Météores").should == "http://ecx.images-amazon.com/images/I/51S7recnyQL.jpg"
23
+ # end
24
+ #
25
+ # it "Should find the album art url for 初音ミク ベスト impacts" do
26
+ # @amazon = Amazon.new()
27
+ # @amazon.scrape(" 初音ミク ベスト","impacts").should == "http://ecx.images-amazon.com/images/I/51P-6djocGL.jpg"
28
+ # end
29
+ #
30
+ # it "Should return nil if no album art is found " do
31
+ # @amazon = Amazon.new()
32
+ # @amazon.scrape("internets","i did it for teh lulz").should == nil
33
+ # end
34
+ # end