RubyGems - rbook - Versions diffs - 0.1 - Mend

rbook 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

data/COPYING +340 -0
data/LICENSE +13 -0
data/README +16 -0
data/Rakefile +206 -0
data/examples/titlepage.rb +14 -0
data/examples/www/find_all.rb +23 -0
data/examples/www/find_cover_from_amazon.rb +12 -0
data/examples/www/find_url_from_rainbow.rb +12 -0
data/examples/www/list.rb +13 -0
data/lib/rbook/bisac.rb +175 -0
data/lib/rbook/errors.rb +7 -0
data/lib/rbook/isbn.rb +249 -0
data/lib/rbook/onix.rb +68 -0
data/lib/rbook/onix/contributor.rb +60 -0
data/lib/rbook/onix/lists.rb +2 -0
data/lib/rbook/onix/lists/contributor_role.rb +10 -0
data/lib/rbook/onix/lists/product_form.rb +100 -0
data/lib/rbook/onix/message.rb +101 -0
data/lib/rbook/onix/product.rb +188 -0
data/lib/rbook/onix/sales_restriction.rb +51 -0
data/lib/rbook/onix/supply_detail.rb +68 -0
data/lib/rbook/onix/xchar.rb +98 -0
data/lib/rbook/titlepage.rb +96 -0
data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
data/lib/rbook/www.rb +172 -0
data/lib/rbook/www/aau_scraper.rb +76 -0
data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
data/lib/rbook/www/base.rb +87 -0
data/lib/rbook/www/harper_au_scraper.rb +56 -0
data/lib/rbook/www/harper_us_scraper.rb +55 -0
data/lib/rbook/www/hha_scraper.rb +50 -0
data/lib/rbook/www/macmillan_scraper.rb +62 -0
data/lib/rbook/www/orbis_scraper.rb +48 -0
data/lib/rbook/www/oup_scraper.rb +64 -0
data/lib/rbook/www/paulist_scraper.rb +53 -0
data/lib/rbook/www/pearson_au_scraper.rb +52 -0
data/lib/rbook/www/penguin_scraper.rb +45 -0
data/lib/rbook/www/random_au_scraper.rb +90 -0
data/lib/rbook/www/random_us_scraper.rb +59 -0
data/lib/rbook/www/sas_scraper.rb +54 -0
data/lib/rbook/www/unireps_scraper.rb +58 -0
data/lib/rbook/www/wiley_us_scraper.rb +54 -0
data/test/data/abingdon.xml +38931 -0
data/test/data/augsburg.xml +39009 -0
data/test/data/chalice.xml +10851 -0
data/test/data/eerdsman.xml +36942 -0
data/test/data/invalid_no_product.xml +9 -0
data/test/data/not_xml.csv +1 -0
data/test/data/single_product.xml +50 -0
data/test/data/xml_not_onix.xml +7 -0
data/test/mocks/titlepage_driver.rb +107 -0
data/test/unit/bisac_test.rb +57 -0
data/test/unit/isbn_test.rb +149 -0
data/test/unit/onix/contributor_test.rb +50 -0
data/test/unit/onix/message_test.rb +119 -0
data/test/unit/onix/product_test.rb +101 -0
data/test/unit/onix/sales_restriction_test.rb +48 -0
data/test/unit/onix/supply_detail_test.rb +53 -0
data/test/unit/onix/xchar_test.rb +37 -0
data/test/unit/titlepage_test.rb +127 -0
metadata +130 -0

data/lib/rbook/www/aau_scraper.rb ADDED Viewed

@@ -0,0 +1,76 @@
+module RBook
+  module WWW
+  class AAUScraper < Base
+    SCRAPER_ID = :aau
+    SCRAPER_NAME = "Allen and Unwin".freeze
+    SCRAPER_SITE = "http://www.allenandunwin.com/".freeze
+    #add_publisher( self, "9781741100000", "9781741199999" )
+    #add_publisher( self, "9781865000000", "9781865099999" )
+    add_scraper( self )
+    def initialize
+      @url_protocol = "http://"
+      @url_host = "www.allenandunwin.com"
+      @url_path = "/bookseller/product.aspx?ISBN="
+    end
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      rba = Scraper.define do
+        process "span#lblISBN", :isbn => :text
+        process "h1>span#lblBookTitle", :title => :text
+        process "span#lblAusRRP", :rrp => :text
+        process "span#lblPublisher", :publisher => :text
+        process "span#lblImprint", :imprint => :text
+        process "span#lblBinding", :form => :text
+        process "span#lblExtent", :pages => :text
+        process "span#lblPubDate", :pubdate => :text
+        process "span#lblDescription", :description => :text
+        process "span#lblAuthor_bio", :authorbio => :text
+        process "a#hypHiRes", :cover_large => "@href"
+        process "a#imgProduct", :cover_thumb => "@href"
+        result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
+      end
+      content = Net::HTTP.get URI.parse(get_link(isbn))
+      result = rba.scrape(content)
+      if result.title.nil? || result.title == ""
+        return nil
+      else
+        info = {}
+        info[:isbn] = result.isbn.gsub("ISBN : ", "")
+        info[:title] = result.title unless result.title.nil?
+        info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
+        info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.imprint.nil?
+        info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
+        info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
+        info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
+        info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
+        info[:description] = result.description unless result.description.nil?
+        info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
+        info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
+        info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
+        info[:link] = get_link(isbn)
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+    def get_link(isbn)
+      return nil unless ISBN::valid_isbn?(isbn)
+      return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
+    end
+  end
+  end
+end

data/lib/rbook/www/amazon_uk_scraper.rb ADDED Viewed

@@ -0,0 +1,44 @@
+module RBook
+  module WWW
+  class AmazonUKScraper < Base
+    SCRAPER_ID = :amazon_uk
+    SCRAPER_NAME = "Amazon UK".freeze
+    SCRAPER_SITE = "http://www.amazon.co.uk/".freeze
+    #add_retailer( self )
+    add_scraper( self )
+    def get_cover(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn)
+      isbn10 = ISBN::convert_to_isbn10(isbn)
+      return nil if isbn.nil? || isbn10.nil?
+      url_prefix = "http://images.amazon.com/images/P/"
+      url_suffix = ".02.LZZZZZZZ.jpg"
+      link = url_prefix + isbn10 + url_suffix
+      begin
+        response = Net::HTTP.get_response URI.parse(link)
+          if response.code != "200"
+            return nil
+          elsif response.body.size <= 807
+            return nil
+          else
+            result = {}
+            result[:data] = response.body
+            result[:content_type] = "image/jpeg"
+            return result
+          end
+      rescue
+        return nil
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/base.rb ADDED Viewed

@@ -0,0 +1,87 @@
+require 'net/http'
+require 'uri'
+module RBook
+  module WWW
+  class Base
+    @@scrapers = []
+    # registers a new scraper with the library.
+    # classname - the class to add
+    def self.add_scraper(classname)
+      @@scrapers << classname
+    end
+    # find a scraper matching the requested id
+    # id - a scraper id as a symbol
+    def self.find_scraper(id)
+      @@scrapers.each do |scraper|
+        return scraper if scraper::SCRAPER_ID == id
+      end
+      return nil
+    end
+    # find any scrapers matching the requested ids
+    # ids - an array of scraper id's as symbols
+    def self.find_scrapers(ids)
+      ret = []
+      @@scrapers.each do |scraper|
+        ret << scraper if ids.contains?(scraper::SCRAPER_ID)
+      end
+      return ret
+    end
+    # This method can be overwritten in each scraper. It should return a hash containing the binary data
+    # and mimetype of the largest cover image it can find for the requested isbn
+    def get_cover(isbn)
+      info = get_info(isbn)
+      return nil if info.nil?
+      return nil unless info.kind_of?(Hash)
+      link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
+      return nil if link.nil?
+      begin
+        response = Net::HTTP.get_response URI.parse(link)
+          if response.code != "200"
+            raise response.code.to_s
+            return nil
+          else
+            result = {}
+            result[:data] = response.body
+            result[:content_type] = "image/jpeg"
+            return result
+          end
+      rescue
+        return nil
+      end
+    end
+    # This method can be overwritten in each scraper. It should return a hash of any information on
+    # the requested isbn it can find
+    def get_info(isbn)
+      nil
+    end
+    # This method can be overwritten in each scraper. It should return a link to the requested isbn
+    # on the targets website
+    def get_link(isbn)
+      nil
+    end
+    # return the symbol used to uniquely identify each scraper
+    def scraper_id
+      return SCRAPER_ID
+    end
+    def self.scrapers
+      @@scrapers
+    end
+  end
+end
+end

data/lib/rbook/www/harper_au_scraper.rb ADDED Viewed

@@ -0,0 +1,56 @@
+module RBook
+  module WWW
+  class HarperCollinsAUScraper < Base
+    SCRAPER_ID = :harper_au
+    SCRAPER_NAME = "Harper Collins Australia".freeze
+    SCRAPER_SITE = "http://www.harpercollins.com.au/".freeze
+    #add_publisher( self, "9780006400000", "9780006499999" )
+    #add_publisher( self, "9780007100000", "9780007199999" )
+    add_scraper( self )
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      @protocol = "http://"
+      @host = "www.harpercollins.com.au"
+      @path = "/global_scripts/product_catalog/book_xml.asp?isbn="
+      @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
+      rba = Scraper.define do
+        process "div.header", :title => :text
+        process "div.subtitle", :subtitle => :text
+        process "div.byline", :author => :text
+        process "img.bookJacket", :cover => "@src"
+        result :title, :subtitle, :author, :cover
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:subtitle] = result.subtitle
+        info[:author] = result.author.gsub("by ","")
+        info[:cover] = result.cover
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/harper_us_scraper.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module RBook
+  module WWW
+  class HarperCollinsUSScraper < Base
+    SCRAPER_ID = :harper_us
+    SCRAPER_NAME = "Harper Collins United States".freeze
+    SCRAPER_SITE = "http://www.harpercollins.com/".freeze
+    #add_publisher( self, "9780060000000", "9780060999999" )
+    add_scraper( self )
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      @protocol = "http://"
+      @host = "www.harpercollins.com"
+      @path = "/book/index.aspx?isbn="
+      @link = @protocol + @host + @path + isbn
+      rba = Scraper.define do
+        process "h1.bookTitle", :title => :text
+        process "h2.bookSubTitle", :subtitle => :text
+        process "h3.byLine", :author => :text
+        process "img.bookJacket", :cover => "@src"
+        result :title, :subtitle, :author, :cover
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:subtitle] = result.subtitle
+        info[:author] = result.author.gsub("by ", "")
+        info[:cover_thumb] = result.cover
+        info[:cover_medium] = result.cover.gsub("medium", "large")
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/hha_scraper.rb ADDED Viewed

@@ -0,0 +1,50 @@
+module RBook
+  module WWW
+  class HHAScraper < Base
+    SCRAPER_ID = :hha
+    SCRAPER_NAME = "Hodder Headline Australia".freeze
+    SCRAPER_SITE = "http://www.hha.com.au/".freeze
+    #add_publisher( self, "9780340800000", "9780340899999" )
+    #add_publisher( self, "9780755300000", "9780755399999" )
+    #add_publisher( self, "9780733600000", "9780733699999" )
+    add_scraper( self )
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.hha.com.au"
+      @path = "/books/"
+      @suffix = ".html"
+      @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn) + @suffix
+      rba = Scraper.define do
+        process "h1.fiction", :title => :text
+        process "p.author", :author => :text
+        process "p.thumb>img", :cover_thumb => "@src"
+        result :title, :author, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.cover_thumb.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:author] = result.author
+        info[:cover_thumb] = @protocol + @host + result.cover_thumb
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/macmillan_scraper.rb ADDED Viewed

@@ -0,0 +1,62 @@
+module RBook
+  module WWW
+  class MacmillanScraper < Base
+    SCRAPER_ID = :macmillan
+    SCRAPER_NAME = "Pan Macmillan".freeze
+    SCRAPER_SITE = "http://www.panmacmillan.com.au/".freeze
+    #add_publisher( self, "9780312900000", "9780312999999" )
+    #add_publisher( self, "9780330400000", "9780330499999" )
+    #add_publisher( self, "9781403000000", "9781405099999" )
+    add_scraper( self )
+    def initialize
+      @url_protocol = "http://"
+      @url_host = "www.panmacmillan.com.au"
+      @url_path = "/display_title.asp?ISBN="
+      @url_suffix = "&Author=Barker,%20Robin"
+    end
+    def get_info(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn)
+      return nil if isbn.nil?
+      mac = Scraper.define do
+        process "div.titlecontent>div.isbn>span", :isbn => :text
+        process "td[width=70%]>h1", :title => :text
+        process "a[title=Click on image to view a larger version]>img", :cover_medium => "@src"
+        process "a[title=Click on image to view a larger version]", :cover_large => "@href"
+        result :isbn, :title, :cover_medium, :cover_large
+      end
+      content = Net::HTTP.get URI.parse(get_link(isbn))
+      result = mac.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:cover_medium] = @url_protocol + @url_host + result.cover_medium.gsub("..", "") unless result.cover_medium.nil?
+        info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub("..", "") unless result.cover_large.nil?
+        info[:link] = get_link(isbn)
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+    def get_link(isbn)
+      return nil unless ISBN::valid_isbn?(isbn)
+      return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn) + @url_suffix
+    end
+  end
+  end
+end

data/lib/rbook/www/orbis_scraper.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module RBook
+  module WWW
+  class OrbisScraper < Base
+    SCRAPER_ID = :orbis
+    SCRAPER_NAME = "Orbis Books"
+    SCRAPER_SITE = "http://www.orbisbooks.com/"
+    #add_publisher( self, "978157070000", "9781570799999")
+    add_scraper( self )
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.maryknollmall.org"
+      @path = "/description.cfm?ISBN="
+      @grouped_isbn = ISBN::add_groups(ISBN::convert_to_isbn10(isbn))
+      @link = @protocol + @host + @path + @grouped_isbn
+      oup = Scraper.define do
+        process "tr>td[colspan=4]>font[size=3]", :description => :text # doesn't currently work
+        process "table>tr>td[rowspan=2]>img", :cover_thumb => "@src"
+        result :description, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = oup.scrape(content)
+      if result.cover_thumb.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end