RubyGems - rbook - Versions diffs - 0.1 - Mend

rbook 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

data/COPYING +340 -0
data/LICENSE +13 -0
data/README +16 -0
data/Rakefile +206 -0
data/examples/titlepage.rb +14 -0
data/examples/www/find_all.rb +23 -0
data/examples/www/find_cover_from_amazon.rb +12 -0
data/examples/www/find_url_from_rainbow.rb +12 -0
data/examples/www/list.rb +13 -0
data/lib/rbook/bisac.rb +175 -0
data/lib/rbook/errors.rb +7 -0
data/lib/rbook/isbn.rb +249 -0
data/lib/rbook/onix.rb +68 -0
data/lib/rbook/onix/contributor.rb +60 -0
data/lib/rbook/onix/lists.rb +2 -0
data/lib/rbook/onix/lists/contributor_role.rb +10 -0
data/lib/rbook/onix/lists/product_form.rb +100 -0
data/lib/rbook/onix/message.rb +101 -0
data/lib/rbook/onix/product.rb +188 -0
data/lib/rbook/onix/sales_restriction.rb +51 -0
data/lib/rbook/onix/supply_detail.rb +68 -0
data/lib/rbook/onix/xchar.rb +98 -0
data/lib/rbook/titlepage.rb +96 -0
data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
data/lib/rbook/www.rb +172 -0
data/lib/rbook/www/aau_scraper.rb +76 -0
data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
data/lib/rbook/www/base.rb +87 -0
data/lib/rbook/www/harper_au_scraper.rb +56 -0
data/lib/rbook/www/harper_us_scraper.rb +55 -0
data/lib/rbook/www/hha_scraper.rb +50 -0
data/lib/rbook/www/macmillan_scraper.rb +62 -0
data/lib/rbook/www/orbis_scraper.rb +48 -0
data/lib/rbook/www/oup_scraper.rb +64 -0
data/lib/rbook/www/paulist_scraper.rb +53 -0
data/lib/rbook/www/pearson_au_scraper.rb +52 -0
data/lib/rbook/www/penguin_scraper.rb +45 -0
data/lib/rbook/www/random_au_scraper.rb +90 -0
data/lib/rbook/www/random_us_scraper.rb +59 -0
data/lib/rbook/www/sas_scraper.rb +54 -0
data/lib/rbook/www/unireps_scraper.rb +58 -0
data/lib/rbook/www/wiley_us_scraper.rb +54 -0
data/test/data/abingdon.xml +38931 -0
data/test/data/augsburg.xml +39009 -0
data/test/data/chalice.xml +10851 -0
data/test/data/eerdsman.xml +36942 -0
data/test/data/invalid_no_product.xml +9 -0
data/test/data/not_xml.csv +1 -0
data/test/data/single_product.xml +50 -0
data/test/data/xml_not_onix.xml +7 -0
data/test/mocks/titlepage_driver.rb +107 -0
data/test/unit/bisac_test.rb +57 -0
data/test/unit/isbn_test.rb +149 -0
data/test/unit/onix/contributor_test.rb +50 -0
data/test/unit/onix/message_test.rb +119 -0
data/test/unit/onix/product_test.rb +101 -0
data/test/unit/onix/sales_restriction_test.rb +48 -0
data/test/unit/onix/supply_detail_test.rb +53 -0
data/test/unit/onix/xchar_test.rb +37 -0
data/test/unit/titlepage_test.rb +127 -0
metadata +130 -0

data/lib/rbook/www/oup_scraper.rb ADDED Viewed

@@ -0,0 +1,64 @@
+module RBook
+  module WWW
+  class OUPScraper < Base
+    SCRAPER_ID = :oup
+    SCRAPER_NAME = "Oxford University Press"
+    SCRAPER_SITE = "http://au.oup.com/"
+    #add_publisher( self, "978019200000", "978019999999")
+    add_scraper( self )
+    def get_info(isbn)
+      isbn = ISBN::convert_to_isbn10(isbn)
+      @protocol = "http://"
+      @host = "au.oup.com"
+      @path = "/searchbuy/SearchBook.asp?isbn="
+      @link = @protocol + @host + @path + isbn
+      oup = Scraper.define do
+        process "div.isbn13", :isbn => :text
+        process "td.title", :title => :text
+        process "td.author", :author => :text
+        process "div.BookInfo", :misc => :text
+        process "td>p.blurb", :description => :text
+        process "td>img[width=100]", :cover => "@src"
+        result :isbn, :title, :author, :misc, :description, :cover
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = oup.scrape(content)
+      if result.isbn.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = result.isbn.gsub(/\ ISBN-13:\n/, "")
+        info[:title] = result.title
+        info[:author] = result.author
+        info[:description] = result.description
+        info[:cover_thumb] = @protocol + @host + result.cover
+        info[:link] = @link
+        tmp = result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
+        if !tmp.nil? && tmp.length == 5
+          info[:published] = tmp[1]
+          info[:pages] = tmp[2]
+          info[:format] = tmp[3]
+          info[:rrp] = tmp[4]
+        end
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/paulist_scraper.rb ADDED Viewed

@@ -0,0 +1,53 @@
+module RBook
+  module WWW
+  class PaulistScraper < Base
+    SCRAPER_ID = :paulist
+    SCRAPER_NAME = "Paulist Press".freeze
+    SCRAPER_SITE = "http://www.paulistpress.com/".freeze
+    #add_publisher( self, "9780809100000", "9780809199999" )
+    add_scraper( self )
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      @protocol = "http://"
+      @host = "www.paulistpress.com"
+      @path = "/"
+      @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
+      @suffix = ".html"
+      @link = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix
+      rba = Scraper.define do
+        process "tr>td>h4", :title => :text
+        process "img[width=120][height=180]", :cover => "@src"
+        result :title, :cover
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title.gsub("Details for ", "")
+        info[:cover_thumb] = @protocol + @host + result.cover
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/pearson_au_scraper.rb ADDED Viewed

@@ -0,0 +1,52 @@
+module RBook
+  module WWW
+  class PearsonAUScraper < Base
+    SCRAPER_ID = :pearson_au
+    SCRAPER_NAME = "Pearson Education Australia".freeze
+    SCRAPER_SITE = "http://www.pearsoned.com.au/".freeze
+    #add_publisher( self, "9780130000000", "9780139999999" )
+    #add_publisher( self, "9780321000000", "9780321999999" )
+    #add_publisher( self, "9780201000000", "9780201999999" )
+    #add_publisher( self, "9780201000000", "9780201999999" )
+    add_scraper( self )
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.pearsoned.com.au"
+      @path = "/Catalogue/TitleDetails.aspx?isbn="
+      @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
+      rba = Scraper.define do
+        process "span.PageHeading", :title => :text
+        process "span#_publishedLabel", :pubdate => :text
+        process "span#_priceLabel", :rrp => :text
+        process "img.CoverImage", :cover_thumb => "@src"
+        result :title, :pubdate, :rrp, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.rrp.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:pubdate] = result.pubdate
+        info[:rrp] = result.rrp
+        info[:cover_thumb] = @protocol + @host + result.cover_thumb
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/penguin_scraper.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module RBook
+  module WWW
+  class PenguinScraper < Base
+    SCRAPER_ID = :penguin
+    SCRAPER_NAME = "Penguin Books Australia".freeze
+    SCRAPER_SITE = "http://www.penguin.com.au/".freeze
+    #add_publisher( self, "9781857200000", "9781857299999" )
+    add_scraper( self )
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.penguin.com.au"
+      @path = "/catalog/search-title-details.cfm?SBN="
+      @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
+      rba = Scraper.define do
+        process "font>b", :title => :text
+        process "img[alt=cover]", :cover_thumb => "@src"
+        result :title, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.cover_thumb.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:cover_thumb] = @protocol + @host + result.cover_thumb.gsub("..", "")
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/random_au_scraper.rb ADDED Viewed

@@ -0,0 +1,90 @@
+require 'cgi'
+module RBook
+  module WWW
+  class RandomAUScraper < Base
+    SCRAPER_ID = :random_au
+    SCRAPER_NAME = "Random AU".freeze
+    SCRAPER_SITE = "http://www.randomhouse.com.au/".freeze
+    #add_publisher( self, "9781400000000", "9781400099999" )
+    #add_publisher( self, "9781863200000", "9781863299999" )
+    #add_publisher( self, "9781904900000", "9781904999999" )
+    add_scraper( self )
+    def get_cover(isbn)
+      info = get_info(isbn)
+      return nil if info.nil?
+      return nil unless info.kind_of?(Hash)
+      link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
+      return nil if link.nil?
+      begin
+        response = Net::HTTP.get_response URI.parse(link)
+          if response.code != "200"
+            raise response.code.to_s
+            return nil
+          else
+            result = {}
+            result[:data] = response.body
+            result[:content_type] = "image/jpeg"
+            return result
+          end
+      rescue
+        return nil
+      end
+    end
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.randomhouse.com.au"
+      @path = "/Books/Default.aspx?Page=Book&ID="
+      @link = @protocol + @host + @path + isbn
+      random = Scraper.define do
+        process "td.bookTitle", :title => :text
+        process "span#rBodyModule__ctl0_lblISBN13", :isbn => :text
+        process "span#rBodyModule__ctl0_lblAuthor", :author => :text
+        process "span#rBodyModule__ctl0_lblFormat", :form => :text
+        process "span#rBodyModule__ctl0_lblImprint", :imprint => :text
+        process "span#rBodyModule__ctl0_lblRRP", :rrp => :text
+        process "span#rBodyModule__ctl0_lblRelease", :pubdate => :text
+        process "td.detBook>table>tr>td.standard[colspan=2]", :description => :text
+        process "img#rBodyModule__ctl0_imgBook", :cover_thumb => "@src"
+        process "a#rBodyModule__ctl0_hl300Image", :cover_large => "@href"
+        result :title, :isbn, :author, :form, :imprint, :rrp, :pubdate, :description, :cover_thumb, :cover_large
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = random.scrape(content)
+      if result.isbn.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:author] = result.author
+        info[:form] = result.form
+        info[:imprint] = result.imprint
+        info[:rrp] = result.rrp
+        info[:pubdate] = result.pubdate
+        info[:description] = result.description
+        info[:cover_thumb] = result.cover_thumb
+        info[:cover_large] = result.cover_large
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/random_us_scraper.rb ADDED Viewed

@@ -0,0 +1,59 @@
+require 'cgi'
+module RBook
+  module WWW
+  class RandomUSScraper < Base
+    SCRAPER_ID = :random_us
+    SCRAPER_NAME = "Random US".freeze
+    SCRAPER_SITE = "http://www.randomhouse.com/".freeze
+    #add_publisher( self, "9780517300000", "9780517399999" )
+    #add_publisher( self, "9780553800000", "9780553899999" )
+    #add_publisher( self, "9780307000000", "9780307999999" )
+    #add_publisher( self, "9780385500000", "9780385599999" )
+    #add_publisher( self, "9780767900000", "9780767999999" )
+    #add_publisher( self, "9780679600000", "9780679699999" )
+    #add_publisher( self, "9781400000000", "9781400099999" )
+    add_scraper( self )
+    def get_info(isbn)
+      @protocol = "http://"
+      @host = "www.randomhouse.com"
+      @path = "/catalog/display.pperl?isbn="
+      @link = @protocol + @host + @path + isbn
+      random = Scraper.define do
+        process "div#catalog_display>h1", :title => :text
+        process "meta[name=gsa.format]", :form => "@content"
+        process "meta[name=gsa.author]", :author => "@content"
+        process "div#catalog_content>p", :description => :text
+        process "div.rhbw_cover>img", :cover_large => "@src"
+        result :title, :author, :form, :description, :cover_large
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = random.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:author] = result.author
+        info[:form] = result.form
+        info[:description] = result.description
+        info[:cover_large] = @protocol + @host + result.cover_large.gsub("&amp;","&").gsub("=150","=600") unless result.cover_large.nil?
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/sas_scraper.rb ADDED Viewed

@@ -0,0 +1,54 @@
+module RBook
+  module WWW
+  class SASScraper < Base
+    SCRAPER_ID = :sas_us
+    SCRAPER_NAME = "Simon and Schuster US".freeze
+    SCRAPER_SITE = "http://www.simonsays.com/".freeze
+    #add_publisher( self, "9780671700000", "9780671799999" )
+    #add_publisher( self, "9780731800000", "9780731899999" )
+    #add_publisher( self, "9780743200000", "9780743299999" )
+    #add_publisher( self, "9781416500000", "9781416599999" )
+    add_scraper( self )
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      @protocol = "http://"
+      @host = "www.simonsays.com"
+      @path = "/subs/book.cfm?areaid=288&isbn="
+      @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
+      rba = Scraper.define do
+        process "font[face=Verdana, Arial, Helvetica]>b", :title => :text
+        process "td[rowspan=3]>img[border=0]", :cover_thumb => "@src"
+        result :title, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title
+        info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end

data/lib/rbook/www/unireps_scraper.rb ADDED Viewed

@@ -0,0 +1,58 @@
+module RBook
+  module WWW
+  class UnirepsScraper < Base
+    SCRAPER_ID = :unireps
+    SCRAPER_NAME = "Unireps".freeze
+    SCRAPER_SITE = "http://www.unireps.com.au/".freeze
+    #add_publisher( self, "9780643000000", "9780643099999" )
+    #add_publisher( self, "9780868400000", "9780868499999" )
+    #add_publisher( self, "9780908800000", "9780908999999" )
+    #add_publisher( self, "9780909600000", "9780909699999" )
+    #add_publisher( self, "9781877000000", "9781877099999" )
+    #add_publisher( self, "9781920700000", "9781920799999" )
+    add_scraper( self )
+    def get_info(isbn)
+      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
+      isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
+      isbn10 = ISBN::convert_to_isbn10(isbn)
+      @protocol = "http://"
+      @host = "www.unireps.com.au"
+      @path = "/isbn/"
+      @suffix = ".htm"
+      @link = @protocol + @host + @path + isbn10 + @suffix
+      rba = Scraper.define do
+        process "h4", :title => :text
+        process "img[alt=#{isbn10}]", :cover_thumb => "@src"
+        result :title, :cover_thumb
+      end
+      content = Net::HTTP.get URI.parse(@link)
+      result = rba.scrape(content)
+      if result.title.nil?
+        return nil
+      else
+        info = {}
+        info[:isbn] = isbn
+        info[:title] = result.title unless result.title.nil?
+        info[:cover_thumb] = @protocol + @host + @path + result.cover_thumb unless result.cover_thumb.nil?
+        info[:link] = @link
+        info[:from_name] = SCRAPER_NAME
+        info[:from_url] = SCRAPER_SITE
+        return info
+      end
+    end
+  end
+  end
+end