rbook 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/COPYING +340 -0
  2. data/LICENSE +13 -0
  3. data/README +16 -0
  4. data/Rakefile +206 -0
  5. data/examples/titlepage.rb +14 -0
  6. data/examples/www/find_all.rb +23 -0
  7. data/examples/www/find_cover_from_amazon.rb +12 -0
  8. data/examples/www/find_url_from_rainbow.rb +12 -0
  9. data/examples/www/list.rb +13 -0
  10. data/lib/rbook/bisac.rb +175 -0
  11. data/lib/rbook/errors.rb +7 -0
  12. data/lib/rbook/isbn.rb +249 -0
  13. data/lib/rbook/onix.rb +68 -0
  14. data/lib/rbook/onix/contributor.rb +60 -0
  15. data/lib/rbook/onix/lists.rb +2 -0
  16. data/lib/rbook/onix/lists/contributor_role.rb +10 -0
  17. data/lib/rbook/onix/lists/product_form.rb +100 -0
  18. data/lib/rbook/onix/message.rb +101 -0
  19. data/lib/rbook/onix/product.rb +188 -0
  20. data/lib/rbook/onix/sales_restriction.rb +51 -0
  21. data/lib/rbook/onix/supply_detail.rb +68 -0
  22. data/lib/rbook/onix/xchar.rb +98 -0
  23. data/lib/rbook/titlepage.rb +96 -0
  24. data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
  25. data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
  26. data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
  27. data/lib/rbook/www.rb +172 -0
  28. data/lib/rbook/www/aau_scraper.rb +76 -0
  29. data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
  30. data/lib/rbook/www/base.rb +87 -0
  31. data/lib/rbook/www/harper_au_scraper.rb +56 -0
  32. data/lib/rbook/www/harper_us_scraper.rb +55 -0
  33. data/lib/rbook/www/hha_scraper.rb +50 -0
  34. data/lib/rbook/www/macmillan_scraper.rb +62 -0
  35. data/lib/rbook/www/orbis_scraper.rb +48 -0
  36. data/lib/rbook/www/oup_scraper.rb +64 -0
  37. data/lib/rbook/www/paulist_scraper.rb +53 -0
  38. data/lib/rbook/www/pearson_au_scraper.rb +52 -0
  39. data/lib/rbook/www/penguin_scraper.rb +45 -0
  40. data/lib/rbook/www/random_au_scraper.rb +90 -0
  41. data/lib/rbook/www/random_us_scraper.rb +59 -0
  42. data/lib/rbook/www/sas_scraper.rb +54 -0
  43. data/lib/rbook/www/unireps_scraper.rb +58 -0
  44. data/lib/rbook/www/wiley_us_scraper.rb +54 -0
  45. data/test/data/abingdon.xml +38931 -0
  46. data/test/data/augsburg.xml +39009 -0
  47. data/test/data/chalice.xml +10851 -0
  48. data/test/data/eerdsman.xml +36942 -0
  49. data/test/data/invalid_no_product.xml +9 -0
  50. data/test/data/not_xml.csv +1 -0
  51. data/test/data/single_product.xml +50 -0
  52. data/test/data/xml_not_onix.xml +7 -0
  53. data/test/mocks/titlepage_driver.rb +107 -0
  54. data/test/unit/bisac_test.rb +57 -0
  55. data/test/unit/isbn_test.rb +149 -0
  56. data/test/unit/onix/contributor_test.rb +50 -0
  57. data/test/unit/onix/message_test.rb +119 -0
  58. data/test/unit/onix/product_test.rb +101 -0
  59. data/test/unit/onix/sales_restriction_test.rb +48 -0
  60. data/test/unit/onix/supply_detail_test.rb +53 -0
  61. data/test/unit/onix/xchar_test.rb +37 -0
  62. data/test/unit/titlepage_test.rb +127 -0
  63. metadata +130 -0
@@ -0,0 +1,64 @@
1
+
2
module RBook
  module WWW

    # Scraper for the Oxford University Press (Australia) online catalogue.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class OUPScraper < Base

      SCRAPER_ID = :oup
      SCRAPER_NAME = "Oxford University Press".freeze
      SCRAPER_SITE = "http://au.oup.com/".freeze

      #add_publisher( self, "978019200000", "978019999999")
      add_scraper( self )

      # Fetches the OUP search page for +isbn+ and extracts the book details.
      #
      # isbn - the ISBN to look up; it is passed through
      #        ISBN::convert_to_isbn10 before being appended to the URL.
      #        NOTE(review): unlike some sibling scrapers the value is not
      #        validated first - confirm how convert_to_isbn10 treats
      #        invalid input.
      #
      # Returns nil when the page contains no "div.isbn13" text, otherwise a
      # Hash with :isbn, :title, :author, :description, :link, :from_name and
      # :from_url, plus :cover_thumb when a cover image was found and
      # :published, :pages, :format and :rrp when the "div.BookInfo" text
      # matches the expected layout.
      def get_info(isbn)
        isbn = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host = "au.oup.com"
        @path = "/searchbuy/SearchBook.asp?isbn="
        @link = @protocol + @host + @path + isbn

        oup = Scraper.define do
          process "div.isbn13", :isbn => :text
          process "td.title", :title => :text
          process "td.author", :author => :text
          process "div.BookInfo", :misc => :text
          process "td>p.blurb", :description => :text
          process "td>img[width=100]", :cover => "@src"
          result :isbn, :title, :author, :misc, :description, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result = oup.scrape(content)

        return nil if result.isbn.nil?

        info = {}
        info[:isbn] = result.isbn.gsub(/\ ISBN-13:\n/, "")
        info[:title] = result.title
        info[:author] = result.author
        info[:description] = result.description
        # The cover src is appended to the host; skip the key when no image
        # matched rather than failing on String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link] = @link

        # A successful match always has length 6 (the whole match plus five
        # groups), so only the presence of a match is tested here. The fifth
        # group anchors the layout and is not stored.
        details = result.misc.nil? ? nil : result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
        unless details.nil?
          info[:published] = details[1]
          info[:pages] = details[2]
          info[:format] = details[3]
          info[:rrp] = details[4]
        end

        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,53 @@
1
+
2
module RBook
  module WWW

    # Scraper for the Paulist Press website.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class PaulistScraper < Base

      SCRAPER_ID = :paulist
      SCRAPER_NAME = "Paulist Press".freeze
      SCRAPER_SITE = "http://www.paulistpress.com/".freeze

      #add_publisher( self, "9780809100000", "9780809199999" )
      add_scraper( self )

      # Fetches the Paulist Press product page for +isbn+.
      #
      # isbn - the ISBN to look up. It is normalised to ISBN-13 for the
      #        returned :isbn value.
      #
      # Returns nil when the page has no "tr>td>h4" title, otherwise a Hash
      # with :isbn, :title, :link, :from_name and :from_url, plus
      # :cover_thumb when a 120x180 cover image was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host = "www.paulistpress.com"
        @path = "/"
        # The page name is built from the last five characters of the
        # ISBN-10: the first four, a dash, then the final character.
        # NOTE(review): assumes convert_to_isbn10 returns a String here.
        @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
        @suffix = ".html"
        @link = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix

        rba = Scraper.define do
          process "tr>td>h4", :title => :text
          process "img[width=120][height=180]", :cover => "@src"
          result :title, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result = rba.scrape(content)

        return nil if result.title.nil?

        info = {}
        info[:isbn] = isbn
        info[:title] = result.title.gsub("Details for ", "")
        # Skip the key when no cover image matched rather than failing on
        # String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,52 @@
1
+
2
module RBook
  module WWW

    # Scraper for the Pearson Education Australia online catalogue.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class PearsonAUScraper < Base

      SCRAPER_ID = :pearson_au
      SCRAPER_NAME = "Pearson Education Australia".freeze
      SCRAPER_SITE = "http://www.pearsoned.com.au/".freeze

      #add_publisher( self, "9780130000000", "9780139999999" )
      #add_publisher( self, "9780321000000", "9780321999999" )
      #add_publisher( self, "9780201000000", "9780201999999" )
      #add_publisher( self, "9780201000000", "9780201999999" )
      add_scraper( self )

      # Fetches the Pearson title-details page for +isbn+.
      #
      # isbn - the ISBN to look up; it is converted with
      #        ISBN::convert_to_isbn10 for the URL and returned unchanged
      #        under :isbn.
      #
      # Returns nil when the page has no "span#_priceLabel" text, otherwise
      # a Hash with :isbn, :pubdate, :rrp, :link, :from_name and :from_url,
      # plus :title and :cover_thumb when those elements were found.
      def get_info(isbn)
        @protocol = "http://"
        @host = "www.pearsoned.com.au"
        @path = "/Catalogue/TitleDetails.aspx?isbn="
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        rba = Scraper.define do
          process "span.PageHeading", :title => :text
          process "span#_publishedLabel", :pubdate => :text
          process "span#_priceLabel", :rrp => :text
          process "img.CoverImage", :cover_thumb => "@src"
          result :title, :pubdate, :rrp, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result = rba.scrape(content)

        return nil if result.rrp.nil?

        info = {}
        info[:isbn] = isbn
        # The title is scraped above, so pass it on to the caller.
        info[:title] = result.title unless result.title.nil?
        info[:pubdate] = result.pubdate
        info[:rrp] = result.rrp
        # Skip the key when no cover image matched rather than failing on
        # String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,45 @@
1
+
2
module RBook
  module WWW

    # Scraper for the Penguin Books Australia online catalogue.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class PenguinScraper < Base

      SCRAPER_ID = :penguin
      SCRAPER_NAME = "Penguin Books Australia".freeze
      SCRAPER_SITE = "http://www.penguin.com.au/".freeze

      #add_publisher( self, "9781857200000", "9781857299999" )
      add_scraper( self )

      # Fetches the Penguin title-details page for +isbn+.
      #
      # isbn - the ISBN to look up; it is converted with
      #        ISBN::convert_to_isbn10 for the URL and returned unchanged
      #        under :isbn.
      #
      # Returns nil when the page has no image with alt="cover", otherwise a
      # Hash with :isbn, :cover_thumb, :link, :from_name and :from_url, plus
      # :title when a "font>b" element was found.
      def get_info(isbn)
        @protocol = "http://"
        @host = "www.penguin.com.au"
        @path = "/catalog/search-title-details.cfm?SBN="
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        rba = Scraper.define do
          process "font>b", :title => :text
          process "img[alt=cover]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result = rba.scrape(content)

        return nil if result.cover_thumb.nil?

        info = {}
        info[:isbn] = isbn
        # The title is scraped above, so pass it on to the caller.
        info[:title] = result.title unless result.title.nil?
        # Every ".." in the image src is removed before it is appended to
        # the host.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb.gsub("..", "")
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'cgi'
2
+
3
module RBook
  module WWW

    # Scraper for the Random House Australia online catalogue.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper and Scraper are defined outside this
    # file; their exact behaviour should be confirmed there.
    class RandomAUScraper < Base

      SCRAPER_ID = :random_au
      SCRAPER_NAME = "Random AU".freeze
      SCRAPER_SITE = "http://www.randomhouse.com.au/".freeze

      #add_publisher( self, "9781400000000", "9781400099999" )
      #add_publisher( self, "9781863200000", "9781863299999" )
      #add_publisher( self, "9781904900000", "9781904999999" )
      add_scraper( self )

      # Downloads the best available cover image for +isbn+.
      #
      # The image URL is taken from get_info, preferring :cover_large, then
      # :cover_medium, then :cover_thumb.
      #
      # Returns nil when get_info yields no Hash, when no cover URL is
      # present, when the HTTP status is not "200", or when the download
      # raises a StandardError. Otherwise returns a Hash with :data (the
      # response body) and :content_type (always "image/jpeg").
      def get_cover(isbn)
        info = get_info(isbn)
        # kind_of?(Hash) is false for nil, so one check covers both cases.
        return nil unless info.kind_of?(Hash)

        link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
        return nil if link.nil?

        begin
          response = Net::HTTP.get_response URI.parse(link)
          return nil unless response.code == "200"

          { :data => response.body, :content_type => "image/jpeg" }
        rescue StandardError
          # Best effort: a failed download is reported as "no cover".
          nil
        end
      end

      # Fetches the Random House Australia book page for +isbn+.
      #
      # isbn - the ISBN to look up; it is appended to the URL as given and
      #        returned unchanged under :isbn.
      #
      # Returns nil when the page has no ISBN-13 label text, otherwise a
      # Hash with :isbn, :title, :author, :form, :imprint, :rrp, :pubdate,
      # :description, :cover_thumb, :cover_large, :link, :from_name and
      # :from_url.
      def get_info(isbn)
        @protocol = "http://"
        @host = "www.randomhouse.com.au"
        @path = "/Books/Default.aspx?Page=Book&ID="
        @link = @protocol + @host + @path + isbn

        random = Scraper.define do
          process "td.bookTitle", :title => :text
          process "span#rBodyModule__ctl0_lblISBN13", :isbn => :text
          process "span#rBodyModule__ctl0_lblAuthor", :author => :text
          process "span#rBodyModule__ctl0_lblFormat", :form => :text
          process "span#rBodyModule__ctl0_lblImprint", :imprint => :text
          process "span#rBodyModule__ctl0_lblRRP", :rrp => :text
          process "span#rBodyModule__ctl0_lblRelease", :pubdate => :text
          process "td.detBook>table>tr>td.standard[colspan=2]", :description => :text
          process "img#rBodyModule__ctl0_imgBook", :cover_thumb => "@src"
          process "a#rBodyModule__ctl0_hl300Image", :cover_large => "@href"
          result :title, :isbn, :author, :form, :imprint, :rrp, :pubdate, :description, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result = random.scrape(content)

        return nil if result.isbn.nil?

        info = {}
        info[:isbn] = isbn
        info[:title] = result.title
        info[:author] = result.author
        info[:form] = result.form
        info[:imprint] = result.imprint
        info[:rrp] = result.rrp
        info[:pubdate] = result.pubdate
        info[:description] = result.description
        info[:cover_thumb] = result.cover_thumb
        info[:cover_large] = result.cover_large
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'cgi'
2
+
3
module RBook
  module WWW

    # Looks up titles in the Random House (US) online catalogue.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper and Scraper are defined outside this
    # file; their exact behaviour should be confirmed there.
    class RandomUSScraper < Base

      SCRAPER_ID = :random_us
      SCRAPER_NAME = "Random US".freeze
      SCRAPER_SITE = "http://www.randomhouse.com/".freeze

      #add_publisher( self, "9780517300000", "9780517399999" )
      #add_publisher( self, "9780553800000", "9780553899999" )
      #add_publisher( self, "9780307000000", "9780307999999" )
      #add_publisher( self, "9780385500000", "9780385599999" )
      #add_publisher( self, "9780767900000", "9780767999999" )
      #add_publisher( self, "9780679600000", "9780679699999" )
      #add_publisher( self, "9781400000000", "9781400099999" )
      add_scraper( self )

      # Fetches the Random House catalogue page for +isbn+.
      #
      # isbn - the ISBN to look up; it is appended to the URL as given and
      #        returned unchanged under :isbn.
      #
      # Returns nil when the page has no "div#catalog_display>h1" title,
      # otherwise a Hash with :isbn, :title, :author, :form, :description,
      # :link, :from_name and :from_url, plus :cover_large when a cover
      # image was found.
      def get_info(isbn)
        @protocol = "http://"
        @host = "www.randomhouse.com"
        @path = "/catalog/display.pperl?isbn="
        @link = @protocol + @host + @path + isbn

        page_scraper = Scraper.define do
          process "div#catalog_display>h1", :title => :text
          process "meta[name=gsa.format]", :form => "@content"
          process "meta[name=gsa.author]", :author => "@content"
          process "div#catalog_content>p", :description => :text
          process "div.rhbw_cover>img", :cover_large => "@src"
          result :title, :author, :form, :description, :cover_large
        end

        scraped = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))
        return nil if scraped.title.nil?

        info = {
          :isbn => isbn,
          :title => scraped.title,
          :author => scraped.author,
          :form => scraped.form,
          :description => scraped.description
        }

        unless scraped.cover_large.nil?
          # Decode the HTML-escaped ampersand and swap "=150" for "=600" in
          # the image src before appending it to the host.
          large_src = scraped.cover_large.gsub("&amp;","&").gsub("=150","=600")
          info[:cover_large] = @protocol + @host + large_src
        end

        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,54 @@
1
+
2
module RBook
  module WWW

    # Looks up titles on the Simon and Schuster (US) website.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class SASScraper < Base

      SCRAPER_ID = :sas_us
      SCRAPER_NAME = "Simon and Schuster US".freeze
      SCRAPER_SITE = "http://www.simonsays.com/".freeze

      #add_publisher( self, "9780671700000", "9780671799999" )
      #add_publisher( self, "9780731800000", "9780731899999" )
      #add_publisher( self, "9780743200000", "9780743299999" )
      #add_publisher( self, "9781416500000", "9781416599999" )
      add_scraper( self )

      # Fetches the Simon and Schuster book page for +isbn+.
      #
      # isbn - the ISBN to look up. It is normalised to ISBN-13 for the
      #        returned :isbn value; the URL uses the ISBN-10 form.
      #
      # Returns nil when no title element was found, otherwise a Hash with
      # :isbn, :title, :link, :from_name and :from_url, plus :cover_thumb
      # when a cover image was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        unless ISBN::valid_isbn?(isbn)
          raise ArgumentError, 'Supplied isbn is not valid'
        end

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host = "www.simonsays.com"
        @path = "/subs/book.cfm?areaid=288&isbn="
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        page_scraper = Scraper.define do
          process "font[face=Verdana, Arial, Helvetica]>b", :title => :text
          process "td[rowspan=3]>img[border=0]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        scraped = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))
        return nil if scraped.title.nil?

        info = { :isbn => isbn, :title => scraped.title }
        unless scraped.cover_thumb.nil?
          info[:cover_thumb] = @protocol + @host + scraped.cover_thumb
        end
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,58 @@
1
+
2
module RBook
  module WWW

    # Looks up titles on the Unireps website.
    #
    # The class registers itself through add_scraper as soon as the class
    # body is evaluated.
    # NOTE(review): Base, add_scraper, ISBN and Scraper are defined outside
    # this file; their exact behaviour should be confirmed there.
    class UnirepsScraper < Base

      SCRAPER_ID = :unireps
      SCRAPER_NAME = "Unireps".freeze
      SCRAPER_SITE = "http://www.unireps.com.au/".freeze

      #add_publisher( self, "9780643000000", "9780643099999" )
      #add_publisher( self, "9780868400000", "9780868499999" )
      #add_publisher( self, "9780908800000", "9780908999999" )
      #add_publisher( self, "9780909600000", "9780909699999" )
      #add_publisher( self, "9781877000000", "9781877099999" )
      #add_publisher( self, "9781920700000", "9781920799999" )
      add_scraper( self )

      # Fetches the Unireps page for +isbn+.
      #
      # isbn - the ISBN to look up. It is normalised to ISBN-13 for the
      #        returned :isbn value; the page name and the cover image
      #        selector use the ISBN-10 form.
      #
      # Returns nil when the page has no "h4" title, otherwise a Hash with
      # :isbn, :title, :link, :from_name and :from_url, plus :cover_thumb
      # when an image whose alt text equals the ISBN-10 was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        unless ISBN::valid_isbn?(isbn)
          raise ArgumentError, 'Supplied isbn is not valid'
        end

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
        short_isbn = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host = "www.unireps.com.au"
        @path = "/isbn/"
        @suffix = ".htm"
        @link = @protocol + @host + @path + short_isbn + @suffix

        page_scraper = Scraper.define do
          process "h4", :title => :text
          process "img[alt=#{short_isbn}]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        scraped = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))
        return nil if scraped.title.nil?

        # The title is known to be present past the guard above.
        info = { :isbn => isbn, :title => scraped.title }
        unless scraped.cover_thumb.nil?
          # The image src is appended to the "/isbn/" directory of the site.
          info[:cover_thumb] = @protocol + @host + @path + scraped.cover_thumb
        end
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

    end
  end
end