rbook 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/COPYING +340 -0
  2. data/LICENSE +13 -0
  3. data/README +16 -0
  4. data/Rakefile +206 -0
  5. data/examples/titlepage.rb +14 -0
  6. data/examples/www/find_all.rb +23 -0
  7. data/examples/www/find_cover_from_amazon.rb +12 -0
  8. data/examples/www/find_url_from_rainbow.rb +12 -0
  9. data/examples/www/list.rb +13 -0
  10. data/lib/rbook/bisac.rb +175 -0
  11. data/lib/rbook/errors.rb +7 -0
  12. data/lib/rbook/isbn.rb +249 -0
  13. data/lib/rbook/onix.rb +68 -0
  14. data/lib/rbook/onix/contributor.rb +60 -0
  15. data/lib/rbook/onix/lists.rb +2 -0
  16. data/lib/rbook/onix/lists/contributor_role.rb +10 -0
  17. data/lib/rbook/onix/lists/product_form.rb +100 -0
  18. data/lib/rbook/onix/message.rb +101 -0
  19. data/lib/rbook/onix/product.rb +188 -0
  20. data/lib/rbook/onix/sales_restriction.rb +51 -0
  21. data/lib/rbook/onix/supply_detail.rb +68 -0
  22. data/lib/rbook/onix/xchar.rb +98 -0
  23. data/lib/rbook/titlepage.rb +96 -0
  24. data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
  25. data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
  26. data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
  27. data/lib/rbook/www.rb +172 -0
  28. data/lib/rbook/www/aau_scraper.rb +76 -0
  29. data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
  30. data/lib/rbook/www/base.rb +87 -0
  31. data/lib/rbook/www/harper_au_scraper.rb +56 -0
  32. data/lib/rbook/www/harper_us_scraper.rb +55 -0
  33. data/lib/rbook/www/hha_scraper.rb +50 -0
  34. data/lib/rbook/www/macmillan_scraper.rb +62 -0
  35. data/lib/rbook/www/orbis_scraper.rb +48 -0
  36. data/lib/rbook/www/oup_scraper.rb +64 -0
  37. data/lib/rbook/www/paulist_scraper.rb +53 -0
  38. data/lib/rbook/www/pearson_au_scraper.rb +52 -0
  39. data/lib/rbook/www/penguin_scraper.rb +45 -0
  40. data/lib/rbook/www/random_au_scraper.rb +90 -0
  41. data/lib/rbook/www/random_us_scraper.rb +59 -0
  42. data/lib/rbook/www/sas_scraper.rb +54 -0
  43. data/lib/rbook/www/unireps_scraper.rb +58 -0
  44. data/lib/rbook/www/wiley_us_scraper.rb +54 -0
  45. data/test/data/abingdon.xml +38931 -0
  46. data/test/data/augsburg.xml +39009 -0
  47. data/test/data/chalice.xml +10851 -0
  48. data/test/data/eerdsman.xml +36942 -0
  49. data/test/data/invalid_no_product.xml +9 -0
  50. data/test/data/not_xml.csv +1 -0
  51. data/test/data/single_product.xml +50 -0
  52. data/test/data/xml_not_onix.xml +7 -0
  53. data/test/mocks/titlepage_driver.rb +107 -0
  54. data/test/unit/bisac_test.rb +57 -0
  55. data/test/unit/isbn_test.rb +149 -0
  56. data/test/unit/onix/contributor_test.rb +50 -0
  57. data/test/unit/onix/message_test.rb +119 -0
  58. data/test/unit/onix/product_test.rb +101 -0
  59. data/test/unit/onix/sales_restriction_test.rb +48 -0
  60. data/test/unit/onix/supply_detail_test.rb +53 -0
  61. data/test/unit/onix/xchar_test.rb +37 -0
  62. data/test/unit/titlepage_test.rb +127 -0
  63. metadata +130 -0
@@ -0,0 +1,64 @@
module RBook
  module WWW

    # Scraper for the Oxford University Press (Australia) website.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class OUPScraper < Base

      SCRAPER_ID   = :oup
      SCRAPER_NAME = "Oxford University Press".freeze
      SCRAPER_SITE = "http://au.oup.com/".freeze

      #add_publisher( self, "978019200000", "978019999999")
      add_scraper( self )

      # Fetches the OUP search page for +isbn+ and extracts the details of the
      # title shown on it.
      #
      # isbn - the ISBN to look up. It is passed through
      #        ISBN::convert_to_isbn10 and the result is appended to the
      #        search URL.
      #
      # Returns nil when the ISBN conversion yields nil or when the page has no
      # "div.isbn13" element. Otherwise returns a Hash with the keys :isbn,
      # :title, :author, :description, :link, :from_name and :from_url, plus
      # :cover_thumb when a cover image was found and :published, :pages,
      # :format and :rrp when the "div.BookInfo" text matches the expected
      # multi-line layout.
      def get_info(isbn)
        isbn = ISBN::convert_to_isbn10(isbn)

        # NOTE(review): presumably convert_to_isbn10 yields nil for input it
        # cannot convert - confirm against the ISBN module. Without this guard
        # a nil would blow up in the string concatenation below.
        return nil if isbn.nil?

        @protocol = "http://"
        @host     = "au.oup.com"
        @path     = "/searchbuy/SearchBook.asp?isbn="
        @link     = @protocol + @host + @path + isbn

        oup = Scraper.define do
          process "div.isbn13",        :isbn        => :text
          process "td.title",          :title       => :text
          process "td.author",         :author      => :text
          process "div.BookInfo",      :misc        => :text
          process "td>p.blurb",        :description => :text
          process "td>img[width=100]", :cover       => "@src"
          result :isbn, :title, :author, :misc, :description, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = oup.scrape(content)

        # No ISBN element on the page is treated as "title not found".
        return nil if result.isbn.nil?

        info = {}
        info[:isbn]        = result.isbn.gsub(/\ ISBN-13:\n/, "")
        info[:title]       = result.title
        info[:author]      = result.author
        info[:description] = result.description
        # The cover src is appended to the host; skip it when no image matched
        # instead of raising on String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]        = @link

        # MatchData#length counts the whole match as well as the five capture
        # groups (6 in total), so the previous "length == 5" test could never
        # succeed and these four fields were never filled in. A non-nil match
        # already guarantees that every group is present.
        details = result.misc.nil? ? nil : result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
        if details
          info[:published] = details[1]
          info[:pages]     = details[2]
          info[:format]    = details[3]
          info[:rrp]       = details[4]
        end

        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,53 @@
module RBook
  module WWW

    # Scraper for the Paulist Press website.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class PaulistScraper < Base

      SCRAPER_ID   = :paulist
      SCRAPER_NAME = "Paulist Press".freeze
      SCRAPER_SITE = "http://www.paulistpress.com/".freeze

      #add_publisher( self, "9780809100000", "9780809199999" )
      add_scraper( self )

      # Fetches the Paulist Press product page for +isbn+ and extracts its
      # title and cover image.
      #
      # isbn - the ISBN to look up. Converted to ISBN-13 when it is not
      #        already a valid ISBN-13.
      #
      # Returns nil when the page has no "tr>td>h4" element. Otherwise returns
      # a Hash with the keys :isbn, :title, :link, :from_name and :from_url,
      # plus :cover_thumb when a 120x180 image was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host     = "www.paulistpress.com"
        @path     = "/"
        # The page name is built from the last five characters of the ISBN-10:
        # the first four of them, a hyphen, then the final character.
        @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
        @suffix   = ".html"
        @link     = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix

        rba = Scraper.define do
          process "tr>td>h4",                   :title => :text
          process "img[width=120][height=180]", :cover => "@src"
          result :title, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # No heading on the page is treated as "title not found".
        return nil if result.title.nil?

        info = {}
        info[:isbn]        = isbn
        info[:title]       = result.title.gsub("Details for ", "")
        # A page can carry a title but no matching image; leave the key out in
        # that case rather than raising on String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,52 @@
module RBook
  module WWW

    # Scraper for the Pearson Education Australia catalogue.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class PearsonAUScraper < Base

      SCRAPER_ID   = :pearson_au
      SCRAPER_NAME = "Pearson Education Australia".freeze
      SCRAPER_SITE = "http://www.pearsoned.com.au/".freeze

      #add_publisher( self, "9780130000000", "9780139999999" )
      #add_publisher( self, "9780321000000", "9780321999999" )
      #add_publisher( self, "9780201000000", "9780201999999" )
      #add_publisher( self, "9780201000000", "9780201999999" )
      add_scraper( self )

      # Fetches the Pearson title-details page for +isbn+ and extracts the
      # fields shown on it.
      #
      # isbn - the ISBN to look up. Its ISBN-10 form is appended to the
      #        catalogue URL; the value is returned unchanged under :isbn.
      #
      # Returns nil when the ISBN conversion yields nil or when the page has no
      # price label. Otherwise returns a Hash with the keys :isbn, :pubdate,
      # :rrp, :link, :from_name and :from_url, plus :title and :cover_thumb
      # when the page provides them.
      def get_info(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn)

        # NOTE(review): presumably convert_to_isbn10 yields nil for input it
        # cannot convert - confirm against the ISBN module. Without this guard
        # a nil would blow up in the string concatenation below.
        return nil if isbn10.nil?

        @protocol = "http://"
        @host     = "www.pearsoned.com.au"
        @path     = "/Catalogue/TitleDetails.aspx?isbn="
        @link     = @protocol + @host + @path + isbn10

        rba = Scraper.define do
          process "span.PageHeading",     :title       => :text
          process "span#_publishedLabel", :pubdate     => :text
          process "span#_priceLabel",     :rrp         => :text
          process "img.CoverImage",       :cover_thumb => "@src"
          result :title, :pubdate, :rrp, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # A missing price label is treated as "title not found".
        return nil if result.rrp.nil?

        info = {}
        info[:isbn]        = isbn
        # The title was scraped but previously never handed back to the caller.
        info[:title]       = result.title unless result.title.nil?
        info[:pubdate]     = result.pubdate
        info[:rrp]         = result.rrp
        # Leave the key out when no cover image matched rather than raising on
        # String + nil.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,45 @@
module RBook
  module WWW

    # Scraper for the Penguin Books Australia catalogue.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class PenguinScraper < Base

      SCRAPER_ID   = :penguin
      SCRAPER_NAME = "Penguin Books Australia".freeze
      SCRAPER_SITE = "http://www.penguin.com.au/".freeze

      #add_publisher( self, "9781857200000", "9781857299999" )
      add_scraper( self )

      # Fetches the Penguin title-details page for +isbn+ and extracts its
      # cover image.
      #
      # isbn - the ISBN to look up. Its ISBN-10 form is appended to the
      #        catalogue URL; the value is returned unchanged under :isbn.
      #
      # Returns nil when the page has no image with alt="cover". Otherwise
      # returns a Hash with the keys :isbn, :cover_thumb, :link, :from_name
      # and :from_url. The page title is scraped as well but is not part of
      # the returned Hash.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.penguin.com.au"
        @path     = "/catalog/search-title-details.cfm?SBN="
        @link     = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        page_scraper = Scraper.define do
          process "font>b",         :title       => :text
          process "img[alt=cover]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        page = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))

        # A missing cover image is treated as "title not found".
        return nil if page.cover_thumb.nil?

        {
          :isbn        => isbn,
          # Every ".." in the image src is removed before it is appended to
          # the host.
          :cover_thumb => @protocol + @host + page.cover_thumb.gsub("..", ""),
          :link        => @link,
          :from_name   => SCRAPER_NAME,
          :from_url    => SCRAPER_SITE
        }
      end

    end
  end
end
@@ -0,0 +1,90 @@
require 'cgi'

module RBook
  module WWW

    # Scraper for the Random House Australia website.
    #
    # The class registers itself with add_scraper when it is loaded. get_info
    # looks up the details of a title and get_cover downloads its cover image.
    class RandomAUScraper < Base

      SCRAPER_ID   = :random_au
      SCRAPER_NAME = "Random AU".freeze
      SCRAPER_SITE = "http://www.randomhouse.com.au/".freeze

      #add_publisher( self, "9781400000000", "9781400099999" )
      #add_publisher( self, "9781863200000", "9781863299999" )
      #add_publisher( self, "9781904900000", "9781904999999" )
      add_scraper( self )

      # Downloads the cover image for +isbn+.
      #
      # The image location is taken from get_info, preferring :cover_large,
      # then :cover_medium, then :cover_thumb.
      #
      # isbn - the ISBN to look up; handed to get_info unchanged.
      #
      # Returns nil when get_info does not return a Hash, when that Hash has
      # no cover link, when the server answers with anything other than 200,
      # or when the download raises a StandardError. Otherwise returns a Hash
      # with :data (the response body) and :content_type (always
      # "image/jpeg").
      def get_cover(isbn)
        info = get_info(isbn)
        # nil is not a Hash, so this also covers the "title not found" case.
        return nil unless info.kind_of?(Hash)

        link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
        return nil if link.nil?

        begin
          response = Net::HTTP.get_response URI.parse(link)
          # Previously a non-200 answer raised an exception only for the
          # rescue below to swallow it; returning nil directly has the same
          # outcome without using an exception for control flow.
          return nil if response.code != "200"

          { :data => response.body, :content_type => "image/jpeg" }
        rescue StandardError
          # Deliberately best-effort: any failure to parse the link or fetch
          # the image is reported as "no cover".
          nil
        end
      end

      # Fetches the Random House Australia book page for +isbn+ and extracts
      # the fields shown on it.
      #
      # isbn - the ISBN to look up. It is appended to the book URL as given
      #        and returned unchanged under :isbn.
      #
      # Returns nil when the page has no ISBN-13 label. Otherwise returns a
      # Hash with the keys :isbn, :title, :author, :form, :imprint, :rrp,
      # :pubdate, :description, :cover_thumb, :cover_large, :link, :from_name
      # and :from_url; scraped values that were not found are nil.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.randomhouse.com.au"
        @path     = "/Books/Default.aspx?Page=Book&ID="
        @link     = @protocol + @host + @path + isbn

        random = Scraper.define do
          process "td.bookTitle",                               :title       => :text
          process "span#rBodyModule__ctl0_lblISBN13",           :isbn        => :text
          process "span#rBodyModule__ctl0_lblAuthor",           :author      => :text
          process "span#rBodyModule__ctl0_lblFormat",           :form        => :text
          process "span#rBodyModule__ctl0_lblImprint",          :imprint     => :text
          process "span#rBodyModule__ctl0_lblRRP",              :rrp         => :text
          process "span#rBodyModule__ctl0_lblRelease",          :pubdate     => :text
          process "td.detBook>table>tr>td.standard[colspan=2]", :description => :text
          process "img#rBodyModule__ctl0_imgBook",              :cover_thumb => "@src"
          process "a#rBodyModule__ctl0_hl300Image",             :cover_large => "@href"
          result :title, :isbn, :author, :form, :imprint, :rrp, :pubdate, :description, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = random.scrape(content)

        # A missing ISBN-13 label is treated as "title not found".
        return nil if result.isbn.nil?

        info = {}
        info[:isbn]        = isbn
        info[:title]       = result.title
        info[:author]      = result.author
        info[:form]        = result.form
        info[:imprint]     = result.imprint
        info[:rrp]         = result.rrp
        info[:pubdate]     = result.pubdate
        info[:description] = result.description
        # Cover locations are passed on exactly as they appear in the page.
        info[:cover_thumb] = result.cover_thumb
        info[:cover_large] = result.cover_large
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,59 @@
require 'cgi'

module RBook
  module WWW

    # Scraper for the Random House (US) website.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class RandomUSScraper < Base

      SCRAPER_ID   = :random_us
      SCRAPER_NAME = "Random US".freeze
      SCRAPER_SITE = "http://www.randomhouse.com/".freeze

      #add_publisher( self, "9780517300000", "9780517399999" )
      #add_publisher( self, "9780553800000", "9780553899999" )
      #add_publisher( self, "9780307000000", "9780307999999" )
      #add_publisher( self, "9780385500000", "9780385599999" )
      #add_publisher( self, "9780767900000", "9780767999999" )
      #add_publisher( self, "9780679600000", "9780679699999" )
      #add_publisher( self, "9781400000000", "9781400099999" )
      add_scraper( self )

      # Fetches the Random House catalogue page for +isbn+ and extracts the
      # fields shown on it.
      #
      # isbn - the ISBN to look up. It is appended to the catalogue URL as
      #        given and returned unchanged under :isbn.
      #
      # Returns nil when the page has no "div#catalog_display>h1" heading.
      # Otherwise returns a Hash with the keys :isbn, :title, :author, :form,
      # :description, :link, :from_name and :from_url, plus :cover_large when
      # a cover image was found.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.randomhouse.com"
        @path     = "/catalog/display.pperl?isbn="
        @link     = @protocol + @host + @path + isbn

        page_scraper = Scraper.define do
          process "div#catalog_display>h1", :title       => :text
          process "meta[name=gsa.format]",  :form        => "@content"
          process "meta[name=gsa.author]",  :author      => "@content"
          process "div#catalog_content>p",  :description => :text
          process "div.rhbw_cover>img",     :cover_large => "@src"
          result :title, :author, :form, :description, :cover_large
        end

        page = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))

        # A missing heading is treated as "title not found".
        return nil if page.title.nil?

        info = {
          :isbn        => isbn,
          :title       => page.title,
          :author      => page.author,
          :form        => page.form,
          :description => page.description
        }

        unless page.cover_large.nil?
          # Decode the HTML-escaped ampersands in the src and replace every
          # "=150" with "=600" before appending the src to the host.
          cover_path = page.cover_large.gsub("&amp;","&").gsub("=150","=600")
          info[:cover_large] = @protocol + @host + cover_path
        end

        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,54 @@
module RBook
  module WWW

    # Scraper for the Simon and Schuster (US) website.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class SASScraper < Base

      SCRAPER_ID   = :sas_us
      SCRAPER_NAME = "Simon and Schuster US".freeze
      SCRAPER_SITE = "http://www.simonsays.com/".freeze

      #add_publisher( self, "9780671700000", "9780671799999" )
      #add_publisher( self, "9780731800000", "9780731899999" )
      #add_publisher( self, "9780743200000", "9780743299999" )
      #add_publisher( self, "9781416500000", "9781416599999" )
      add_scraper( self )

      # Fetches the Simon and Schuster book page for +isbn+ and extracts its
      # title and cover image.
      #
      # isbn - the ISBN to look up. Converted to ISBN-13 when it is not
      #        already a valid ISBN-13; the ISBN-10 form is used in the URL.
      #
      # Returns nil when no title element is found on the page. Otherwise
      # returns a Hash with the keys :isbn, :title, :link, :from_name and
      # :from_url, plus :cover_thumb when a cover image was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host     = "www.simonsays.com"
        @path     = "/subs/book.cfm?areaid=288&isbn="
        @link     = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        page_scraper = Scraper.define do
          process "font[face=Verdana, Arial, Helvetica]>b", :title       => :text
          process "td[rowspan=3]>img[border=0]",            :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        page = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))

        # A missing title is treated as "title not found".
        return nil if page.title.nil?

        info = { :isbn => isbn, :title => page.title }

        # The cover key is only present when an image was found on the page.
        unless page.cover_thumb.nil?
          info[:cover_thumb] = @protocol + @host + page.cover_thumb
        end

        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
@@ -0,0 +1,58 @@
module RBook
  module WWW

    # Scraper for the Unireps website.
    #
    # The class registers itself with add_scraper when it is loaded; get_info
    # performs the actual lookup.
    class UnirepsScraper < Base

      SCRAPER_ID   = :unireps
      SCRAPER_NAME = "Unireps".freeze
      SCRAPER_SITE = "http://www.unireps.com.au/".freeze

      #add_publisher( self, "9780643000000", "9780643099999" )
      #add_publisher( self, "9780868400000", "9780868499999" )
      #add_publisher( self, "9780908800000", "9780908999999" )
      #add_publisher( self, "9780909600000", "9780909699999" )
      #add_publisher( self, "9781877000000", "9781877099999" )
      #add_publisher( self, "9781920700000", "9781920799999" )
      add_scraper( self )

      # Fetches the Unireps page for +isbn+ and extracts its title and cover
      # image.
      #
      # isbn - the ISBN to look up. Converted to ISBN-13 when it is not
      #        already a valid ISBN-13; the ISBN-10 form names the page and
      #        identifies the cover image through its alt attribute.
      #
      # Returns nil when the page has no "h4" element. Otherwise returns a
      # Hash with the keys :isbn, :title, :link, :from_name and :from_url,
      # plus :cover_thumb when a cover image was found.
      #
      # Raises ArgumentError when ISBN::valid_isbn? rejects +isbn+.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn   = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host     = "www.unireps.com.au"
        @path     = "/isbn/"
        @suffix   = ".htm"
        @link     = @protocol + @host + @path + isbn10 + @suffix

        page_scraper = Scraper.define do
          process "h4",                 :title       => :text
          process "img[alt=#{isbn10}]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        page = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))

        # A missing heading is treated as "title not found".
        return nil if page.title.nil?

        info = { :isbn => isbn, :title => page.title }

        # The image src is appended to the "/isbn/" directory of the site; the
        # key is only present when an image was found.
        unless page.cover_thumb.nil?
          info[:cover_thumb] = @protocol + @host + @path + page.cover_thumb
        end

        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end