rbook 0.4.3 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/Rakefile +13 -176
  2. metadata +57 -117
  3. data/COPYING +0 -340
  4. data/LICENSE +0 -13
  5. data/README +0 -17
  6. data/examples/gbip.rb +0 -12
  7. data/examples/onix/stream_reader.rb +0 -13
  8. data/examples/pacstream.rb +0 -13
  9. data/examples/titlepage.rb +0 -14
  10. data/examples/titlepage_with_proxy.rb +0 -14
  11. data/examples/titlepage_www.rb +0 -18
  12. data/examples/www/find_all.rb +0 -23
  13. data/examples/www/find_cover_from_amazon.rb +0 -16
  14. data/examples/www/find_url_from_rainbow.rb +0 -12
  15. data/examples/www/list.rb +0 -13
  16. data/lib/rbook/bisac.rb +0 -31
  17. data/lib/rbook/bisac/message.rb +0 -99
  18. data/lib/rbook/bisac/po.rb +0 -97
  19. data/lib/rbook/bisac/po_line_item.rb +0 -33
  20. data/lib/rbook/bisac/product.rb +0 -176
  21. data/lib/rbook/errors.rb +0 -8
  22. data/lib/rbook/gbip.rb +0 -21
  23. data/lib/rbook/gbip/pos.rb +0 -118
  24. data/lib/rbook/gbip/title.rb +0 -36
  25. data/lib/rbook/gbip/warehouse.rb +0 -27
  26. data/lib/rbook/isbn.rb +0 -255
  27. data/lib/rbook/onix.rb +0 -70
  28. data/lib/rbook/onix/contributor.rb +0 -60
  29. data/lib/rbook/onix/lists.rb +0 -2
  30. data/lib/rbook/onix/lists/contributor_role.rb +0 -10
  31. data/lib/rbook/onix/lists/product_form.rb +0 -100
  32. data/lib/rbook/onix/message.rb +0 -112
  33. data/lib/rbook/onix/product.rb +0 -189
  34. data/lib/rbook/onix/sales_restriction.rb +0 -51
  35. data/lib/rbook/onix/stream_reader.rb +0 -120
  36. data/lib/rbook/onix/stream_writer.rb +0 -40
  37. data/lib/rbook/onix/supply_detail.rb +0 -68
  38. data/lib/rbook/onix/xchar.rb +0 -98
  39. data/lib/rbook/pacstream.rb +0 -64
  40. data/lib/rbook/titlepage.rb +0 -37
  41. data/lib/rbook/titlepage/client.rb +0 -126
  42. data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
  43. data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
  44. data/lib/rbook/titlepage/wwwclient.rb +0 -96
  45. data/lib/rbook/www.rb +0 -172
  46. data/lib/rbook/www/aau_scraper.rb +0 -76
  47. data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
  48. data/lib/rbook/www/ban_scraper.rb +0 -62
  49. data/lib/rbook/www/base.rb +0 -87
  50. data/lib/rbook/www/harper_au_scraper.rb +0 -56
  51. data/lib/rbook/www/harper_us_scraper.rb +0 -55
  52. data/lib/rbook/www/hha_scraper.rb +0 -50
  53. data/lib/rbook/www/macmillan_scraper.rb +0 -62
  54. data/lib/rbook/www/orbis_scraper.rb +0 -48
  55. data/lib/rbook/www/oup_scraper.rb +0 -64
  56. data/lib/rbook/www/paulist_scraper.rb +0 -53
  57. data/lib/rbook/www/pearson_au_scraper.rb +0 -52
  58. data/lib/rbook/www/penguin_scraper.rb +0 -45
  59. data/lib/rbook/www/random_au_scraper.rb +0 -90
  60. data/lib/rbook/www/random_us_scraper.rb +0 -59
  61. data/lib/rbook/www/sas_scraper.rb +0 -54
  62. data/lib/rbook/www/unireps_scraper.rb +0 -58
  63. data/lib/rbook/www/wiley_us_scraper.rb +0 -54
  64. data/test/data/abingdon.xml +0 -38931
  65. data/test/data/augsburg.xml +0 -39009
  66. data/test/data/bisac_po.txt +0 -112
  67. data/test/data/chalice.xml +0 -10851
  68. data/test/data/eerdsman.xml +0 -36942
  69. data/test/data/invalid_no_product.xml +0 -9
  70. data/test/data/not_xml.csv +0 -1
  71. data/test/data/single_product.xml +0 -50
  72. data/test/data/valid_bisac.txt +0 -213
  73. data/test/data/xml_not_onix.xml +0 -7
  74. data/test/mocks/titlepage_driver.rb +0 -111
  75. data/test/unit/bisac/bisac_test.rb +0 -96
  76. data/test/unit/bisac/po_line_item_test.rb +0 -38
  77. data/test/unit/bisac/po_test.rb +0 -82
  78. data/test/unit/isbn_test.rb +0 -153
  79. data/test/unit/onix/contributor_test.rb +0 -50
  80. data/test/unit/onix/message_test.rb +0 -119
  81. data/test/unit/onix/product_test.rb +0 -101
  82. data/test/unit/onix/sales_restriction_test.rb +0 -48
  83. data/test/unit/onix/stream_reader_test.rb +0 -22
  84. data/test/unit/onix/stream_writer_test.rb +0 -32
  85. data/test/unit/onix/supply_detail_test.rb +0 -53
  86. data/test/unit/onix/xchar_test.rb +0 -37
  87. data/test/unit/titlepage_test.rb +0 -140
@@ -1,87 +0,0 @@
1
- require 'net/http'
2
- require 'uri'
3
-
4
- module RBook
5
- module WWW
6
-
7
# Common parent of every website scraper. It doubles as the registry that
# scraper classes add themselves to, and it supplies default implementations
# of the per-scraper lookup methods.
class Base

  # Every registered scraper class, in the order it was added. This is a class
  # variable so that subclasses calling add_scraper( self ) from their own
  # class bodies all append to the same list.
  @@scrapers = []

  # registers a new scraper with the library.
  # classname - the class to add
  def self.add_scraper(classname)
    @@scrapers << classname
  end

  # find a scraper matching the requested id
  # id - a scraper id as a symbol
  # Returns the first registered class whose SCRAPER_ID equals id, or nil
  # when no registered scraper matches.
  def self.find_scraper(id)
    @@scrapers.find { |scraper| scraper::SCRAPER_ID == id }
  end

  # find any scrapers matching the requested ids
  # ids - an array of scraper ids as symbols
  # Returns an array (possibly empty) of matching classes, in registration
  # order.
  def self.find_scrapers(ids)
    @@scrapers.select { |scraper| ids.include?(scraper::SCRAPER_ID) }
  end

  # This method can be overridden in each scraper. It should return a hash
  # containing the binary data and mimetype of the largest cover image it can
  # find for the requested isbn.
  #
  # The default implementation asks get_info for the book, picks the largest
  # cover link present (:cover_large, then :cover_medium, then :cover_thumb)
  # and downloads it.
  #
  # Returns a hash with :data and :content_type keys, or nil when there is no
  # usable info, no cover link, the download fails, or the server answers
  # with anything other than a 200.
  def get_cover(isbn)
    info = get_info(isbn)
    return nil unless info.kind_of?(Hash)

    link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
    return nil if link.nil?

    begin
      response = Net::HTTP.get_response(URI.parse(link))
      return nil unless response.code == "200"

      result = {}
      result[:data] = response.body
      # Report the type the server declared; fall back to the historical
      # default when the response carries no Content-Type header.
      result[:content_type] = response.content_type || "image/jpeg"
      result
    rescue StandardError
      # Fetching a cover is best effort: a malformed link or a network
      # failure means "no cover", not an error for the caller.
      nil
    end
  end

  # This method can be overridden in each scraper. It should return a hash of
  # any information on the requested isbn it can find.
  def get_info(isbn)
    nil
  end

  # This method can be overridden in each scraper. It should return a link to
  # the requested isbn on the target's website.
  def get_link(isbn)
    nil
  end

  # return the symbol used to uniquely identify each scraper
  #
  # The constant is resolved through the receiver's class. A bare SCRAPER_ID
  # would be looked up lexically in Base, which does not define it, and would
  # therefore raise NameError for every subclass instance.
  def scraper_id
    self.class::SCRAPER_ID
  end

  # Returns the list of every registered scraper class.
  def self.scrapers
    @@scrapers
  end
end
85
-
86
- end
87
- end
@@ -1,56 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for the Harper Collins Australia online catalogue.
class HarperCollinsAUScraper < Base

  SCRAPER_ID = :harper_au
  SCRAPER_NAME = "Harper Collins Australia".freeze
  SCRAPER_SITE = "http://www.harpercollins.com.au/".freeze

  #add_publisher( self, "9780006400000", "9780006499999" )
  #add_publisher( self, "9780007100000", "9780007199999" )
  add_scraper( self )

  # Fetches the catalogue page for the requested isbn and extracts the title,
  # subtitle, author and jacket image from it.
  #
  # isbn - an ISBN10 or ISBN13; the page is requested by ISBN10 and the
  #        result is reported as ISBN13
  #
  # Returns a hash of book details, or nil when the page has no title.
  # Raises ArgumentError when the supplied isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    @protocol = "http://"
    @host = "www.harpercollins.com.au"
    @path = "/global_scripts/product_catalog/book_xml.asp?isbn="
    @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

    rba = Scraper.define do
      process "div.header", :title => :text
      process "div.subtitle", :subtitle => :text
      process "div.byline", :author => :text
      process "img.bookJacket", :cover => "@src"
      result :title, :subtitle, :author, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)
    return nil if result.title.nil?

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title
    info[:subtitle] = result.subtitle
    # Drop only a leading "by " so names that merely contain "by " are left
    # intact, and tolerate a page that has no byline at all.
    info[:author] = result.author.nil? ? nil : result.author.sub(/\A(\s*)by /, '\1')
    info[:cover] = result.cover
    # Base#get_cover only looks at :cover_large / :cover_medium / :cover_thumb,
    # so publish the jacket under :cover_thumb as well.
    # NOTE(review): confirm the scraped src is an absolute URL.
    info[:cover_thumb] = result.cover unless result.cover.nil?
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
55
- end
56
- end
@@ -1,55 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for the Harper Collins United States online catalogue.
class HarperCollinsUSScraper < Base

  SCRAPER_ID = :harper_us
  SCRAPER_NAME = "Harper Collins United States".freeze
  SCRAPER_SITE = "http://www.harpercollins.com/".freeze

  #add_publisher( self, "9780060000000", "9780060999999" )
  add_scraper( self )

  # Fetches the book page for the requested isbn and extracts the title,
  # subtitle, author and jacket image from it.
  #
  # isbn - an ISBN10 or ISBN13; the page is requested by ISBN13
  #
  # Returns a hash of book details, or nil when the page has no title.
  # Raises ArgumentError when the supplied isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    @protocol = "http://"
    @host = "www.harpercollins.com"
    @path = "/book/index.aspx?isbn="
    @link = @protocol + @host + @path + isbn

    rba = Scraper.define do
      process "h1.bookTitle", :title => :text
      process "h2.bookSubTitle", :subtitle => :text
      process "h3.byLine", :author => :text
      process "img.bookJacket", :cover => "@src"
      result :title, :subtitle, :author, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)
    return nil if result.title.nil?

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title
    info[:subtitle] = result.subtitle
    # Drop only a leading "by " so names that merely contain "by " are left
    # intact, and tolerate a page that has no byline at all.
    info[:author] = result.author.nil? ? nil : result.author.sub(/\A(\s*)by /, '\1')
    # A page without a jacket image yields no cover entries instead of
    # raising on nil.
    unless result.cover.nil?
      info[:cover_thumb] = result.cover
      # NOTE(review): this stores the "large" variant of the URL under
      # :cover_medium; the key is kept as-is for existing callers.
      info[:cover_medium] = result.cover.gsub("medium", "large")
    end
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
54
- end
55
- end
@@ -1,50 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for the Hodder Headline Australia online catalogue.
class HHAScraper < Base

  SCRAPER_ID = :hha
  SCRAPER_NAME = "Hodder Headline Australia".freeze
  SCRAPER_SITE = "http://www.hha.com.au/".freeze

  #add_publisher( self, "9780340800000", "9780340899999" )
  #add_publisher( self, "9780755300000", "9780755399999" )
  #add_publisher( self, "9780733600000", "9780733699999" )
  add_scraper( self )

  # Fetches the book page for the requested isbn and extracts the title,
  # author and thumbnail cover from it.
  #
  # isbn - the isbn to look up; the page is requested by ISBN10
  #
  # Returns a hash of book details, or nil when the page has no thumbnail.
  # Raises ArgumentError when the supplied isbn is not valid, matching the
  # other scrapers that validate their input.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    @protocol = "http://"
    @host = "www.hha.com.au"
    @path = "/books/"
    @suffix = ".html"
    @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn) + @suffix

    rba = Scraper.define do
      process "h1.fiction", :title => :text
      process "p.author", :author => :text
      process "p.thumb>img", :cover_thumb => "@src"
      result :title, :author, :cover_thumb
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)
    return nil if result.cover_thumb.nil?

    info = {}
    info[:isbn] = isbn
    # The title is scraped above; pass it on rather than discarding it.
    info[:title] = result.title
    info[:author] = result.author
    # The scraped src is joined onto the site root to form the cover link.
    info[:cover_thumb] = @protocol + @host + result.cover_thumb
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
49
- end
50
- end
@@ -1,62 +0,0 @@
1
-
2
- module RBook
3
-
4
- module WWW
5
-
6
# Scraper for the Pan Macmillan Australia online catalogue.
class MacmillanScraper < Base

  SCRAPER_ID = :macmillan
  SCRAPER_NAME = "Pan Macmillan".freeze
  SCRAPER_SITE = "http://www.panmacmillan.com.au/".freeze

  #add_publisher( self, "9780312900000", "9780312999999" )
  #add_publisher( self, "9780330400000", "9780330499999" )
  #add_publisher( self, "9781403000000", "9781405099999" )
  add_scraper( self )

  # Stores the pieces that get_link assembles into a title page URL.
  def initialize
    @url_protocol = "http://"
    @url_host = "www.panmacmillan.com.au"
    @url_path = "/display_title.asp?ISBN="
    # NOTE(review): every request carries this fixed author parameter;
    # confirm whether the site actually needs it.
    @url_suffix = "&Author=Barker,%20Robin"
  end

  # Fetches the title page for the requested isbn and extracts the title and
  # cover links from it.
  #
  # Returns a hash of book details, or nil when the isbn cannot be converted
  # to ISBN13 or the page has no title.
  def get_info(isbn)
    isbn = ISBN::convert_to_isbn13(isbn)
    return nil if isbn.nil?

    page_scraper = Scraper.define do
      process "div.titlecontent>div.isbn>span", :isbn => :text
      process "td[width=70%]>h1", :title => :text
      process "a[title=Click on image to view a larger version]>img", :cover_medium => "@src"
      process "a[title=Click on image to view a larger version]", :cover_large => "@href"
      result :isbn, :title, :cover_medium, :cover_large
    end

    title_link = get_link(isbn)
    scraped = page_scraper.scrape(Net::HTTP.get(URI.parse(title_link)))
    return nil if scraped.title.nil?

    site_root = @url_protocol + @url_host

    info = {}
    info[:isbn] = isbn
    info[:title] = scraped.title
    # The scraped paths contain ".." segments, which are removed before the
    # path is joined onto the site root.
    unless scraped.cover_medium.nil?
      info[:cover_medium] = site_root + scraped.cover_medium.gsub("..", "")
    end
    unless scraped.cover_large.nil?
      info[:cover_large] = site_root + scraped.cover_large.gsub("..", "")
    end
    info[:link] = title_link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

  # Builds the title page URL for the requested isbn, using its ISBN10 form.
  #
  # Returns the URL as a string, or nil when the isbn is not valid.
  def get_link(isbn)
    if ISBN::valid_isbn?(isbn)
      @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn) + @url_suffix
    else
      nil
    end
  end
end
61
- end
62
- end
@@ -1,48 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for Orbis Books. Book pages are fetched from the
# www.maryknollmall.org host.
class OrbisScraper < Base

  SCRAPER_ID = :orbis
  SCRAPER_NAME = "Orbis Books"
  SCRAPER_SITE = "http://www.orbisbooks.com/"

  #add_publisher( self, "978157070000", "9781570799999")
  add_scraper( self )

  # Fetches the description page for the requested isbn and extracts the
  # thumbnail cover from it. The page is requested using the grouped form of
  # the ISBN10.
  #
  # Returns a hash of book details, or nil when the page has no thumbnail.
  def get_info(isbn)
    @protocol = "http://"
    @host = "www.maryknollmall.org"
    @path = "/description.cfm?ISBN="
    @grouped_isbn = ISBN::add_groups(ISBN::convert_to_isbn10(isbn))
    @link = @protocol + @host + @path + @grouped_isbn

    page_scraper = Scraper.define do
      process "tr>td[colspan=4]>font[size=3]", :description => :text # doesn't currently work
      process "table>tr>td[rowspan=2]>img", :cover_thumb => "@src"
      result :description, :cover_thumb
    end

    scraped = page_scraper.scrape(Net::HTTP.get(URI.parse(@link)))
    thumb = scraped.cover_thumb
    return nil if thumb.nil?

    info = {}
    info[:isbn] = isbn
    info[:cover_thumb] = @protocol + @host + thumb
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
47
- end
48
- end
@@ -1,64 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for the Oxford University Press Australia online catalogue.
class OUPScraper < Base

  SCRAPER_ID = :oup
  SCRAPER_NAME = "Oxford University Press"
  SCRAPER_SITE = "http://au.oup.com/"

  #add_publisher( self, "978019200000", "978019999999")
  add_scraper( self )

  # Fetches the search page for the requested isbn and extracts the book's
  # details from it.
  #
  # isbn - the isbn to look up; the page is requested by ISBN10
  #
  # Returns a hash of book details, or nil when the page shows no ISBN-13.
  # The :published, :pages, :format and :rrp keys are only present when the
  # page's book-info text has the expected line layout.
  def get_info(isbn)
    isbn = ISBN::convert_to_isbn10(isbn)

    @protocol = "http://"
    @host = "au.oup.com"
    @path = "/searchbuy/SearchBook.asp?isbn="
    @link = @protocol + @host + @path + isbn

    oup = Scraper.define do
      process "div.isbn13", :isbn => :text
      process "td.title", :title => :text
      process "td.author", :author => :text
      process "div.BookInfo", :misc => :text
      process "td>p.blurb", :description => :text
      process "td>img[width=100]", :cover => "@src"
      result :isbn, :title, :author, :misc, :description, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = oup.scrape(content)
    return nil if result.isbn.nil?

    info = {}
    info[:isbn] = result.isbn.gsub(/\ ISBN-13:\n/, "")
    info[:title] = result.title
    info[:author] = result.author
    info[:description] = result.description
    # A page without a cover image simply gets no :cover_thumb entry.
    info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
    info[:link] = @link

    # A successful match always carries all five groups, so testing for a
    # match is sufficient. The previous `tmp.length == 5` check could never
    # pass, because MatchData#length also counts the whole match and so
    # returns 6 here.
    tmp = result.misc.nil? ? nil : result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
    unless tmp.nil?
      info[:published] = tmp[1]
      info[:pages] = tmp[2]
      info[:format] = tmp[3]
      info[:rrp] = tmp[4]
    end

    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
63
- end
64
- end
@@ -1,53 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
# Scraper for the Paulist Press online catalogue.
class PaulistScraper < Base

  SCRAPER_ID = :paulist
  SCRAPER_NAME = "Paulist Press".freeze
  SCRAPER_SITE = "http://www.paulistpress.com/".freeze

  #add_publisher( self, "9780809100000", "9780809199999" )
  add_scraper( self )

  # Fetches the detail page for the requested isbn and extracts the title and
  # cover image from it.
  #
  # isbn - an ISBN10 or ISBN13
  #
  # Returns a hash of book details, or nil when the page has no title.
  # Raises ArgumentError when the supplied isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    @protocol = "http://"
    @host = "www.paulistpress.com"
    @path = "/"
    # The page name is built from the last five characters of the ISBN10:
    # the first four, a dash, then the final character.
    @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
    @suffix = ".html"
    @link = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix

    rba = Scraper.define do
      process "tr>td>h4", :title => :text
      process "img[width=120][height=180]", :cover => "@src"
      result :title, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)
    return nil if result.title.nil?

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title.gsub("Details for ", "")
    # A page with a title but no cover image gets no :cover_thumb entry
    # instead of raising on nil.
    info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
52
- end
53
- end