rbook 0.4.3 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/Rakefile +13 -176
  2. metadata +57 -117
  3. data/COPYING +0 -340
  4. data/LICENSE +0 -13
  5. data/README +0 -17
  6. data/examples/gbip.rb +0 -12
  7. data/examples/onix/stream_reader.rb +0 -13
  8. data/examples/pacstream.rb +0 -13
  9. data/examples/titlepage.rb +0 -14
  10. data/examples/titlepage_with_proxy.rb +0 -14
  11. data/examples/titlepage_www.rb +0 -18
  12. data/examples/www/find_all.rb +0 -23
  13. data/examples/www/find_cover_from_amazon.rb +0 -16
  14. data/examples/www/find_url_from_rainbow.rb +0 -12
  15. data/examples/www/list.rb +0 -13
  16. data/lib/rbook/bisac.rb +0 -31
  17. data/lib/rbook/bisac/message.rb +0 -99
  18. data/lib/rbook/bisac/po.rb +0 -97
  19. data/lib/rbook/bisac/po_line_item.rb +0 -33
  20. data/lib/rbook/bisac/product.rb +0 -176
  21. data/lib/rbook/errors.rb +0 -8
  22. data/lib/rbook/gbip.rb +0 -21
  23. data/lib/rbook/gbip/pos.rb +0 -118
  24. data/lib/rbook/gbip/title.rb +0 -36
  25. data/lib/rbook/gbip/warehouse.rb +0 -27
  26. data/lib/rbook/isbn.rb +0 -255
  27. data/lib/rbook/onix.rb +0 -70
  28. data/lib/rbook/onix/contributor.rb +0 -60
  29. data/lib/rbook/onix/lists.rb +0 -2
  30. data/lib/rbook/onix/lists/contributor_role.rb +0 -10
  31. data/lib/rbook/onix/lists/product_form.rb +0 -100
  32. data/lib/rbook/onix/message.rb +0 -112
  33. data/lib/rbook/onix/product.rb +0 -189
  34. data/lib/rbook/onix/sales_restriction.rb +0 -51
  35. data/lib/rbook/onix/stream_reader.rb +0 -120
  36. data/lib/rbook/onix/stream_writer.rb +0 -40
  37. data/lib/rbook/onix/supply_detail.rb +0 -68
  38. data/lib/rbook/onix/xchar.rb +0 -98
  39. data/lib/rbook/pacstream.rb +0 -64
  40. data/lib/rbook/titlepage.rb +0 -37
  41. data/lib/rbook/titlepage/client.rb +0 -126
  42. data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
  43. data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
  44. data/lib/rbook/titlepage/wwwclient.rb +0 -96
  45. data/lib/rbook/www.rb +0 -172
  46. data/lib/rbook/www/aau_scraper.rb +0 -76
  47. data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
  48. data/lib/rbook/www/ban_scraper.rb +0 -62
  49. data/lib/rbook/www/base.rb +0 -87
  50. data/lib/rbook/www/harper_au_scraper.rb +0 -56
  51. data/lib/rbook/www/harper_us_scraper.rb +0 -55
  52. data/lib/rbook/www/hha_scraper.rb +0 -50
  53. data/lib/rbook/www/macmillan_scraper.rb +0 -62
  54. data/lib/rbook/www/orbis_scraper.rb +0 -48
  55. data/lib/rbook/www/oup_scraper.rb +0 -64
  56. data/lib/rbook/www/paulist_scraper.rb +0 -53
  57. data/lib/rbook/www/pearson_au_scraper.rb +0 -52
  58. data/lib/rbook/www/penguin_scraper.rb +0 -45
  59. data/lib/rbook/www/random_au_scraper.rb +0 -90
  60. data/lib/rbook/www/random_us_scraper.rb +0 -59
  61. data/lib/rbook/www/sas_scraper.rb +0 -54
  62. data/lib/rbook/www/unireps_scraper.rb +0 -58
  63. data/lib/rbook/www/wiley_us_scraper.rb +0 -54
  64. data/test/data/abingdon.xml +0 -38931
  65. data/test/data/augsburg.xml +0 -39009
  66. data/test/data/bisac_po.txt +0 -112
  67. data/test/data/chalice.xml +0 -10851
  68. data/test/data/eerdsman.xml +0 -36942
  69. data/test/data/invalid_no_product.xml +0 -9
  70. data/test/data/not_xml.csv +0 -1
  71. data/test/data/single_product.xml +0 -50
  72. data/test/data/valid_bisac.txt +0 -213
  73. data/test/data/xml_not_onix.xml +0 -7
  74. data/test/mocks/titlepage_driver.rb +0 -111
  75. data/test/unit/bisac/bisac_test.rb +0 -96
  76. data/test/unit/bisac/po_line_item_test.rb +0 -38
  77. data/test/unit/bisac/po_test.rb +0 -82
  78. data/test/unit/isbn_test.rb +0 -153
  79. data/test/unit/onix/contributor_test.rb +0 -50
  80. data/test/unit/onix/message_test.rb +0 -119
  81. data/test/unit/onix/product_test.rb +0 -101
  82. data/test/unit/onix/sales_restriction_test.rb +0 -48
  83. data/test/unit/onix/stream_reader_test.rb +0 -22
  84. data/test/unit/onix/stream_writer_test.rb +0 -32
  85. data/test/unit/onix/supply_detail_test.rb +0 -53
  86. data/test/unit/onix/xchar_test.rb +0 -37
  87. data/test/unit/titlepage_test.rb +0 -140
@@ -1,87 +0,0 @@
1
- require 'net/http'
2
- require 'uri'
3
-
4
- module RBook
5
- module WWW
6
-
7
- class Base
8
-
9
- @@scrapers = []
10
-
11
- # registers a new scraper with the library.
12
- # classname - the class to add
13
- def self.add_scraper(classname)
14
- @@scrapers << classname
15
- end
16
-
17
- # find a scraper matching the requested id
18
- # id - a scraper id as a symbol
19
- def self.find_scraper(id)
20
- @@scrapers.each do |scraper|
21
- return scraper if scraper::SCRAPER_ID == id
22
- end
23
- return nil
24
- end
25
-
26
- # find any scrapers matching the requested ids
27
- # ids - an array of scraper id's as symbols
28
- def self.find_scrapers(ids)
29
- ret = []
30
- @@scrapers.each do |scraper|
31
- ret << scraper if ids.include?(scraper::SCRAPER_ID)
32
- end
33
- return ret
34
- end
35
-
36
- # This method can be overwritten in each scraper. It should return a hash containing the binary data
37
- # and mimetype of the largest cover image it can find for the requested isbn
38
- def get_cover(isbn)
39
-
40
- info = get_info(isbn)
41
- return nil if info.nil?
42
- return nil unless info.kind_of?(Hash)
43
-
44
- link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
45
-
46
- return nil if link.nil?
47
-
48
- begin
49
- response = Net::HTTP.get_response URI.parse(link)
50
- if response.code != "200"
51
- raise response.code.to_s
52
- return nil
53
- else
54
- result = {}
55
- result[:data] = response.body
56
- result[:content_type] = "image/jpeg"
57
- return result
58
- end
59
- rescue
60
- return nil
61
- end
62
- end
63
-
64
- # This method can be overwritten in each scraper. It should return a hash of any information on
65
- # the requested isbn it can find
66
- def get_info(isbn)
67
- nil
68
- end
69
-
70
- # This method can be overwritten in each scraper. It should return a link to the requested isbn
71
- # on the targets website
72
- def get_link(isbn)
73
- nil
74
- end
75
-
76
- # return the symbol used to uniquely identify each scraper
77
- def scraper_id
78
- return SCRAPER_ID
79
- end
80
-
81
- def self.scrapers
82
- @@scrapers
83
- end
84
- end
85
-
86
- end
87
- end
@@ -1,56 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class HarperCollinsAUScraper < Base
6
-
7
- SCRAPER_ID = :harper_au
8
- SCRAPER_NAME = "Harper Collins Australia".freeze
9
- SCRAPER_SITE = "http://www.harpercollins.com.au/".freeze
10
-
11
- #add_publisher( self, "9780006400000", "9780006499999" )
12
- #add_publisher( self, "9780007100000", "9780007199999" )
13
- add_scraper( self )
14
-
15
- def get_info(isbn)
16
-
17
- raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
18
-
19
- isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
20
-
21
- @protocol = "http://"
22
- @host = "www.harpercollins.com.au"
23
- @path = "/global_scripts/product_catalog/book_xml.asp?isbn="
24
- @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
25
-
26
- rba = Scraper.define do
27
- process "div.header", :title => :text
28
- process "div.subtitle", :subtitle => :text
29
- process "div.byline", :author => :text
30
- process "img.bookJacket", :cover => "@src"
31
- result :title, :subtitle, :author, :cover
32
- end
33
-
34
- content = Net::HTTP.get URI.parse(@link)
35
- result = rba.scrape(content)
36
-
37
- if result.title.nil?
38
- return nil
39
- else
40
-
41
- info = {}
42
- info[:isbn] = isbn
43
- info[:title] = result.title
44
- info[:subtitle] = result.subtitle
45
- info[:author] = result.author.gsub("by ","")
46
- info[:cover] = result.cover
47
- info[:link] = @link
48
- info[:from_name] = SCRAPER_NAME
49
- info[:from_url] = SCRAPER_SITE
50
- return info
51
- end
52
- end
53
-
54
- end
55
- end
56
- end
@@ -1,55 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class HarperCollinsUSScraper < Base
6
-
7
- SCRAPER_ID = :harper_us
8
- SCRAPER_NAME = "Harper Collins United States".freeze
9
- SCRAPER_SITE = "http://www.harpercollins.com/".freeze
10
-
11
- #add_publisher( self, "9780060000000", "9780060999999" )
12
- add_scraper( self )
13
-
14
- def get_info(isbn)
15
- raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
16
-
17
- isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
18
-
19
- @protocol = "http://"
20
- @host = "www.harpercollins.com"
21
- @path = "/book/index.aspx?isbn="
22
- @link = @protocol + @host + @path + isbn
23
-
24
- rba = Scraper.define do
25
- process "h1.bookTitle", :title => :text
26
- process "h2.bookSubTitle", :subtitle => :text
27
- process "h3.byLine", :author => :text
28
- process "img.bookJacket", :cover => "@src"
29
- result :title, :subtitle, :author, :cover
30
- end
31
-
32
- content = Net::HTTP.get URI.parse(@link)
33
- result = rba.scrape(content)
34
-
35
- if result.title.nil?
36
- return nil
37
- else
38
-
39
- info = {}
40
- info[:isbn] = isbn
41
- info[:title] = result.title
42
- info[:subtitle] = result.subtitle
43
- info[:author] = result.author.gsub("by ", "")
44
- info[:cover_thumb] = result.cover
45
- info[:cover_medium] = result.cover.gsub("medium", "large")
46
- info[:link] = @link
47
- info[:from_name] = SCRAPER_NAME
48
- info[:from_url] = SCRAPER_SITE
49
- return info
50
- end
51
- end
52
-
53
- end
54
- end
55
- end
@@ -1,50 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class HHAScraper < Base
6
-
7
- SCRAPER_ID = :hha
8
- SCRAPER_NAME = "Hodder Headline Australia".freeze
9
- SCRAPER_SITE = "http://www.hha.com.au/".freeze
10
-
11
- #add_publisher( self, "9780340800000", "9780340899999" )
12
- #add_publisher( self, "9780755300000", "9780755399999" )
13
- #add_publisher( self, "9780733600000", "9780733699999" )
14
- add_scraper( self )
15
-
16
- def get_info(isbn)
17
- @protocol = "http://"
18
- @host = "www.hha.com.au"
19
- @path = "/books/"
20
- @suffix = ".html"
21
- @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn) + @suffix
22
-
23
- rba = Scraper.define do
24
- process "h1.fiction", :title => :text
25
- process "p.author", :author => :text
26
- process "p.thumb>img", :cover_thumb => "@src"
27
- result :title, :author, :cover_thumb
28
- end
29
-
30
- content = Net::HTTP.get URI.parse(@link)
31
- result = rba.scrape(content)
32
-
33
- if result.cover_thumb.nil?
34
- return nil
35
- else
36
-
37
- info = {}
38
- info[:isbn] = isbn
39
- info[:author] = result.author
40
- info[:cover_thumb] = @protocol + @host + result.cover_thumb
41
- info[:link] = @link
42
- info[:from_name] = SCRAPER_NAME
43
- info[:from_url] = SCRAPER_SITE
44
- return info
45
- end
46
- end
47
-
48
- end
49
- end
50
- end
@@ -1,62 +0,0 @@
1
-
2
- module RBook
3
-
4
- module WWW
5
-
6
- class MacmillanScraper < Base
7
-
8
- SCRAPER_ID = :macmillan
9
- SCRAPER_NAME = "Pan Macmillan".freeze
10
- SCRAPER_SITE = "http://www.panmacmillan.com.au/".freeze
11
-
12
- #add_publisher( self, "9780312900000", "9780312999999" )
13
- #add_publisher( self, "9780330400000", "9780330499999" )
14
- #add_publisher( self, "9781403000000", "9781405099999" )
15
- add_scraper( self )
16
-
17
- def initialize
18
- @url_protocol = "http://"
19
- @url_host = "www.panmacmillan.com.au"
20
- @url_path = "/display_title.asp?ISBN="
21
- @url_suffix = "&Author=Barker,%20Robin"
22
- end
23
-
24
- def get_info(isbn)
25
-
26
- isbn = ISBN::convert_to_isbn13(isbn)
27
- return nil if isbn.nil?
28
-
29
- mac = Scraper.define do
30
- process "div.titlecontent>div.isbn>span", :isbn => :text
31
- process "td[width=70%]>h1", :title => :text
32
- process "a[title=Click on image to view a larger version]>img", :cover_medium => "@src"
33
- process "a[title=Click on image to view a larger version]", :cover_large => "@href"
34
- result :isbn, :title, :cover_medium, :cover_large
35
- end
36
-
37
- content = Net::HTTP.get URI.parse(get_link(isbn))
38
- result = mac.scrape(content)
39
-
40
- if result.title.nil?
41
- return nil
42
- else
43
-
44
- info = {}
45
- info[:isbn] = isbn
46
- info[:title] = result.title
47
- info[:cover_medium] = @url_protocol + @url_host + result.cover_medium.gsub("..", "") unless result.cover_medium.nil?
48
- info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub("..", "") unless result.cover_large.nil?
49
- info[:link] = get_link(isbn)
50
- info[:from_name] = SCRAPER_NAME
51
- info[:from_url] = SCRAPER_SITE
52
- return info
53
- end
54
- end
55
-
56
- def get_link(isbn)
57
- return nil unless ISBN::valid_isbn?(isbn)
58
- return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn) + @url_suffix
59
- end
60
- end
61
- end
62
- end
@@ -1,48 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class OrbisScraper < Base
6
-
7
- SCRAPER_ID = :orbis
8
- SCRAPER_NAME = "Orbis Books"
9
- SCRAPER_SITE = "http://www.orbisbooks.com/"
10
-
11
- #add_publisher( self, "978157070000", "9781570799999")
12
- add_scraper( self )
13
-
14
- def get_info(isbn)
15
-
16
- @protocol = "http://"
17
- @host = "www.maryknollmall.org"
18
- @path = "/description.cfm?ISBN="
19
- @grouped_isbn = ISBN::add_groups(ISBN::convert_to_isbn10(isbn))
20
- @link = @protocol + @host + @path + @grouped_isbn
21
-
22
- oup = Scraper.define do
23
- process "tr>td[colspan=4]>font[size=3]", :description => :text # doesn't currently work
24
- process "table>tr>td[rowspan=2]>img", :cover_thumb => "@src"
25
- result :description, :cover_thumb
26
- end
27
-
28
- content = Net::HTTP.get URI.parse(@link)
29
-
30
- result = oup.scrape(content)
31
-
32
- if result.cover_thumb.nil?
33
- return nil
34
- else
35
-
36
- info = {}
37
- info[:isbn] = isbn
38
- info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
39
- info[:link] = @link
40
- info[:from_name] = SCRAPER_NAME
41
- info[:from_url] = SCRAPER_SITE
42
- return info
43
- end
44
- end
45
-
46
- end
47
- end
48
- end
@@ -1,64 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class OUPScraper < Base
6
-
7
- SCRAPER_ID = :oup
8
- SCRAPER_NAME = "Oxford University Press"
9
- SCRAPER_SITE = "http://au.oup.com/"
10
-
11
- #add_publisher( self, "978019200000", "978019999999")
12
- add_scraper( self )
13
-
14
- def get_info(isbn)
15
-
16
- isbn = ISBN::convert_to_isbn10(isbn)
17
-
18
- @protocol = "http://"
19
- @host = "au.oup.com"
20
- @path = "/searchbuy/SearchBook.asp?isbn="
21
- @link = @protocol + @host + @path + isbn
22
-
23
- oup = Scraper.define do
24
- process "div.isbn13", :isbn => :text
25
- process "td.title", :title => :text
26
- process "td.author", :author => :text
27
- process "div.BookInfo", :misc => :text
28
- process "td>p.blurb", :description => :text
29
- process "td>img[width=100]", :cover => "@src"
30
- result :isbn, :title, :author, :misc, :description, :cover
31
- end
32
-
33
- content = Net::HTTP.get URI.parse(@link)
34
-
35
- result = oup.scrape(content)
36
-
37
-
38
- if result.isbn.nil?
39
- return nil
40
- else
41
-
42
- info = {}
43
- info[:isbn] = result.isbn.gsub(/\ ISBN-13:\n/, "")
44
- info[:title] = result.title
45
- info[:author] = result.author
46
- info[:description] = result.description
47
- info[:cover_thumb] = @protocol + @host + result.cover
48
- info[:link] = @link
49
- tmp = result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
50
- if !tmp.nil? && tmp.length == 5
51
- info[:published] = tmp[1]
52
- info[:pages] = tmp[2]
53
- info[:format] = tmp[3]
54
- info[:rrp] = tmp[4]
55
- end
56
- info[:from_name] = SCRAPER_NAME
57
- info[:from_url] = SCRAPER_SITE
58
- return info
59
- end
60
- end
61
-
62
- end
63
- end
64
- end
@@ -1,53 +0,0 @@
1
-
2
- module RBook
3
- module WWW
4
-
5
- class PaulistScraper < Base
6
-
7
- SCRAPER_ID = :paulist
8
- SCRAPER_NAME = "Paulist Press".freeze
9
- SCRAPER_SITE = "http://www.paulistpress.com/".freeze
10
-
11
- #add_publisher( self, "9780809100000", "9780809199999" )
12
- add_scraper( self )
13
-
14
- def get_info(isbn)
15
-
16
- raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
17
-
18
- isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)
19
-
20
- @protocol = "http://"
21
- @host = "www.paulistpress.com"
22
- @path = "/"
23
- @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
24
- @suffix = ".html"
25
- @link = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix
26
-
27
- rba = Scraper.define do
28
- process "tr>td>h4", :title => :text
29
- process "img[width=120][height=180]", :cover => "@src"
30
- result :title, :cover
31
- end
32
-
33
- content = Net::HTTP.get URI.parse(@link)
34
- result = rba.scrape(content)
35
-
36
- if result.title.nil?
37
- return nil
38
- else
39
-
40
- info = {}
41
- info[:isbn] = isbn
42
- info[:title] = result.title.gsub("Details for ", "")
43
- info[:cover_thumb] = @protocol + @host + result.cover
44
- info[:link] = @link
45
- info[:from_name] = SCRAPER_NAME
46
- info[:from_url] = SCRAPER_SITE
47
- return info
48
- end
49
- end
50
-
51
- end
52
- end
53
- end