rbook 0.4.3 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/Rakefile +13 -176
  2. metadata +57 -117
  3. data/COPYING +0 -340
  4. data/LICENSE +0 -13
  5. data/README +0 -17
  6. data/examples/gbip.rb +0 -12
  7. data/examples/onix/stream_reader.rb +0 -13
  8. data/examples/pacstream.rb +0 -13
  9. data/examples/titlepage.rb +0 -14
  10. data/examples/titlepage_with_proxy.rb +0 -14
  11. data/examples/titlepage_www.rb +0 -18
  12. data/examples/www/find_all.rb +0 -23
  13. data/examples/www/find_cover_from_amazon.rb +0 -16
  14. data/examples/www/find_url_from_rainbow.rb +0 -12
  15. data/examples/www/list.rb +0 -13
  16. data/lib/rbook/bisac.rb +0 -31
  17. data/lib/rbook/bisac/message.rb +0 -99
  18. data/lib/rbook/bisac/po.rb +0 -97
  19. data/lib/rbook/bisac/po_line_item.rb +0 -33
  20. data/lib/rbook/bisac/product.rb +0 -176
  21. data/lib/rbook/errors.rb +0 -8
  22. data/lib/rbook/gbip.rb +0 -21
  23. data/lib/rbook/gbip/pos.rb +0 -118
  24. data/lib/rbook/gbip/title.rb +0 -36
  25. data/lib/rbook/gbip/warehouse.rb +0 -27
  26. data/lib/rbook/isbn.rb +0 -255
  27. data/lib/rbook/onix.rb +0 -70
  28. data/lib/rbook/onix/contributor.rb +0 -60
  29. data/lib/rbook/onix/lists.rb +0 -2
  30. data/lib/rbook/onix/lists/contributor_role.rb +0 -10
  31. data/lib/rbook/onix/lists/product_form.rb +0 -100
  32. data/lib/rbook/onix/message.rb +0 -112
  33. data/lib/rbook/onix/product.rb +0 -189
  34. data/lib/rbook/onix/sales_restriction.rb +0 -51
  35. data/lib/rbook/onix/stream_reader.rb +0 -120
  36. data/lib/rbook/onix/stream_writer.rb +0 -40
  37. data/lib/rbook/onix/supply_detail.rb +0 -68
  38. data/lib/rbook/onix/xchar.rb +0 -98
  39. data/lib/rbook/pacstream.rb +0 -64
  40. data/lib/rbook/titlepage.rb +0 -37
  41. data/lib/rbook/titlepage/client.rb +0 -126
  42. data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
  43. data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
  44. data/lib/rbook/titlepage/wwwclient.rb +0 -96
  45. data/lib/rbook/www.rb +0 -172
  46. data/lib/rbook/www/aau_scraper.rb +0 -76
  47. data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
  48. data/lib/rbook/www/ban_scraper.rb +0 -62
  49. data/lib/rbook/www/base.rb +0 -87
  50. data/lib/rbook/www/harper_au_scraper.rb +0 -56
  51. data/lib/rbook/www/harper_us_scraper.rb +0 -55
  52. data/lib/rbook/www/hha_scraper.rb +0 -50
  53. data/lib/rbook/www/macmillan_scraper.rb +0 -62
  54. data/lib/rbook/www/orbis_scraper.rb +0 -48
  55. data/lib/rbook/www/oup_scraper.rb +0 -64
  56. data/lib/rbook/www/paulist_scraper.rb +0 -53
  57. data/lib/rbook/www/pearson_au_scraper.rb +0 -52
  58. data/lib/rbook/www/penguin_scraper.rb +0 -45
  59. data/lib/rbook/www/random_au_scraper.rb +0 -90
  60. data/lib/rbook/www/random_us_scraper.rb +0 -59
  61. data/lib/rbook/www/sas_scraper.rb +0 -54
  62. data/lib/rbook/www/unireps_scraper.rb +0 -58
  63. data/lib/rbook/www/wiley_us_scraper.rb +0 -54
  64. data/test/data/abingdon.xml +0 -38931
  65. data/test/data/augsburg.xml +0 -39009
  66. data/test/data/bisac_po.txt +0 -112
  67. data/test/data/chalice.xml +0 -10851
  68. data/test/data/eerdsman.xml +0 -36942
  69. data/test/data/invalid_no_product.xml +0 -9
  70. data/test/data/not_xml.csv +0 -1
  71. data/test/data/single_product.xml +0 -50
  72. data/test/data/valid_bisac.txt +0 -213
  73. data/test/data/xml_not_onix.xml +0 -7
  74. data/test/mocks/titlepage_driver.rb +0 -111
  75. data/test/unit/bisac/bisac_test.rb +0 -96
  76. data/test/unit/bisac/po_line_item_test.rb +0 -38
  77. data/test/unit/bisac/po_test.rb +0 -82
  78. data/test/unit/isbn_test.rb +0 -153
  79. data/test/unit/onix/contributor_test.rb +0 -50
  80. data/test/unit/onix/message_test.rb +0 -119
  81. data/test/unit/onix/product_test.rb +0 -101
  82. data/test/unit/onix/sales_restriction_test.rb +0 -48
  83. data/test/unit/onix/stream_reader_test.rb +0 -22
  84. data/test/unit/onix/stream_writer_test.rb +0 -32
  85. data/test/unit/onix/supply_detail_test.rb +0 -53
  86. data/test/unit/onix/xchar_test.rb +0 -37
  87. data/test/unit/titlepage_test.rb +0 -140
@@ -1,96 +0,0 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'rbook/isbn'
require 'rbook/errors'

module RBook
  module TitlePage

    # Screen-scraping client for the titlepage.com website.
    #
    # You should be aware of any limits of query volume imposed by the provider - currently a
    # maximum of 30 queries per minute is permitted.
    class WWWClient

      TITLEPAGE_DOMAIN = "www.titlepage.com".freeze

      # Searches titlepage.com for the supplied ISBN and returns the raw
      # ONIX file for the matching title as a String, or nil when no match
      # is found. Call #login first so a session cookie is available.
      #
      # Raises ArgumentError if the supplied ISBN cannot be converted to ISBN-13.
      def get_onix_file(isbn)
        isbn = RBook::ISBN.convert_to_isbn13(isbn)
        raise ArgumentError, 'Invalid ISBN supplied' if isbn.nil?

        headers = { 'Cookie' => @cookie }

        search_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "posted=yes",
            "quicksearch=#{isbn}",
            "qsrchby=ean",
            "detailed=Search"
          ].join("&")
          http.post('/results.php', data, headers)
        end

        # the detail-page code is only exposed inside an onclick handler
        # on the results page
        match = search_response.body.match(/onclick=\"bookPopUp\(\'(.+)\'\);\"/)
        return nil if match.nil?

        code = match[1]
        onix_file = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "download=Download",
            "rr=#{code}"
          ].join("&")
          http.post('/detail.php', data, headers)
        end
        return onix_file.body
      end

      # login to the titlepage website. Stores the returned session cookie
      # for use by subsequent requests.
      def login(username, password)
        login_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "usr=#{username}",
            "pwd=#{password}",
            "login=Login"
          ].join("&")
          http.post('/index.php', data)
        end
        @cookie = login_response['set-cookie']
      end

      # logout from the titlepage website and discard the session cookie.
      # Safe to call when not logged in (does nothing).
      def logout
        if @cookie
          Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
            http.get("/logout.php")
          end
          @cookie = nil
        end
      end

      # a convenience method to make queries to title page a little cleaner. This function
      # essentially calls the login and logout functions for you automatically.
      #
      #   RBook::TitlePage::WWWClient.open("username","password") do |tp|
      #     result = tp.get_onix_file("9780091835132")
      #   end
      def self.open(username, password)
        tp = self.new
        begin
          tp.login(username, password)
          yield(tp)
        ensure
          # always release the session, even if the block raises
          tp.logout
        end
      end

    end
  end
end
@@ -1,172 +0,0 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'rubygems'
require 'scrapi'

require 'rbook/isbn'
require 'rbook/www/base'

# load all scraping classes
files = Dir.entries(File.dirname(__FILE__) + '/www/')
files.delete(".")
files.delete("..")
files.delete(".svn")
files.delete("base.rb")
files.each do |file|
  require 'rbook/www/' + file
end

module RBook
  # A set of classes to make scraping title information from various publisher websites easier.
  #
  # Basic usage:
  #  require 'rubygems'
  #  require 'rbook/www'
  #  RBook::WWW.find_info(:first, "1841492280", :penguin)
  #  #=> Hash
  #  RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
  #  #=> Array of Hashes
  module WWW

    # Find any information possible about the supplied isbn using the
    # specified scrapers.
    #
    # - search_type - :first returns the first hit, :all collects every hit
    # - isbn - a valid isbn10 or isbn13
    # - scrapers - a symbol or array of symbols specifying which scrapers to search with
    #
    # Returns a Hash (for :first), an Array of Hashes (for :all), or nil
    # when no scraper finds anything.
    #
    #  RBook::WWW.find_info(:first, "1841492280", :penguin)
    #  #=> Hash
    #  RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
    #  #=> Array of Hashes
    def self.find_info(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_info)
    end

    # Find a cover image for the supplied isbn using the specified
    # scrapers. Same arguments and return contract as find_info.
    #
    #  RBook::WWW.find_cover(:first, "1841492280", :penguin)
    #  #=> Hash
    #  RBook::WWW.find_cover(:all, "1841492280", [:penguin, :harpercollins_au])
    #  #=> Array of Hashes
    def self.find_cover(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_cover)
    end

    # Find a product page URL for the supplied isbn using the specified
    # scrapers. Same arguments and return contract as find_info.
    #
    #  RBook::WWW.find_url(:first, "1841492280", :penguin)
    #  #=> Hash
    #  RBook::WWW.find_url(:all, "1841492280", [:penguin, :harpercollins_au])
    #  #=> Array of Hashes
    def self.find_url(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_url)
    end

    # returns an array of all available scrapers
    def self.scrapers
      Base.scrapers
    end

    # Shared implementation behind the find_* methods. +query+ is the
    # method invoked on each scraper instance (:get_info, :get_cover or
    # :get_url).
    #
    # BUG FIX: the original find_url tested `!result.nil` (missing `?`),
    # which raised NoMethodError whenever a scraper returned a result in
    # :all mode. The three find_* bodies were also triplicated; they now
    # share this helper.
    def self.search(search_type, isbn, scrapers, query)
      raise ArgumentError, 'search_type must be :first or :all' if !search_type.eql?(:first) && !search_type.eql?(:all)
      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
      raise ArgumentError, 'scrapers must be a symbol or array of symbols' unless scrapers.kind_of?(Symbol) || scrapers.kind_of?(Array)

      isbn = ISBN::convert_to_isbn13(isbn)

      if scrapers.kind_of?(Symbol)
        scrapers = [Base::find_scraper(scrapers)]
      else
        scrapers = Base::find_scrapers(scrapers)
      end

      results = []

      scrapers.each do |scraper|
        result = scraper.new.send(query, isbn)
        next if result.nil?
        # :first short-circuits on the first scraper that returns a hit
        return result if search_type.eql?(:first)
        results << result
      end

      results.empty? ? nil : results
    end
    private_class_method :search

  end
end
@@ -1,76 +0,0 @@

module RBook
  module WWW

    # Scrapes title information from the Allen and Unwin website.
    class AAUScraper < Base

      SCRAPER_ID = :aau
      SCRAPER_NAME = "Allen and Unwin".freeze
      SCRAPER_SITE = "http://www.allenandunwin.com/".freeze

      #add_publisher( self, "9781741100000", "9781741199999" )
      #add_publisher( self, "9781865000000", "9781865099999" )
      add_scraper( self )

      def initialize
        @url_protocol = "http://"
        @url_host = "www.allenandunwin.com"
        @url_path = "/bookseller/product.aspx?ISBN="
      end

      # Returns a Hash of title information for the supplied ISBN, or nil
      # if the title was not found on the site.
      #
      # Raises ArgumentError if the supplied ISBN is not valid.
      def get_info(isbn)

        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        rba = Scraper.define do
          process "span#lblISBN", :isbn => :text
          process "h1>span#lblBookTitle", :title => :text
          process "span#lblAusRRP", :rrp => :text
          process "span#lblPublisher", :publisher => :text
          process "span#lblImprint", :imprint => :text
          process "span#lblBinding", :form => :text
          process "span#lblExtent", :pages => :text
          process "span#lblPubDate", :pubdate => :text
          process "span#lblDescription", :description => :text
          process "span#lblAuthor_bio", :authorbio => :text
          process "a#hypHiRes", :cover_large => "@href"
          process "a#imgProduct", :cover_thumb => "@href"
          result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(get_link(isbn))
        result = rba.scrape(content)

        # an empty title means the site has no record for this ISBN
        if result.title.nil? || result.title == ""
          return nil
        else

          info = {}
          info[:isbn] = result.isbn.gsub("ISBN : ", "")
          info[:title] = result.title unless result.title.nil?
          info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
          # BUG FIX: guard on publisher, not imprint - the original checked
          # result.imprint here (copy-paste error), so a nil publisher with
          # a present imprint raised NoMethodError
          info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.publisher.nil?
          info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
          info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
          info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
          info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
          info[:description] = result.description unless result.description.nil?
          info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
          # the hi-res href is site-relative, starting with "..", so strip
          # the leading two characters before prefixing the host
          info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
          info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
          info[:link] = get_link(isbn)
          info[:from_name] = SCRAPER_NAME
          info[:from_url] = SCRAPER_SITE
          return info
        end
      end

      # Builds the product page URL for the supplied ISBN, or nil when the
      # ISBN is not valid. The site is addressed by ISBN-10.
      def get_link(isbn)
        return nil unless ISBN::valid_isbn?(isbn)
        return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
      end
    end
  end
end
@@ -1,44 +0,0 @@

module RBook
  module WWW

    # Fetches large cover images for a given ISBN from Amazon's public
    # image server.
    class AmazonUKScraper < Base

      SCRAPER_ID = :amazon_uk
      SCRAPER_NAME = "Amazon UK".freeze
      SCRAPER_SITE = "http://www.amazon.co.uk/".freeze

      #add_retailer( self )
      add_scraper( self )

      # Returns a Hash with :data (the raw jpeg bytes) and :content_type
      # for the cover image, or nil when the ISBN is invalid, the image is
      # missing, or the download fails for any reason (best-effort).
      def get_cover(isbn)
        isbn = ISBN::convert_to_isbn13(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn.nil? || isbn10.nil?

        # Amazon's image server is addressed by ISBN-10
        link = "http://images.amazon.com/images/P/" + isbn10 + ".02.LZZZZZZZ.jpg"

        begin
          response = Net::HTTP.get_response URI.parse(link)
          # a tiny body (<= 807 bytes) is Amazon's "no image" placeholder
          if response.code != "200" || response.body.size <= 807
            nil
          else
            { :data => response.body, :content_type => "image/jpeg" }
          end
        rescue
          # deliberate best-effort: any network/parse failure means no cover
          nil
        end
      end

    end
  end
end
@@ -1,62 +0,0 @@

module RBook
  module WWW

    # Scrapes title information from the Barnes and Noble website.
    class BarnesAndNobleScraper < Base

      SCRAPER_ID = :barnesandnoble
      SCRAPER_NAME = "Barnes and Noble".freeze
      # BUG FIX: domain was misspelt as "barnesandnole.com"
      SCRAPER_SITE = "http://www.barnesandnoble.com/".freeze

      add_scraper( self )

      # Returns a Hash of title information for the supplied ISBN, or nil
      # if no matching title was found. The site is addressed by ISBN-10.
      def get_info(isbn)

        @protocol = "http://"
        @host = "search.barnesandnoble.com"
        @path = "/booksearch/isbninquiry.asp?z=y&cds2Pid=9481&isbn="
        @imgviewer_path = "/booksearch/imageviewer.asp?z=y&ean="
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
        @imgviewer_link = @protocol + @host + @imgviewer_path + ISBN::convert_to_isbn10(isbn)

        main = Scraper.define do
          process "h1#title", :title => :text
          process "h2#contributor>a", :author => :text
          process "li.format", :form => :text
          process "div#coverImage>a>noscript>img", :cover_thumb => "@src"
          result :title, :author, :form, :cover_thumb
        end

        # the large cover lives on a separate image-viewer page
        imgscraper = Scraper.define do
          process "div>img[alt=Cover Image]", :cover_large => "@src"
          result :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result = main.scrape(content)
        if result.title.nil?
          return nil
        else

          info = {}
          info[:isbn] = isbn
          info[:title] = result.title unless result.title.nil?
          info[:author] = result.author unless result.author.nil?
          info[:format] = result.form unless result.form.nil?
          info[:cover_thumb] = result.cover_thumb
          info[:link] = @link
          info[:from_name] = SCRAPER_NAME
          info[:from_url] = SCRAPER_SITE

          content = Net::HTTP.get URI.parse(@imgviewer_link)
          result = imgscraper.scrape(content)

          # BUG FIX: store the extracted URL, not the whole scrapi result
          # object (matches how AAUScraper reads result.cover_large)
          info[:cover_large] = result.cover_large unless result.nil?

          return info
        end
      end

    end
  end
end