rbook 0.4.3 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/Rakefile +13 -176
  2. metadata +57 -117
  3. data/COPYING +0 -340
  4. data/LICENSE +0 -13
  5. data/README +0 -17
  6. data/examples/gbip.rb +0 -12
  7. data/examples/onix/stream_reader.rb +0 -13
  8. data/examples/pacstream.rb +0 -13
  9. data/examples/titlepage.rb +0 -14
  10. data/examples/titlepage_with_proxy.rb +0 -14
  11. data/examples/titlepage_www.rb +0 -18
  12. data/examples/www/find_all.rb +0 -23
  13. data/examples/www/find_cover_from_amazon.rb +0 -16
  14. data/examples/www/find_url_from_rainbow.rb +0 -12
  15. data/examples/www/list.rb +0 -13
  16. data/lib/rbook/bisac.rb +0 -31
  17. data/lib/rbook/bisac/message.rb +0 -99
  18. data/lib/rbook/bisac/po.rb +0 -97
  19. data/lib/rbook/bisac/po_line_item.rb +0 -33
  20. data/lib/rbook/bisac/product.rb +0 -176
  21. data/lib/rbook/errors.rb +0 -8
  22. data/lib/rbook/gbip.rb +0 -21
  23. data/lib/rbook/gbip/pos.rb +0 -118
  24. data/lib/rbook/gbip/title.rb +0 -36
  25. data/lib/rbook/gbip/warehouse.rb +0 -27
  26. data/lib/rbook/isbn.rb +0 -255
  27. data/lib/rbook/onix.rb +0 -70
  28. data/lib/rbook/onix/contributor.rb +0 -60
  29. data/lib/rbook/onix/lists.rb +0 -2
  30. data/lib/rbook/onix/lists/contributor_role.rb +0 -10
  31. data/lib/rbook/onix/lists/product_form.rb +0 -100
  32. data/lib/rbook/onix/message.rb +0 -112
  33. data/lib/rbook/onix/product.rb +0 -189
  34. data/lib/rbook/onix/sales_restriction.rb +0 -51
  35. data/lib/rbook/onix/stream_reader.rb +0 -120
  36. data/lib/rbook/onix/stream_writer.rb +0 -40
  37. data/lib/rbook/onix/supply_detail.rb +0 -68
  38. data/lib/rbook/onix/xchar.rb +0 -98
  39. data/lib/rbook/pacstream.rb +0 -64
  40. data/lib/rbook/titlepage.rb +0 -37
  41. data/lib/rbook/titlepage/client.rb +0 -126
  42. data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
  43. data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
  44. data/lib/rbook/titlepage/wwwclient.rb +0 -96
  45. data/lib/rbook/www.rb +0 -172
  46. data/lib/rbook/www/aau_scraper.rb +0 -76
  47. data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
  48. data/lib/rbook/www/ban_scraper.rb +0 -62
  49. data/lib/rbook/www/base.rb +0 -87
  50. data/lib/rbook/www/harper_au_scraper.rb +0 -56
  51. data/lib/rbook/www/harper_us_scraper.rb +0 -55
  52. data/lib/rbook/www/hha_scraper.rb +0 -50
  53. data/lib/rbook/www/macmillan_scraper.rb +0 -62
  54. data/lib/rbook/www/orbis_scraper.rb +0 -48
  55. data/lib/rbook/www/oup_scraper.rb +0 -64
  56. data/lib/rbook/www/paulist_scraper.rb +0 -53
  57. data/lib/rbook/www/pearson_au_scraper.rb +0 -52
  58. data/lib/rbook/www/penguin_scraper.rb +0 -45
  59. data/lib/rbook/www/random_au_scraper.rb +0 -90
  60. data/lib/rbook/www/random_us_scraper.rb +0 -59
  61. data/lib/rbook/www/sas_scraper.rb +0 -54
  62. data/lib/rbook/www/unireps_scraper.rb +0 -58
  63. data/lib/rbook/www/wiley_us_scraper.rb +0 -54
  64. data/test/data/abingdon.xml +0 -38931
  65. data/test/data/augsburg.xml +0 -39009
  66. data/test/data/bisac_po.txt +0 -112
  67. data/test/data/chalice.xml +0 -10851
  68. data/test/data/eerdsman.xml +0 -36942
  69. data/test/data/invalid_no_product.xml +0 -9
  70. data/test/data/not_xml.csv +0 -1
  71. data/test/data/single_product.xml +0 -50
  72. data/test/data/valid_bisac.txt +0 -213
  73. data/test/data/xml_not_onix.xml +0 -7
  74. data/test/mocks/titlepage_driver.rb +0 -111
  75. data/test/unit/bisac/bisac_test.rb +0 -96
  76. data/test/unit/bisac/po_line_item_test.rb +0 -38
  77. data/test/unit/bisac/po_test.rb +0 -82
  78. data/test/unit/isbn_test.rb +0 -153
  79. data/test/unit/onix/contributor_test.rb +0 -50
  80. data/test/unit/onix/message_test.rb +0 -119
  81. data/test/unit/onix/product_test.rb +0 -101
  82. data/test/unit/onix/sales_restriction_test.rb +0 -48
  83. data/test/unit/onix/stream_reader_test.rb +0 -22
  84. data/test/unit/onix/stream_writer_test.rb +0 -32
  85. data/test/unit/onix/supply_detail_test.rb +0 -53
  86. data/test/unit/onix/xchar_test.rb +0 -37
  87. data/test/unit/titlepage_test.rb +0 -140
@@ -1,96 +0,0 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'net/http'

require 'rbook/isbn'
require 'rbook/errors'

module RBook
  module TitlePage

    # Screen-scraping client for www.titlepage.com.
    #
    # You should be aware of any limits of query volume imposed by the provider - currently a
    # maximum of 30 queries per minute is permitted.
    class WWWClient

      TITLEPAGE_DOMAIN = "www.titlepage.com"

      # Downloads the ONIX record for the supplied ISBN-10 or ISBN-13.
      # Returns the raw ONIX file body, or nil when the site has no record
      # for that ISBN. Raises ArgumentError on an invalid ISBN.
      # Requires a prior call to #login so the session cookie is set.
      def get_onix_file(isbn)
        isbn = RBook::ISBN.convert_to_isbn13(isbn)
        raise ArgumentError, 'Invalid ISBN supplied' if isbn.nil?

        headers = { 'Cookie' => @cookie }

        search_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "posted=yes",
            "quicksearch=#{isbn}",
            "qsrchby=ean",
            "detailed=Search"
          ].join("&")
          http.post('/results.php', data, headers)
        end

        # the results page embeds the internal record id in a javascript
        # onclick handler - extract it so we can request the download
        match = search_response.body.match(/onclick=\"bookPopUp\(\'(.+)\'\);\"/)
        return nil if match.nil?
        code = match[1]

        onix_file = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "download=Download",
            "rr=#{code}"
          ].join("&")
          http.post('/detail.php', data, headers)
        end
        onix_file.body
      end

      # login to the titlepage website. Stores the session cookie for use
      # by subsequent requests.
      def login(username, password)
        login_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "usr=#{username}",
            "pwd=#{password}",
            "login=Login"
          ].join("&")
          http.post('/index.php', data)
        end
        @cookie = login_response['set-cookie']
      end

      # logout from the titlepage website and discard the session cookie.
      # Does nothing if no login has occurred.
      def logout
        if @cookie
          Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
            http.get("/logout.php")
          end
          @cookie = nil
        end
      end

      # a convenience method to make queries to title page a little cleaner. This function
      # essentially calls the login and logout functions for you automatically.
      #
      #   RBook::TitlePage::WWWClient.open("username","password") do |tp|
      #     result = tp.get_onix_file("9780091835132")
      #   end
      def self.open(username, password)
        tp = self.new
        begin
          tp.login(username, password)
          yield(tp)
        ensure
          # always logout, even if the block raises
          tp.logout
        end
      end

    end
  end
end
@@ -1,172 +0,0 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'rubygems'
require 'scrapi'

require 'rbook/isbn'
require 'rbook/www/base'

# load all scraping classes
files = Dir.entries(File.dirname(__FILE__) + '/www/')
files.delete(".")
files.delete("..")
files.delete(".svn")
files.delete("base.rb")
files.each do |file|
  require 'rbook/www/' + file
end

module RBook
  # A set of classes to make scraping title information from various publisher websites easier.
  #
  # Basic usage:
  #   require 'rubygems'
  #   require 'rbook/www'
  #   RBook::WWW.find_info(:first, "1841492280", :penguin)
  #   #=> Hash
  #   RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
  #   #=> Array of Hashes
  module WWW

    # Find any information possible about the supplied isbn using the
    # specified scrapers.
    #
    # - isbn - a valid isbn10 or isbn13
    # - scrapers - a symbol or array of symbols specifying which scrapers to search with
    #
    # With :first, returns the first non-nil result (a Hash); with :all,
    # returns an Array of Hashes, or nil if no scraper found anything.
    #
    #   RBook::WWW.find_info(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_info(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_info)
    end

    # Find a cover image for the supplied isbn using the specified scrapers.
    # Arguments and return values as for find_info.
    #
    #   RBook::WWW.find_cover(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_cover(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_cover(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_cover)
    end

    # Find a product page URL for the supplied isbn using the specified
    # scrapers. Arguments and return values as for find_info.
    #
    #   RBook::WWW.find_url(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_url(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_url(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_url)
    end

    # returns an array of all available scrapers
    def self.scrapers
      Base.scrapers
    end

    # Shared implementation behind find_info/find_cover/find_url.
    # query is the scraper instance method to invoke (:get_info, :get_cover
    # or :get_url).
    #
    # NOTE: this also fixes a bug in the original find_url, which tested
    # `!result.nil` (missing `?`) and raised NoMethodError when collecting
    # :all results.
    def self.search(search_type, isbn, scrapers, query)
      raise ArgumentError, 'search_type must be :first or :all' if !search_type.eql?(:first) && !search_type.eql?(:all)
      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
      raise ArgumentError, 'scrapers must be a symbol or array of symbols' unless scrapers.kind_of?(Symbol) || scrapers.kind_of?(Array)

      isbn = ISBN::convert_to_isbn13(isbn)

      if scrapers.kind_of?(Symbol)
        scrapers = [Base::find_scraper(scrapers)]
      else
        scrapers = Base::find_scrapers(scrapers)
      end

      results = []

      scrapers.each do |scraper|
        result = scraper.new.send(query, isbn)
        next if result.nil?
        # :first short-circuits on the first successful scraper
        return result if search_type.eql?(:first)
        results << result
      end

      results.empty? ? nil : results
    end
    private_class_method :search

  end
end
@@ -1,76 +0,0 @@

module RBook
  module WWW

    # Scrapes title information from the Allen and Unwin website.
    class AAUScraper < Base

      SCRAPER_ID = :aau
      SCRAPER_NAME = "Allen and Unwin".freeze
      SCRAPER_SITE = "http://www.allenandunwin.com/".freeze

      #add_publisher( self, "9781741100000", "9781741199999" )
      #add_publisher( self, "9781865000000", "9781865099999" )
      add_scraper( self )

      def initialize
        @url_protocol = "http://"
        @url_host = "www.allenandunwin.com"
        @url_path = "/bookseller/product.aspx?ISBN="
      end

      # Retrieve a hash of title details for the supplied ISBN, or nil
      # when the site has no matching product page.
      # Raises ArgumentError on an invalid ISBN.
      def get_info(isbn)

        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        rba = Scraper.define do
          process "span#lblISBN", :isbn => :text
          process "h1>span#lblBookTitle", :title => :text
          process "span#lblAusRRP", :rrp => :text
          process "span#lblPublisher", :publisher => :text
          process "span#lblImprint", :imprint => :text
          process "span#lblBinding", :form => :text
          process "span#lblExtent", :pages => :text
          process "span#lblPubDate", :pubdate => :text
          process "span#lblDescription", :description => :text
          process "span#lblAuthor_bio", :authorbio => :text
          process "a#hypHiRes", :cover_large => "@href"
          process "a#imgProduct", :cover_thumb => "@href"
          result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(get_link(isbn))
        result = rba.scrape(content)

        # an empty title means the lookup found no product
        return nil if result.title.nil? || result.title == ""

        info = {}
        # strip the field labels the site embeds in each value
        info[:isbn] = result.isbn.gsub("ISBN : ", "")
        info[:title] = result.title unless result.title.nil?
        info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
        # BUG FIX: this guard previously tested result.imprint, which raised
        # NoMethodError when publisher was absent but imprint was present
        info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.publisher.nil?
        info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
        info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
        info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
        info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
        info[:description] = result.description unless result.description.nil?
        info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
        # the hi-res link is relative (starts with ".."), so trim the prefix
        info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
        info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
        info[:link] = get_link(isbn)
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        info
      end

      # Build the product page URL for the supplied ISBN, or nil for an
      # invalid ISBN. The site is addressed by ISBN-10.
      def get_link(isbn)
        return nil unless ISBN::valid_isbn?(isbn)
        return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
      end
    end
  end
end
@@ -1,44 +0,0 @@

module RBook
  module WWW

    # Retrieves cover images from Amazon's image server.
    class AmazonUKScraper < Base

      SCRAPER_ID = :amazon_uk
      SCRAPER_NAME = "Amazon UK".freeze
      SCRAPER_SITE = "http://www.amazon.co.uk/".freeze

      #add_retailer( self )
      add_scraper( self )

      # Fetch the large cover image for the supplied ISBN. Returns a hash
      # with :data (jpeg bytes) and :content_type, or nil when no usable
      # image is available or any network error occurs.
      def get_cover(isbn)

        isbn13 = ISBN::convert_to_isbn13(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn13)

        return nil if isbn13.nil? || isbn10.nil?

        # Amazon's image server is addressed by ISBN-10
        link = "http://images.amazon.com/images/P/" + isbn10 + ".02.LZZZZZZZ.jpg"

        begin
          response = Net::HTTP.get_response URI.parse(link)
          return nil unless response.code == "200"
          # a body of <= 807 bytes is Amazon's tiny placeholder image,
          # served when no real cover exists
          return nil if response.body.size <= 807

          { :data => response.body, :content_type => "image/jpeg" }
        rescue
          # best-effort: swallow network/parse errors and report no cover
          nil
        end
      end

    end
  end
end
@@ -1,62 +0,0 @@

module RBook
  module WWW

    # Scrapes title information from the Barnes and Noble website.
    class BarnesAndNobleScraper < Base

      SCRAPER_ID = :barnesandnoble
      SCRAPER_NAME = "Barnes and Noble".freeze
      # BUG FIX: was misspelt "barnesandnole.com", emitting a broken
      # :from_url in every result
      SCRAPER_SITE = "http://www.barnesandnoble.com/".freeze

      add_scraper( self )

      # Retrieve a hash of title details for the supplied ISBN, or nil
      # when the site has no matching product.
      def get_info(isbn)

        @protocol = "http://"
        @host = "search.barnesandnoble.com"
        @path = "/booksearch/isbninquiry.asp?z=y&cds2Pid=9481&isbn="
        @imgviewer_path = "/booksearch/imageviewer.asp?z=y&ean="
        # the site is queried by ISBN-10
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
        @imgviewer_link = @protocol + @host + @imgviewer_path + ISBN::convert_to_isbn10(isbn)

        main = Scraper.define do
          process "h1#title", :title => :text
          process "h2#contributor>a", :author => :text
          process "li.format", :form => :text
          process "div#coverImage>a>noscript>img", :cover_thumb => "@src"
          result :title, :author, :form, :cover_thumb
        end

        imgscraper = Scraper.define do
          process "div>img[alt=Cover Image]", :cover_large => "@src"
          result :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result = main.scrape(content)
        # no title means the lookup found no product
        return nil if result.title.nil?

        info = {}
        info[:isbn] = isbn
        info[:title] = result.title unless result.title.nil?
        info[:author] = result.author unless result.author.nil?
        info[:format] = result.form unless result.form.nil?
        info[:cover_thumb] = result.cover_thumb
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE

        # the large cover lives on a separate image-viewer page
        content = Net::HTTP.get URI.parse(@imgviewer_link)
        result = imgscraper.scrape(content)

        # BUG FIX: previously assigned the whole scrapi result object;
        # store the image URL string like every other scraper does
        info[:cover_large] = result.cover_large unless result.nil? || result.cover_large.nil?

        info
      end

    end
  end
end