rbook 0.4.3 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +13 -176
- metadata +57 -117
- data/COPYING +0 -340
- data/LICENSE +0 -13
- data/README +0 -17
- data/examples/gbip.rb +0 -12
- data/examples/onix/stream_reader.rb +0 -13
- data/examples/pacstream.rb +0 -13
- data/examples/titlepage.rb +0 -14
- data/examples/titlepage_with_proxy.rb +0 -14
- data/examples/titlepage_www.rb +0 -18
- data/examples/www/find_all.rb +0 -23
- data/examples/www/find_cover_from_amazon.rb +0 -16
- data/examples/www/find_url_from_rainbow.rb +0 -12
- data/examples/www/list.rb +0 -13
- data/lib/rbook/bisac.rb +0 -31
- data/lib/rbook/bisac/message.rb +0 -99
- data/lib/rbook/bisac/po.rb +0 -97
- data/lib/rbook/bisac/po_line_item.rb +0 -33
- data/lib/rbook/bisac/product.rb +0 -176
- data/lib/rbook/errors.rb +0 -8
- data/lib/rbook/gbip.rb +0 -21
- data/lib/rbook/gbip/pos.rb +0 -118
- data/lib/rbook/gbip/title.rb +0 -36
- data/lib/rbook/gbip/warehouse.rb +0 -27
- data/lib/rbook/isbn.rb +0 -255
- data/lib/rbook/onix.rb +0 -70
- data/lib/rbook/onix/contributor.rb +0 -60
- data/lib/rbook/onix/lists.rb +0 -2
- data/lib/rbook/onix/lists/contributor_role.rb +0 -10
- data/lib/rbook/onix/lists/product_form.rb +0 -100
- data/lib/rbook/onix/message.rb +0 -112
- data/lib/rbook/onix/product.rb +0 -189
- data/lib/rbook/onix/sales_restriction.rb +0 -51
- data/lib/rbook/onix/stream_reader.rb +0 -120
- data/lib/rbook/onix/stream_writer.rb +0 -40
- data/lib/rbook/onix/supply_detail.rb +0 -68
- data/lib/rbook/onix/xchar.rb +0 -98
- data/lib/rbook/pacstream.rb +0 -64
- data/lib/rbook/titlepage.rb +0 -37
- data/lib/rbook/titlepage/client.rb +0 -126
- data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
- data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
- data/lib/rbook/titlepage/wwwclient.rb +0 -96
- data/lib/rbook/www.rb +0 -172
- data/lib/rbook/www/aau_scraper.rb +0 -76
- data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
- data/lib/rbook/www/ban_scraper.rb +0 -62
- data/lib/rbook/www/base.rb +0 -87
- data/lib/rbook/www/harper_au_scraper.rb +0 -56
- data/lib/rbook/www/harper_us_scraper.rb +0 -55
- data/lib/rbook/www/hha_scraper.rb +0 -50
- data/lib/rbook/www/macmillan_scraper.rb +0 -62
- data/lib/rbook/www/orbis_scraper.rb +0 -48
- data/lib/rbook/www/oup_scraper.rb +0 -64
- data/lib/rbook/www/paulist_scraper.rb +0 -53
- data/lib/rbook/www/pearson_au_scraper.rb +0 -52
- data/lib/rbook/www/penguin_scraper.rb +0 -45
- data/lib/rbook/www/random_au_scraper.rb +0 -90
- data/lib/rbook/www/random_us_scraper.rb +0 -59
- data/lib/rbook/www/sas_scraper.rb +0 -54
- data/lib/rbook/www/unireps_scraper.rb +0 -58
- data/lib/rbook/www/wiley_us_scraper.rb +0 -54
- data/test/data/abingdon.xml +0 -38931
- data/test/data/augsburg.xml +0 -39009
- data/test/data/bisac_po.txt +0 -112
- data/test/data/chalice.xml +0 -10851
- data/test/data/eerdsman.xml +0 -36942
- data/test/data/invalid_no_product.xml +0 -9
- data/test/data/not_xml.csv +0 -1
- data/test/data/single_product.xml +0 -50
- data/test/data/valid_bisac.txt +0 -213
- data/test/data/xml_not_onix.xml +0 -7
- data/test/mocks/titlepage_driver.rb +0 -111
- data/test/unit/bisac/bisac_test.rb +0 -96
- data/test/unit/bisac/po_line_item_test.rb +0 -38
- data/test/unit/bisac/po_test.rb +0 -82
- data/test/unit/isbn_test.rb +0 -153
- data/test/unit/onix/contributor_test.rb +0 -50
- data/test/unit/onix/message_test.rb +0 -119
- data/test/unit/onix/product_test.rb +0 -101
- data/test/unit/onix/sales_restriction_test.rb +0 -48
- data/test/unit/onix/stream_reader_test.rb +0 -22
- data/test/unit/onix/stream_writer_test.rb +0 -32
- data/test/unit/onix/supply_detail_test.rb +0 -53
- data/test/unit/onix/xchar_test.rb +0 -37
- data/test/unit/titlepage_test.rb +0 -140
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'rbook/isbn'
require 'rbook/errors'

module RBook
  module TitlePage

    # Screen-scraping client for the www.titlepage.com website.
    #
    # You should be aware of any limits of query volume imposed by the
    # provider - currently a maximum of 30 queries per minute is permitted.
    class WWWClient

      TITLEPAGE_DOMAIN = "www.titlepage.com"

      # Retrieve the ONIX record for the supplied ISBN as a raw string.
      #
      # Requires a prior call to #login so that @cookie holds a valid
      # session cookie. Returns nil when the site has no record for the ISBN.
      # Raises ArgumentError when the ISBN cannot be converted to ISBN-13.
      def get_onix_file(isbn)
        isbn = RBook::ISBN.convert_to_isbn13(isbn)
        raise ArgumentError, 'Invalid ISBN supplied' if isbn.nil?

        # session cookie captured by #login; nil if login was never called
        headers = { 'Cookie' => @cookie }

        search_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "posted=yes",
            "quicksearch=#{isbn}",
            "qsrchby=ean",
            "detailed=Search"
          ].join("&")
          http.post('/results.php', data, headers)
        end

        # the site's internal record id is embedded in an onclick handler
        # on the results page; no match means the title is unknown
        regex = /onclick=\"bookPopUp\(\'(.+)\'\);\"/
        code = search_response.body.match(regex)
        return nil if code.nil?
        code = code[1]

        onix_file = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "download=Download",
            "rr=#{code}"
          ].join("&")
          http.post('/detail.php', data, headers)
        end
        onix_file.body
      end

      # login to the titlepage website and remember the session cookie
      # for subsequent requests.
      def login(username, password)
        login_response = Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          data = [
            "usr=#{username}",
            "pwd=#{password}",
            "login=Login"
          ].join("&")
          http.post('/index.php', data)
        end
        @cookie = login_response['set-cookie']
      end

      # logout from the titlepage website. Safe to call when not logged in.
      def logout
        if @cookie
          Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
            http.get("/logout.php")
          end
          @cookie = nil
        end
      end

      # a convenience method to make queries to title page a little cleaner. This function
      # essentially calls the login and logout functions for you automatically.
      #
      #   RBook::TitlePage::WWWClient.open("username","password") do |tp|
      #     result = tp.get_onix_file("9780091835132")
      #   end
      def self.open(username, password)
        tp = self.new
        begin
          tp.login(username, password)
          yield(tp)
        ensure
          # always release the session, even if the block raises
          tp.logout
        end
      end

    end
  end
end
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")

require 'rubygems'
require 'scrapi'

require 'rbook/isbn'
require 'rbook/www/base'

# load all scraping classes
files = Dir.entries(File.dirname(__FILE__) + '/www/')
files.delete(".")
files.delete("..")
files.delete(".svn")
files.delete("base.rb")
files.each do |file|
  require 'rbook/www/' + file
end

module RBook
  # A set of classes to make scraping title information from various publisher websites easier.
  #
  # Basic usage:
  #   require 'rubygems'
  #   require 'rbook/www'
  #   RBook::WWW.find_info(:first, "1841492280", :penguin)
  #   #=> Hash
  #   RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
  #   #=> Array of Hashes
  module WWW

    # Find any information possible about the supplied isbn using the
    # specified scrapers.
    #
    # - search_type - :first (return the first hit) or :all (collect every hit)
    # - isbn - a valid isbn10 or isbn13
    # - scrapers - a symbol or array of symbols specifying which scrapers to search with
    #
    #   RBook::WWW.find_info(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_info(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_info)
    end

    # Find a cover image for the supplied isbn using the specified scrapers.
    # Takes the same arguments as find_info.
    #
    #   RBook::WWW.find_cover(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_cover(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_cover(search_type, isbn, scrapers)
      search(search_type, isbn, scrapers, :get_cover)
    end

    # Find a product page URL for the supplied isbn using the specified
    # scrapers. Takes the same arguments as find_info.
    #
    #   RBook::WWW.find_url(:first, "1841492280", :penguin)
    #   #=> Hash
    #   RBook::WWW.find_url(:all, "1841492280", [:penguin, :harpercollins_au])
    #   #=> Array of Hashes
    def self.find_url(search_type, isbn, scrapers)
      # NOTE: the previous copy of this logic had `!result.nil` (missing ?),
      # which raised NoMethodError whenever :all collected a hit - fixed by
      # routing through the shared search helper below.
      search(search_type, isbn, scrapers, :get_url)
    end

    # returns an array of all available scrapers
    def self.scrapers
      Base.scrapers
    end

    # Shared implementation behind find_info / find_cover / find_url.
    # Validates arguments, resolves scraper symbols to classes, then invokes
    # +query_method+ (:get_info, :get_cover or :get_url) on each scraper.
    # Returns the first non-nil result for :first, an array of results for
    # :all, or nil when nothing was found.
    def self.search(search_type, isbn, scrapers, query_method)
      raise ArgumentError, 'search_type must be :first or :all' if !search_type.eql?(:first) && !search_type.eql?(:all)
      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
      raise ArgumentError, 'scrapers must be a symbol or array of symbols' unless scrapers.kind_of?(Symbol) || scrapers.kind_of?(Array)

      isbn = ISBN::convert_to_isbn13(isbn)

      if scrapers.kind_of?(Symbol)
        scrapers = [Base::find_scraper(scrapers)]
      else
        scrapers = Base::find_scrapers(scrapers)
      end

      results = []

      scrapers.each do |scraper|
        result = scraper.new.send(query_method, isbn)
        next if result.nil?
        return result if search_type.eql?(:first)
        results << result
      end

      results.empty? ? nil : results
    end
    private_class_method :search

  end
end

module RBook
  module WWW

    # Scrapes title information from the Allen and Unwin website.
    class AAUScraper < Base

      SCRAPER_ID = :aau
      SCRAPER_NAME = "Allen and Unwin".freeze
      SCRAPER_SITE = "http://www.allenandunwin.com/".freeze

      #add_publisher( self, "9781741100000", "9781741199999" )
      #add_publisher( self, "9781865000000", "9781865099999" )
      add_scraper( self )

      def initialize
        @url_protocol = "http://"
        @url_host = "www.allenandunwin.com"
        @url_path = "/bookseller/product.aspx?ISBN="
      end

      # Fetch the product page for +isbn+ and scrape it into a Hash of
      # title details. Returns nil when the page has no title (unknown ISBN).
      # Raises ArgumentError for an invalid ISBN.
      def get_info(isbn)

        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        rba = Scraper.define do
          process "span#lblISBN", :isbn => :text
          process "h1>span#lblBookTitle", :title => :text
          process "span#lblAusRRP", :rrp => :text
          process "span#lblPublisher", :publisher => :text
          process "span#lblImprint", :imprint => :text
          process "span#lblBinding", :form => :text
          process "span#lblExtent", :pages => :text
          process "span#lblPubDate", :pubdate => :text
          process "span#lblDescription", :description => :text
          process "span#lblAuthor_bio", :authorbio => :text
          process "a#hypHiRes", :cover_large => "@href"
          process "a#imgProduct", :cover_thumb => "@href"
          result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(get_link(isbn))
        result = rba.scrape(content)

        if result.title.nil? || result.title == ""
          return nil
        else

          # each label on the page repeats its caption, so strip the
          # caption prefix from every scraped value
          info = {}
          info[:isbn] = result.isbn.gsub("ISBN : ", "")
          info[:title] = result.title unless result.title.nil?
          info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
          # BUGFIX: this guard previously checked result.imprint, so a page
          # with a publisher but no imprint dropped the publisher, and a
          # page with an imprint but no publisher raised NoMethodError
          info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.publisher.nil?
          info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
          info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
          info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
          info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
          info[:description] = result.description unless result.description.nil?
          info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
          # the hi-res link is relative (starts with ".."), so trim the
          # leading two chars before prefixing the host
          info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
          info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
          info[:link] = get_link(isbn)
          info[:from_name] = SCRAPER_NAME
          info[:from_url] = SCRAPER_SITE
          return info
        end
      end

      # Build the product page URL for +isbn+, or nil if the ISBN is invalid.
      # The site is keyed on ISBN-10, so convert before building the link.
      def get_link(isbn)
        return nil unless ISBN::valid_isbn?(isbn)
        return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
      end
    end
  end
end

module RBook
  module WWW

    # Retrieves cover images via Amazon's static image server.
    class AmazonUKScraper < Base

      SCRAPER_ID = :amazon_uk
      SCRAPER_NAME = "Amazon UK".freeze
      SCRAPER_SITE = "http://www.amazon.co.uk/".freeze

      #add_retailer( self )
      add_scraper( self )

      # Fetch the large cover image for +isbn+.
      #
      # Returns a Hash with :data (the raw jpeg bytes) and :content_type
      # keys, or nil when no usable cover is available or the ISBN cannot
      # be converted.
      def get_cover(isbn)
        isbn = ISBN::convert_to_isbn13(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn.nil? || isbn10.nil?

        # amazon's image server is keyed on ISBN-10
        link = "http://images.amazon.com/images/P/" + isbn10 + ".02.LZZZZZZZ.jpg"

        begin
          response = Net::HTTP.get_response URI.parse(link)
          # a body of 807 bytes or less is rejected - presumably the
          # server's "no image" placeholder; TODO confirm threshold
          return nil if response.code != "200"
          return nil if response.body.size <= 807

          { :data => response.body, :content_type => "image/jpeg" }
        rescue
          # best-effort scraper: any network/parse failure means "no cover"
          return nil
        end
      end

    end
  end
end

module RBook
  module WWW

    # Scrapes title information from the Barnes and Noble website.
    class BarnesAndNobleScraper < Base

      SCRAPER_ID = :barnesandnoble
      SCRAPER_NAME = "Barnes and Noble".freeze
      # BUGFIX: was "barnesandnole.com" (misspelled domain)
      SCRAPER_SITE = "http://www.barnesandnoble.com/".freeze

      add_scraper( self )

      # Fetch the product page for +isbn+ and scrape it into a Hash of
      # title details, including thumbnail and (when available) large
      # cover image URLs. Returns nil when no title is found.
      def get_info(isbn)

        @protocol = "http://"
        @host = "search.barnesandnoble.com"
        @path = "/booksearch/isbninquiry.asp?z=y&cds2Pid=9481&isbn="
        @imgviewer_path = "/booksearch/imageviewer.asp?z=y&ean="
        # the search pages are keyed on ISBN-10
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)
        @imgviewer_link = @protocol + @host + @imgviewer_path + ISBN::convert_to_isbn10(isbn)

        main = Scraper.define do
          process "h1#title", :title => :text
          process "h2#contributor>a", :author => :text
          process "li.format", :form => :text
          process "div#coverImage>a>noscript>img", :cover_thumb => "@src"
          result :title, :author, :form, :cover_thumb
        end

        # the large cover lives on a separate image-viewer page
        imgscraper = Scraper.define do
          process "div>img[alt=Cover Image]", :cover_large => "@src"
          result :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result = main.scrape(content)
        if result.title.nil?
          return nil
        else

          info = {}
          info[:isbn] = isbn
          info[:title] = result.title unless result.title.nil?
          info[:author] = result.author unless result.author.nil?
          info[:format] = result.form unless result.form.nil?
          info[:cover_thumb] = result.cover_thumb
          info[:link] = @link
          info[:from_name] = SCRAPER_NAME
          info[:from_url] = SCRAPER_SITE

          # second request for the large cover; a single-field scrapi
          # result yields the value directly rather than a struct
          content = Net::HTTP.get URI.parse(@imgviewer_link)
          result = imgscraper.scrape(content)

          info[:cover_large] = result unless result.nil?

          return info
        end
      end

    end
  end
end