rbook 0.4.3 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +13 -176
- metadata +57 -117
- data/COPYING +0 -340
- data/LICENSE +0 -13
- data/README +0 -17
- data/examples/gbip.rb +0 -12
- data/examples/onix/stream_reader.rb +0 -13
- data/examples/pacstream.rb +0 -13
- data/examples/titlepage.rb +0 -14
- data/examples/titlepage_with_proxy.rb +0 -14
- data/examples/titlepage_www.rb +0 -18
- data/examples/www/find_all.rb +0 -23
- data/examples/www/find_cover_from_amazon.rb +0 -16
- data/examples/www/find_url_from_rainbow.rb +0 -12
- data/examples/www/list.rb +0 -13
- data/lib/rbook/bisac.rb +0 -31
- data/lib/rbook/bisac/message.rb +0 -99
- data/lib/rbook/bisac/po.rb +0 -97
- data/lib/rbook/bisac/po_line_item.rb +0 -33
- data/lib/rbook/bisac/product.rb +0 -176
- data/lib/rbook/errors.rb +0 -8
- data/lib/rbook/gbip.rb +0 -21
- data/lib/rbook/gbip/pos.rb +0 -118
- data/lib/rbook/gbip/title.rb +0 -36
- data/lib/rbook/gbip/warehouse.rb +0 -27
- data/lib/rbook/isbn.rb +0 -255
- data/lib/rbook/onix.rb +0 -70
- data/lib/rbook/onix/contributor.rb +0 -60
- data/lib/rbook/onix/lists.rb +0 -2
- data/lib/rbook/onix/lists/contributor_role.rb +0 -10
- data/lib/rbook/onix/lists/product_form.rb +0 -100
- data/lib/rbook/onix/message.rb +0 -112
- data/lib/rbook/onix/product.rb +0 -189
- data/lib/rbook/onix/sales_restriction.rb +0 -51
- data/lib/rbook/onix/stream_reader.rb +0 -120
- data/lib/rbook/onix/stream_writer.rb +0 -40
- data/lib/rbook/onix/supply_detail.rb +0 -68
- data/lib/rbook/onix/xchar.rb +0 -98
- data/lib/rbook/pacstream.rb +0 -64
- data/lib/rbook/titlepage.rb +0 -37
- data/lib/rbook/titlepage/client.rb +0 -126
- data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
- data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
- data/lib/rbook/titlepage/wwwclient.rb +0 -96
- data/lib/rbook/www.rb +0 -172
- data/lib/rbook/www/aau_scraper.rb +0 -76
- data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
- data/lib/rbook/www/ban_scraper.rb +0 -62
- data/lib/rbook/www/base.rb +0 -87
- data/lib/rbook/www/harper_au_scraper.rb +0 -56
- data/lib/rbook/www/harper_us_scraper.rb +0 -55
- data/lib/rbook/www/hha_scraper.rb +0 -50
- data/lib/rbook/www/macmillan_scraper.rb +0 -62
- data/lib/rbook/www/orbis_scraper.rb +0 -48
- data/lib/rbook/www/oup_scraper.rb +0 -64
- data/lib/rbook/www/paulist_scraper.rb +0 -53
- data/lib/rbook/www/pearson_au_scraper.rb +0 -52
- data/lib/rbook/www/penguin_scraper.rb +0 -45
- data/lib/rbook/www/random_au_scraper.rb +0 -90
- data/lib/rbook/www/random_us_scraper.rb +0 -59
- data/lib/rbook/www/sas_scraper.rb +0 -54
- data/lib/rbook/www/unireps_scraper.rb +0 -58
- data/lib/rbook/www/wiley_us_scraper.rb +0 -54
- data/test/data/abingdon.xml +0 -38931
- data/test/data/augsburg.xml +0 -39009
- data/test/data/bisac_po.txt +0 -112
- data/test/data/chalice.xml +0 -10851
- data/test/data/eerdsman.xml +0 -36942
- data/test/data/invalid_no_product.xml +0 -9
- data/test/data/not_xml.csv +0 -1
- data/test/data/single_product.xml +0 -50
- data/test/data/valid_bisac.txt +0 -213
- data/test/data/xml_not_onix.xml +0 -7
- data/test/mocks/titlepage_driver.rb +0 -111
- data/test/unit/bisac/bisac_test.rb +0 -96
- data/test/unit/bisac/po_line_item_test.rb +0 -38
- data/test/unit/bisac/po_test.rb +0 -82
- data/test/unit/isbn_test.rb +0 -153
- data/test/unit/onix/contributor_test.rb +0 -50
- data/test/unit/onix/message_test.rb +0 -119
- data/test/unit/onix/product_test.rb +0 -101
- data/test/unit/onix/sales_restriction_test.rb +0 -48
- data/test/unit/onix/stream_reader_test.rb +0 -22
- data/test/unit/onix/stream_writer_test.rb +0 -32
- data/test/unit/onix/supply_detail_test.rb +0 -53
- data/test/unit/onix/xchar_test.rb +0 -37
- data/test/unit/titlepage_test.rb +0 -140
@@ -1,96 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")
|
2
|
-
|
3
|
-
require 'rbook/isbn'
|
4
|
-
require 'rbook/errors'
|
5
|
-
|
6
|
-
require 'cgi'
require 'net/http'

module RBook
  module TitlePage

    # A client that drives the Titlepage website over plain HTTP form posts
    # and keeps the session cookie handed out at login.
    #
    # You should be aware of any limits of query volume imposed by the provider - currently a
    # maximum of 30 queries per minute is permitted.
    class WWWClient

      TITLEPAGE_DOMAIN = "www.titlepage.com"

      def initialize
        @cookie = nil
      end

      # Search for the supplied ISBN and download the matching ONIX file.
      #
      # - isbn - an ISBN that RBook::ISBN.convert_to_isbn13 can convert
      #
      # Returns the body of the download response, or nil when the search
      # result page contains no bookPopUp link. Raises ArgumentError when the
      # ISBN cannot be converted to an ISBN13.
      def get_onix_file(isbn)
        isbn = RBook::ISBN.convert_to_isbn13(isbn)
        raise ArgumentError, 'Invalid ISBN supplied' if isbn.nil?

        search_response = post_form('/results.php', [
          ["posted", "yes"],
          ["quicksearch", isbn],
          ["qsrchby", "ean"],
          ["detailed", "Search"]
        ], session_headers)

        # The record code is the argument of the first bookPopUp('...') call.
        # A non-greedy character class stops at the closing quote so several
        # links on one line are not captured as a single code.
        found = search_response.body.match(/onclick=\"bookPopUp\(\'([^']+)\'\);\"/)
        return nil if found.nil?

        onix_file = post_form('/detail.php', [
          ["download", "Download"],
          ["rr", found[1]]
        ], session_headers)
        return onix_file.body
      end

      # login to the titlepage website.
      #
      # Stores and returns the value of the response's set-cookie header.
      def login(username, password)
        login_response = post_form('/index.php', [
          ["usr", username],
          ["pwd", password],
          ["login", "Login"]
        ])
        @cookie = login_response['set-cookie']
      end

      # logout from the titlepage website. Does nothing unless a session
      # cookie is held; always returns nil.
      def logout
        return nil unless @cookie

        # The cookie is sent so the server can identify the session to end.
        Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          http.get("/logout.php", session_headers)
        end
        @cookie = nil
      end

      # a convenience method to make queries to title page a little cleaner. This function
      # essentially calls the login and logout functions for you automatically.
      #
      #   RBook::TitlePage::WWWClient.open("username","password") do |tp|
      #     result = tp.get_onix_file("9780091835132")
      #   end
      def self.open(username, password)
        tp = self.new

        begin
          tp.login(username, password)
          yield(tp)
        ensure
          # runs even when login or the block raises
          tp.logout
        end
      end

      private

      # Headers carrying the session cookie, or none when not logged in.
      def session_headers
        @cookie ? { 'Cookie' => @cookie } : {}
      end

      # POST the name/value pairs to path as a URL-encoded form body.
      def post_form(path, fields, headers = nil)
        data = fields.map { |name, value| "#{name}=#{CGI.escape(value.to_s)}" }.join("&")
        Net::HTTP.start(TITLEPAGE_DOMAIN, 80) do |http|
          http.post(path, data, headers)
        end
      end

    end
  end
end
|
data/lib/rbook/www.rb
DELETED
@@ -1,172 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../")
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'scrapi'
|
5
|
-
|
6
|
-
require 'rbook/isbn'
|
7
|
-
require 'rbook/www/base'
|
8
|
-
|
9
|
-
# load all scraping classes
#
# Only Ruby source files are required, so stray entries in the www directory
# (version-control folders, editor backups, subdirectories) are ignored.
# base.rb is skipped because it is already required explicitly. The list is
# sorted so the load order does not depend on the filesystem.
files = Dir.entries(File.dirname(__FILE__) + '/www/').select { |name| File.extname(name) == '.rb' }.sort
files.delete("base.rb")
files.each do |file|
  require 'rbook/www/' + file
end
|
18
|
-
|
19
|
-
module RBook
  # A set of classes to make scraping title information from various publisher websites easier.
  #
  # Basic usage:
  #  require 'rubygems'
  #  require 'rbook/www'
  #  RBook::WWW.find_info(:first, "1841492280", :penguin)
  #  #=> Hash
  #  RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
  #  #=> Array of Hashes
  module WWW

    # Find any information possible about the supplied isbn using the
    # specified scrapers.
    #
    # - search_type - :first to stop at the first scraper that answers, :all to ask every scraper
    # - isbn - a valid isbn10 or isbn13
    # - scrapers - a symbol or array of symbols specifying which scrapers to search with
    #
    # Returns a single result for :first, an array of results for :all, and
    # nil when no scraper returned anything. Raises ArgumentError when any
    # argument fails validation.
    #
    #  RBook::WWW.find_info(:first, "1841492280", :penguin)
    #  #=> Hash
    #  RBook::WWW.find_info(:all, "1841492280", [:penguin, :harpercollins_au])
    #  #=> Array of Hashes
    def self.find_info(search_type, isbn, scrapers)
      query_scrapers(:get_info, search_type, isbn, scrapers)
    end

    # Find a cover for the supplied isbn using the specified scrapers.
    #
    # Arguments, return shape and errors are the same as for find_info; each
    # scraper is asked via its get_cover method.
    #
    #  RBook::WWW.find_cover(:first, "1841492280", :penguin)
    #  RBook::WWW.find_cover(:all, "1841492280", [:penguin, :harpercollins_au])
    def self.find_cover(search_type, isbn, scrapers)
      query_scrapers(:get_cover, search_type, isbn, scrapers)
    end

    # Find a URL for the supplied isbn using the specified scrapers.
    #
    # Arguments, return shape and errors are the same as for find_info; each
    # scraper is asked via its get_url method.
    #
    #  RBook::WWW.find_url(:first, "1841492280", :penguin)
    #  RBook::WWW.find_url(:all, "1841492280", [:penguin, :harpercollins_au])
    def self.find_url(search_type, isbn, scrapers)
      query_scrapers(:get_url, search_type, isbn, scrapers)
    end

    # returns an array of all available scrapers
    def self.scrapers
      Base.scrapers
    end

    # Shared implementation of the find_* methods: validate the arguments,
    # resolve the scraper ids to classes, then call +getter+ on a fresh
    # instance of each scraper in turn.
    def self.query_scrapers(getter, search_type, isbn, scrapers)
      raise ArgumentError, 'search_type must be :first or :all' unless search_type.eql?(:first) || search_type.eql?(:all)
      raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)
      raise ArgumentError, 'scrapers must be a symbol or array of symbols' unless scrapers.kind_of?(Symbol) || scrapers.kind_of?(Array)

      isbn = ISBN::convert_to_isbn13(isbn)

      scraper_classes = if scrapers.kind_of?(Symbol)
        [Base::find_scraper(scrapers)]
      else
        Base::find_scrapers(scrapers)
      end

      results = []
      scraper_classes.each do |scraper|
        result = scraper.new.send(getter, isbn)
        next if result.nil?
        # :first short-circuits on the first scraper that answers
        return result if search_type.eql?(:first)
        results << result
      end

      results.empty? ? nil : results
    end
    private_class_method :query_scrapers

  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper for the Allen and Unwin bookseller product pages.
    class AAUScraper < Base

      SCRAPER_ID = :aau
      SCRAPER_NAME = "Allen and Unwin".freeze
      SCRAPER_SITE = "http://www.allenandunwin.com/".freeze

      #add_publisher( self, "9781741100000", "9781741199999" )
      #add_publisher( self, "9781865000000", "9781865099999" )
      add_scraper( self )

      def initialize
        @url_protocol = "http://"
        @url_host = "www.allenandunwin.com"
        @url_path = "/bookseller/product.aspx?ISBN="
      end

      # Fetch the product page for the supplied isbn and extract its details.
      #
      # Returns a Hash of the fields found on the page (plus :link,
      # :from_name and :from_url), or nil when the page yields no title.
      # Raises ArgumentError when the isbn is not valid.
      def get_info(isbn)

        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        rba = Scraper.define do
          process "span#lblISBN", :isbn => :text
          process "h1>span#lblBookTitle", :title => :text
          process "span#lblAusRRP", :rrp => :text
          process "span#lblPublisher", :publisher => :text
          process "span#lblImprint", :imprint => :text
          process "span#lblBinding", :form => :text
          process "span#lblExtent", :pages => :text
          process "span#lblPubDate", :pubdate => :text
          process "span#lblDescription", :description => :text
          process "span#lblAuthor_bio", :authorbio => :text
          process "a#hypHiRes", :cover_large => "@href"
          process "a#imgProduct", :cover_thumb => "@href"
          result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(get_link(isbn))
        result = rba.scrape(content)

        # a page without a title is treated as "product not found"
        return nil if result.title.nil? || result.title == ""

        # Each field is copied only when the page supplied it; the label
        # text that precedes the value on the page is stripped.
        info = {}
        info[:isbn] = result.isbn.gsub("ISBN : ", "") unless result.isbn.nil?
        info[:title] = result.title
        info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
        info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.publisher.nil?
        info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
        info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
        info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
        info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
        info[:description] = result.description unless result.description.nil?
        info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
        # NOTE(review): /^../ drops the first two characters whatever they are;
        # presumably the href starts with ".." - confirm against the live page.
        info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
        info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
        info[:link] = get_link(isbn)
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE
        return info
      end

      # Build the product page URL for the supplied isbn (the site is queried
      # by ISBN10). Returns nil when the isbn is not valid.
      def get_link(isbn)
        return nil unless ISBN::valid_isbn?(isbn)
        return @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
      end
    end
  end
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Cover-image scraper backed by Amazon's image server.
    class AmazonUKScraper < Base

      SCRAPER_ID = :amazon_uk
      SCRAPER_NAME = "Amazon UK".freeze
      SCRAPER_SITE = "http://www.amazon.co.uk/".freeze

      #add_retailer( self )
      add_scraper( self )

      # Download the cover image for the supplied isbn.
      #
      # Returns a Hash with :data (the response body) and :content_type, or
      # nil when the isbn cannot be converted, the request fails or raises,
      # or the body is 807 bytes or smaller.
      def get_cover(isbn)
        isbn = ISBN::convert_to_isbn13(isbn)
        isbn10 = ISBN::convert_to_isbn10(isbn)

        return nil if [isbn, isbn10].any? { |value| value.nil? }

        # images are addressed by ISBN10
        link = "http://images.amazon.com/images/P/" + isbn10 + ".02.LZZZZZZZ.jpg"

        begin
          response = Net::HTTP.get_response URI.parse(link)
          return nil unless response.code == "200"
          # NOTE(review): 807 bytes is presumably the size of the placeholder
          # image served when no cover exists - confirm.
          return nil if response.body.size <= 807

          { :data => response.body, :content_type => "image/jpeg" }
        rescue
          # any failure while fetching is reported as "no cover"
          nil
        end
      end

    end
  end
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper for the Barnes and Noble ISBN inquiry and image viewer pages.
    class BarnesAndNobleScraper < Base

      SCRAPER_ID = :barnesandnoble
      SCRAPER_NAME = "Barnes and Noble".freeze
      SCRAPER_SITE = "http://www.barnesandnoble.com/".freeze

      add_scraper( self )

      # Fetch the product page for the supplied isbn and extract its details,
      # then fetch the image viewer page for the large cover.
      #
      # Returns a Hash with :isbn, :cover_thumb, :link, :from_name and
      # :from_url, plus :title, :author, :format and :cover_large when they
      # were found, or nil when the product page has no title.
      def get_info(isbn)

        # both pages are queried with the ISBN10 form of the number
        isbn10 = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host = "search.barnesandnoble.com"
        @path = "/booksearch/isbninquiry.asp?z=y&cds2Pid=9481&isbn="
        @imgviewer_path = "/booksearch/imageviewer.asp?z=y&ean="
        @link = @protocol + @host + @path + isbn10
        @imgviewer_link = @protocol + @host + @imgviewer_path + isbn10

        main = Scraper.define do
          process "h1#title", :title => :text
          process "h2#contributor>a", :author => :text
          process "li.format", :form => :text
          process "div#coverImage>a>noscript>img", :cover_thumb => "@src"
          result :title, :author, :form, :cover_thumb
        end

        imgscraper = Scraper.define do
          process "div>img[alt=Cover Image]", :cover_large => "@src"
          result :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result = main.scrape(content)

        # a page without a title is treated as "product not found"
        return nil if result.title.nil?

        info = {}
        info[:isbn] = isbn
        info[:title] = result.title
        info[:author] = result.author unless result.author.nil?
        info[:format] = result.form unless result.form.nil?
        info[:cover_thumb] = result.cover_thumb
        info[:link] = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url] = SCRAPER_SITE

        # second request: the image viewer page carries the large cover
        content = Net::HTTP.get URI.parse(@imgviewer_link)
        cover_large = imgscraper.scrape(content)

        info[:cover_large] = cover_large unless cover_large.nil?

        return info
      end

    end
  end
end
|