rbook 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +340 -0
- data/LICENSE +13 -0
- data/README +16 -0
- data/Rakefile +206 -0
- data/examples/titlepage.rb +14 -0
- data/examples/www/find_all.rb +23 -0
- data/examples/www/find_cover_from_amazon.rb +12 -0
- data/examples/www/find_url_from_rainbow.rb +12 -0
- data/examples/www/list.rb +13 -0
- data/lib/rbook/bisac.rb +175 -0
- data/lib/rbook/errors.rb +7 -0
- data/lib/rbook/isbn.rb +249 -0
- data/lib/rbook/onix.rb +68 -0
- data/lib/rbook/onix/contributor.rb +60 -0
- data/lib/rbook/onix/lists.rb +2 -0
- data/lib/rbook/onix/lists/contributor_role.rb +10 -0
- data/lib/rbook/onix/lists/product_form.rb +100 -0
- data/lib/rbook/onix/message.rb +101 -0
- data/lib/rbook/onix/product.rb +188 -0
- data/lib/rbook/onix/sales_restriction.rb +51 -0
- data/lib/rbook/onix/supply_detail.rb +68 -0
- data/lib/rbook/onix/xchar.rb +98 -0
- data/lib/rbook/titlepage.rb +96 -0
- data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
- data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
- data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
- data/lib/rbook/www.rb +172 -0
- data/lib/rbook/www/aau_scraper.rb +76 -0
- data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
- data/lib/rbook/www/base.rb +87 -0
- data/lib/rbook/www/harper_au_scraper.rb +56 -0
- data/lib/rbook/www/harper_us_scraper.rb +55 -0
- data/lib/rbook/www/hha_scraper.rb +50 -0
- data/lib/rbook/www/macmillan_scraper.rb +62 -0
- data/lib/rbook/www/orbis_scraper.rb +48 -0
- data/lib/rbook/www/oup_scraper.rb +64 -0
- data/lib/rbook/www/paulist_scraper.rb +53 -0
- data/lib/rbook/www/pearson_au_scraper.rb +52 -0
- data/lib/rbook/www/penguin_scraper.rb +45 -0
- data/lib/rbook/www/random_au_scraper.rb +90 -0
- data/lib/rbook/www/random_us_scraper.rb +59 -0
- data/lib/rbook/www/sas_scraper.rb +54 -0
- data/lib/rbook/www/unireps_scraper.rb +58 -0
- data/lib/rbook/www/wiley_us_scraper.rb +54 -0
- data/test/data/abingdon.xml +38931 -0
- data/test/data/augsburg.xml +39009 -0
- data/test/data/chalice.xml +10851 -0
- data/test/data/eerdsman.xml +36942 -0
- data/test/data/invalid_no_product.xml +9 -0
- data/test/data/not_xml.csv +1 -0
- data/test/data/single_product.xml +50 -0
- data/test/data/xml_not_onix.xml +7 -0
- data/test/mocks/titlepage_driver.rb +107 -0
- data/test/unit/bisac_test.rb +57 -0
- data/test/unit/isbn_test.rb +149 -0
- data/test/unit/onix/contributor_test.rb +50 -0
- data/test/unit/onix/message_test.rb +119 -0
- data/test/unit/onix/product_test.rb +101 -0
- data/test/unit/onix/sales_restriction_test.rb +48 -0
- data/test/unit/onix/supply_detail_test.rb +53 -0
- data/test/unit/onix/xchar_test.rb +37 -0
- data/test/unit/titlepage_test.rb +127 -0
- metadata +130 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Scraper for the Allen and Unwin bookseller product pages.
class AAUScraper < Base

  SCRAPER_ID = :aau
  SCRAPER_NAME = "Allen and Unwin".freeze
  SCRAPER_SITE = "http://www.allenandunwin.com/".freeze

  #add_publisher( self, "9781741100000", "9781741199999" )
  #add_publisher( self, "9781865000000", "9781865099999" )
  add_scraper( self )

  # Sets up the URL components used to build product page links.
  def initialize
    @url_protocol = "http://"
    @url_host = "www.allenandunwin.com"
    @url_path = "/bookseller/product.aspx?ISBN="
  end

  # Fetches the product page for the supplied isbn and extracts whatever
  # details can be found on it.
  #
  # isbn - an ISBN accepted by ISBN::valid_isbn?
  #
  # Returns a hash of the fields found, or nil when the page yields no title.
  # Raises ArgumentError when the isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    rba = Scraper.define do
      process "span#lblISBN", :isbn => :text
      process "h1>span#lblBookTitle", :title => :text
      process "span#lblAusRRP", :rrp => :text
      process "span#lblPublisher", :publisher => :text
      process "span#lblImprint", :imprint => :text
      process "span#lblBinding", :form => :text
      process "span#lblExtent", :pages => :text
      process "span#lblPubDate", :pubdate => :text
      process "span#lblDescription", :description => :text
      process "span#lblAuthor_bio", :authorbio => :text
      process "a#hypHiRes", :cover_large => "@href"
      process "a#imgProduct", :cover_thumb => "@href"
      result :isbn, :title, :rrp, :publisher, :imprint, :form, :pages, :pubdate, :description, :authorbio, :cover_thumb, :cover_large
    end

    content = Net::HTTP.get URI.parse(get_link(isbn))
    result = rba.scrape(content)

    # a page without a title is treated as "no such product"
    return nil if result.title.nil? || result.title == ""

    # Each field is only copied across when it was actually found on the
    # page; the label prefixes the site renders are stripped off.
    info = {}
    info[:isbn] = result.isbn.gsub("ISBN : ", "") unless result.isbn.nil?
    info[:title] = result.title
    info[:rrp] = result.rrp.gsub("Australian Price : ", "").gsub(/\sInc. GST\n.+/,"") unless result.rrp.nil?
    # guard on the publisher itself (the imprint was being checked here by mistake)
    info[:publisher] = result.publisher.gsub("Publisher : ", "") unless result.publisher.nil?
    info[:imprint] = result.imprint.gsub("Imprint : ", "") unless result.imprint.nil?
    info[:format] = result.form.gsub("Format : ", "") unless result.form.nil?
    info[:pages] = result.pages.gsub("Number of pages : ", "") unless result.pages.nil?
    info[:pubdate] = result.pubdate.gsub("Publication Date : ", "") unless result.pubdate.nil?
    info[:description] = result.description unless result.description.nil?
    info[:authorbio] = result.authorbio.gsub("About the Author :\n", "") unless result.authorbio.nil?
    # the hi-res href has its first two characters dropped before being
    # appended to the host (presumably a leading ".." - verify against the site)
    info[:cover_large] = @url_protocol + @url_host + result.cover_large.gsub(/^../, "") unless result.cover_large.nil?
    info[:cover_thumb] = @url_protocol + @url_host + result.cover_thumb unless result.cover_thumb.nil?
    info[:link] = get_link(isbn)
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

  # Builds the product page URL for the supplied isbn (the site is queried
  # with the ISBN10 form). Returns nil when the isbn is not valid.
  def get_link(isbn)
    return nil unless ISBN::valid_isbn?(isbn)
    @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn)
  end
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Cover image lookup against the Amazon image server.
class AmazonUKScraper < Base

  SCRAPER_ID = :amazon_uk
  SCRAPER_NAME = "Amazon UK".freeze
  SCRAPER_SITE = "http://www.amazon.co.uk/".freeze

  #add_retailer( self )
  add_scraper( self )

  # Downloads the cover image for the supplied isbn.
  #
  # Returns a hash with :data (the response body) and :content_type, or nil
  # when the isbn cannot be converted, the request does not answer 200, the
  # body is 807 bytes or smaller, or the request raises.
  def get_cover(isbn)
    isbn = ISBN::convert_to_isbn13(isbn)
    isbn10 = ISBN::convert_to_isbn10(isbn)

    return nil if isbn.nil? || isbn10.nil?

    cover_url = "http://images.amazon.com/images/P/" + isbn10 + ".02.LZZZZZZZ.jpg"

    begin
      response = Net::HTTP.get_response URI.parse(cover_url)
      return nil unless response.code == "200"
      # NOTE(review): bodies of 807 bytes or less are discarded - presumably
      # the size of a "no image" placeholder; confirm against the server.
      return nil if response.body.size <= 807

      { :data => response.body, :content_type => "image/jpeg" }
    rescue
      nil
    end
  end

end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module RBook
|
5
|
+
module WWW
|
6
|
+
|
7
|
+
# Superclass of every scraper. It keeps the list of registered scraper
# classes and supplies default implementations that individual scrapers
# override.
class Base

  # registry shared by Base and all of its subclasses
  @@scrapers = []

  # registers a new scraper with the library.
  #  classname - the class to add
  def self.add_scraper(classname)
    @@scrapers << classname
  end

  # find a scraper matching the requested id
  #  id - a scraper id as a symbol
  # returns the first registered class whose SCRAPER_ID matches, or nil
  def self.find_scraper(id)
    @@scrapers.find { |scraper| scraper::SCRAPER_ID == id }
  end

  # find any scrapers matching the requested ids
  #  ids - an array of scraper id's as symbols
  # returns an array (possibly empty) of the matching classes, in
  # registration order
  def self.find_scrapers(ids)
    @@scrapers.select { |scraper| ids.include?(scraper::SCRAPER_ID) }
  end

  # This method can be overwritten in each scraper. It should return a hash containing the binary data
  # and mimetype of the largest cover image it can find for the requested isbn
  #
  # The default implementation asks get_info for the book, picks the largest
  # cover link present in the result and downloads it. Returns nil when no
  # info or link is available, when the response code is not 200, or when the
  # request raises.
  def get_cover(isbn)
    info = get_info(isbn)
    return nil unless info.kind_of?(Hash)

    link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
    return nil if link.nil?

    begin
      response = Net::HTTP.get_response URI.parse(link)
      return nil if response.code != "200"

      { :data => response.body, :content_type => "image/jpeg" }
    rescue
      # any failure while fetching is reported as "no cover"
      nil
    end
  end

  # This method can be overwritten in each scraper. It should return a hash of any information on
  # the requested isbn it can find
  def get_info(isbn)
    nil
  end

  # This method can be overwritten in each scraper. It should return a link to the requested isbn
  # on the targets website
  def get_link(isbn)
    nil
  end

  # return the symbol used to uniquely identify each scraper
  def scraper_id
    # SCRAPER_ID is defined on the concrete scraper class, not on Base, so it
    # has to be looked up through the receiver's class
    self.class::SCRAPER_ID
  end

  # returns the array of all registered scraper classes
  def self.scrapers
    @@scrapers
  end
end
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Scraper for the Harper Collins Australia product catalogue.
class HarperCollinsAUScraper < Base

  SCRAPER_ID = :harper_au
  SCRAPER_NAME = "Harper Collins Australia".freeze
  SCRAPER_SITE = "http://www.harpercollins.com.au/".freeze

  #add_publisher( self, "9780006400000", "9780006499999" )
  #add_publisher( self, "9780007100000", "9780007199999" )
  add_scraper( self )

  # Fetches the catalogue page for the supplied isbn and extracts the title,
  # subtitle, author and cover image location.
  #
  # Returns a hash of the details, or nil when the page yields no title.
  # Raises ArgumentError when the isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    @protocol = "http://"
    @host = "www.harpercollins.com.au"
    @path = "/global_scripts/product_catalog/book_xml.asp?isbn="
    # the page is requested with the ISBN10 form of the number
    @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

    rba = Scraper.define do
      process "div.header", :title => :text
      process "div.subtitle", :subtitle => :text
      process "div.byline", :author => :text
      process "img.bookJacket", :cover => "@src"
      result :title, :subtitle, :author, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)

    return nil if result.title.nil?

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title
    info[:subtitle] = result.subtitle
    # a page can have a title but no byline; only strip the "by " prefix
    # when an author was actually found
    info[:author] = result.author.gsub("by ","") unless result.author.nil?
    info[:cover] = result.cover
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Scraper for the Harper Collins United States book pages.
class HarperCollinsUSScraper < Base

  SCRAPER_ID = :harper_us
  SCRAPER_NAME = "Harper Collins United States".freeze
  SCRAPER_SITE = "http://www.harpercollins.com/".freeze

  #add_publisher( self, "9780060000000", "9780060999999" )
  add_scraper( self )

  # Fetches the book page for the supplied isbn and extracts the title,
  # subtitle, author and cover image locations.
  #
  # Returns a hash of the details, or nil when the page yields no title.
  # Raises ArgumentError when the isbn is not valid.
  def get_info(isbn)
    raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

    isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

    @protocol = "http://"
    @host = "www.harpercollins.com"
    @path = "/book/index.aspx?isbn="
    @link = @protocol + @host + @path + isbn

    rba = Scraper.define do
      process "h1.bookTitle", :title => :text
      process "h2.bookSubTitle", :subtitle => :text
      process "h3.byLine", :author => :text
      process "img.bookJacket", :cover => "@src"
      result :title, :subtitle, :author, :cover
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)

    return nil if result.title.nil?

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title
    info[:subtitle] = result.subtitle
    # author and cover may be absent even when a title was found, so they are
    # only post-processed when present
    info[:author] = result.author.gsub("by ", "") unless result.author.nil?
    info[:cover_thumb] = result.cover
    # NOTE(review): the "large" variant of the image URL is stored under
    # :cover_medium - confirm whether :cover_large was intended
    info[:cover_medium] = result.cover.gsub("medium", "large") unless result.cover.nil?
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Scraper for the Hodder Headline Australia book pages.
class HHAScraper < Base

  SCRAPER_ID = :hha
  SCRAPER_NAME = "Hodder Headline Australia".freeze
  SCRAPER_SITE = "http://www.hha.com.au/".freeze

  #add_publisher( self, "9780340800000", "9780340899999" )
  #add_publisher( self, "9780755300000", "9780755399999" )
  #add_publisher( self, "9780733600000", "9780733699999" )
  add_scraper( self )

  # Fetches the book page for the supplied isbn and extracts the title,
  # author and thumbnail cover location.
  #
  # Returns a hash of the details, or nil when the page has no thumbnail
  # image.
  def get_info(isbn)
    @protocol = "http://"
    @host = "www.hha.com.au"
    @path = "/books/"
    @suffix = ".html"
    # pages are addressed by the ISBN10 form of the number
    @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn) + @suffix

    rba = Scraper.define do
      process "h1.fiction", :title => :text
      process "p.author", :author => :text
      process "p.thumb>img", :cover_thumb => "@src"
      result :title, :author, :cover_thumb
    end

    content = Net::HTTP.get URI.parse(@link)
    result = rba.scrape(content)

    # the thumbnail is used as the marker that a product page was found
    return nil if result.cover_thumb.nil?

    info = {}
    info[:isbn] = isbn
    # the title is scraped above; include it in the result when found
    info[:title] = result.title unless result.title.nil?
    info[:author] = result.author
    info[:cover_thumb] = @protocol + @host + result.cover_thumb
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
|
4
|
+
module WWW
|
5
|
+
|
6
|
+
# Scraper for the Pan Macmillan Australia title pages.
class MacmillanScraper < Base

  SCRAPER_ID = :macmillan
  SCRAPER_NAME = "Pan Macmillan".freeze
  SCRAPER_SITE = "http://www.panmacmillan.com.au/".freeze

  #add_publisher( self, "9780312900000", "9780312999999" )
  #add_publisher( self, "9780330400000", "9780330499999" )
  #add_publisher( self, "9781403000000", "9781405099999" )
  add_scraper( self )

  # Prepares the pieces from which title page links are assembled.
  def initialize
    @url_protocol = "http://"
    @url_host = "www.panmacmillan.com.au"
    @url_path = "/display_title.asp?ISBN="
    # NOTE(review): a fixed Author parameter is appended to every link -
    # presumably the site requires the parameter to be present; confirm.
    @url_suffix = "&Author=Barker,%20Robin"
  end

  # Fetches the title page for the supplied isbn and extracts the title and
  # cover image locations.
  #
  # Returns a hash of the details, or nil when the isbn cannot be converted
  # to ISBN13 or the page yields no title.
  def get_info(isbn)
    isbn = ISBN::convert_to_isbn13(isbn)
    return nil if isbn.nil?

    extractor = Scraper.define do
      process "div.titlecontent>div.isbn>span", :isbn => :text
      process "td[width=70%]>h1", :title => :text
      process "a[title=Click on image to view a larger version]>img", :cover_medium => "@src"
      process "a[title=Click on image to view a larger version]", :cover_large => "@href"
      result :isbn, :title, :cover_medium, :cover_large
    end

    page = Net::HTTP.get URI.parse(get_link(isbn))
    result = extractor.scrape(page)

    return nil if result.title.nil?

    site_root = @url_protocol + @url_host

    info = {}
    info[:isbn] = isbn
    info[:title] = result.title
    # every ".." in the scraped image paths is removed before they are
    # appended to the site root
    unless result.cover_medium.nil?
      info[:cover_medium] = site_root + result.cover_medium.gsub("..", "")
    end
    unless result.cover_large.nil?
      info[:cover_large] = site_root + result.cover_large.gsub("..", "")
    end
    info[:link] = get_link(isbn)
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

  # Assembles the title page URL for the supplied isbn, using its ISBN10
  # form. Returns nil when the isbn is not valid.
  def get_link(isbn)
    return nil unless ISBN::valid_isbn?(isbn)
    @url_protocol + @url_host + @url_path + ISBN::convert_to_isbn10(isbn) + @url_suffix
  end
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
|
2
|
+
module RBook
|
3
|
+
module WWW
|
4
|
+
|
5
|
+
# Scraper for Orbis Books titles; pages are fetched from the
# www.maryknollmall.org host.
class OrbisScraper < Base

  SCRAPER_ID = :orbis
  SCRAPER_NAME = "Orbis Books"
  SCRAPER_SITE = "http://www.orbisbooks.com/"

  #add_publisher( self, "978157070000", "9781570799999")
  add_scraper( self )

  # Fetches the description page for the supplied isbn and extracts the
  # thumbnail cover location.
  #
  # Returns a hash of the details, or nil when no cover image is found on
  # the page.
  def get_info(isbn)
    @protocol = "http://"
    @host = "www.maryknollmall.org"
    @path = "/description.cfm?ISBN="
    # the page is looked up by the ISBN10 after ISBN::add_groups has been
    # applied to it
    @grouped_isbn = ISBN::add_groups(ISBN::convert_to_isbn10(isbn))
    @link = @protocol + @host + @path + @grouped_isbn

    extractor = Scraper.define do
      process "tr>td[colspan=4]>font[size=3]", :description => :text # doesn't currently work
      process "table>tr>td[rowspan=2]>img", :cover_thumb => "@src"
      result :description, :cover_thumb
    end

    page = Net::HTTP.get URI.parse(@link)
    result = extractor.scrape(page)

    # without a cover image the lookup is reported as "not found"
    return nil if result.cover_thumb.nil?

    info = {}
    info[:isbn] = isbn
    info[:cover_thumb] = @protocol + @host + result.cover_thumb
    info[:link] = @link
    info[:from_name] = SCRAPER_NAME
    info[:from_url] = SCRAPER_SITE
    info
  end

end
|
47
|
+
end
|
48
|
+
end
|