rbook 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +340 -0
- data/LICENSE +13 -0
- data/README +16 -0
- data/Rakefile +206 -0
- data/examples/titlepage.rb +14 -0
- data/examples/www/find_all.rb +23 -0
- data/examples/www/find_cover_from_amazon.rb +12 -0
- data/examples/www/find_url_from_rainbow.rb +12 -0
- data/examples/www/list.rb +13 -0
- data/lib/rbook/bisac.rb +175 -0
- data/lib/rbook/errors.rb +7 -0
- data/lib/rbook/isbn.rb +249 -0
- data/lib/rbook/onix.rb +68 -0
- data/lib/rbook/onix/contributor.rb +60 -0
- data/lib/rbook/onix/lists.rb +2 -0
- data/lib/rbook/onix/lists/contributor_role.rb +10 -0
- data/lib/rbook/onix/lists/product_form.rb +100 -0
- data/lib/rbook/onix/message.rb +101 -0
- data/lib/rbook/onix/product.rb +188 -0
- data/lib/rbook/onix/sales_restriction.rb +51 -0
- data/lib/rbook/onix/supply_detail.rb +68 -0
- data/lib/rbook/onix/xchar.rb +98 -0
- data/lib/rbook/titlepage.rb +96 -0
- data/lib/rbook/titlepage/TitleQueryClient.rb +62 -0
- data/lib/rbook/titlepage/titlepage_driver.rb +134 -0
- data/lib/rbook/titlepage/titlepage_utils.rb +374 -0
- data/lib/rbook/www.rb +172 -0
- data/lib/rbook/www/aau_scraper.rb +76 -0
- data/lib/rbook/www/amazon_uk_scraper.rb +44 -0
- data/lib/rbook/www/base.rb +87 -0
- data/lib/rbook/www/harper_au_scraper.rb +56 -0
- data/lib/rbook/www/harper_us_scraper.rb +55 -0
- data/lib/rbook/www/hha_scraper.rb +50 -0
- data/lib/rbook/www/macmillan_scraper.rb +62 -0
- data/lib/rbook/www/orbis_scraper.rb +48 -0
- data/lib/rbook/www/oup_scraper.rb +64 -0
- data/lib/rbook/www/paulist_scraper.rb +53 -0
- data/lib/rbook/www/pearson_au_scraper.rb +52 -0
- data/lib/rbook/www/penguin_scraper.rb +45 -0
- data/lib/rbook/www/random_au_scraper.rb +90 -0
- data/lib/rbook/www/random_us_scraper.rb +59 -0
- data/lib/rbook/www/sas_scraper.rb +54 -0
- data/lib/rbook/www/unireps_scraper.rb +58 -0
- data/lib/rbook/www/wiley_us_scraper.rb +54 -0
- data/test/data/abingdon.xml +38931 -0
- data/test/data/augsburg.xml +39009 -0
- data/test/data/chalice.xml +10851 -0
- data/test/data/eerdsman.xml +36942 -0
- data/test/data/invalid_no_product.xml +9 -0
- data/test/data/not_xml.csv +1 -0
- data/test/data/single_product.xml +50 -0
- data/test/data/xml_not_onix.xml +7 -0
- data/test/mocks/titlepage_driver.rb +107 -0
- data/test/unit/bisac_test.rb +57 -0
- data/test/unit/isbn_test.rb +149 -0
- data/test/unit/onix/contributor_test.rb +50 -0
- data/test/unit/onix/message_test.rb +119 -0
- data/test/unit/onix/product_test.rb +101 -0
- data/test/unit/onix/sales_restriction_test.rb +48 -0
- data/test/unit/onix/supply_detail_test.rb +53 -0
- data/test/unit/onix/xchar_test.rb +37 -0
- data/test/unit/titlepage_test.rb +127 -0
- metadata +130 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Oxford University Press catalogue at au.oup.com.
    #
    # Looks a title up through the site's book search page and extracts the
    # bibliographic details found in the returned HTML.
    class OUPScraper < Base

      SCRAPER_ID   = :oup
      SCRAPER_NAME = "Oxford University Press".freeze
      SCRAPER_SITE = "http://au.oup.com/".freeze

      #add_publisher( self, "978019200000", "978019999999")
      add_scraper( self )

      # Fetches the OUP search page for +isbn+ and scrapes it.
      #
      # The supplied ISBN is converted to ISBN-10 before being appended to
      # the search URL.
      #
      # Returns nil when the page contains no "div.isbn13" element, otherwise
      # a Hash with :isbn, :title, :author, :description, :link, :from_name
      # and :from_url. :cover_thumb is added when a cover image was found,
      # and :published, :pages, :format and :rrp are added when the
      # "div.BookInfo" text has the expected layout.
      def get_info(isbn)
        isbn = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host     = "au.oup.com"
        @path     = "/searchbuy/SearchBook.asp?isbn="
        @link     = @protocol + @host + @path + isbn

        oup = Scraper.define do
          process "div.isbn13", :isbn => :text
          process "td.title", :title => :text
          process "td.author", :author => :text
          process "div.BookInfo", :misc => :text
          process "td>p.blurb", :description => :text
          process "td>img[width=100]", :cover => "@src"
          result :isbn, :title, :author, :misc, :description, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = oup.scrape(content)

        return nil if result.isbn.nil?

        info = {}
        info[:isbn]        = result.isbn.gsub(/\ ISBN-13:\n/, "")
        info[:title]       = result.title
        info[:author]      = result.author
        info[:description] = result.description
        # The cover src is appended to the host; skip it when no image matched,
        # since String + nil would raise a TypeError.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]        = @link

        # A successful match is all that is required here: MatchData#length
        # counts the whole match plus the five groups (6), so the previous
        # "length == 5" test could never succeed. The fifth group is captured
        # but not used.
        tmp = result.misc && result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
        if tmp
          info[:published] = tmp[1]
          info[:pages]     = tmp[2]
          info[:format]    = tmp[3]
          info[:rrp]       = tmp[4]
        end

        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Paulist Press site (www.paulistpress.com).
    #
    # Product pages are addressed by a file name built from the last five
    # characters of the ISBN-10.
    class PaulistScraper < Base

      SCRAPER_ID   = :paulist
      SCRAPER_NAME = "Paulist Press".freeze
      SCRAPER_SITE = "http://www.paulistpress.com/".freeze

      #add_publisher( self, "9780809100000", "9780809199999" )
      add_scraper( self )

      # Fetches and scrapes the Paulist Press page for +isbn+.
      #
      # Raises ArgumentError when +isbn+ does not pass ISBN::valid_isbn?.
      #
      # Returns nil when no title heading is found, otherwise a Hash with
      # :isbn (converted to ISBN-13 when it was not one already), :title,
      # :link, :from_name and :from_url, plus :cover_thumb when a cover image
      # was found.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host     = "www.paulistpress.com"
        @path     = "/"
        # Last five characters of the ISBN-10; the page name is the first
        # four of them, a hyphen, then the final character.
        @abv_isbn = ISBN::convert_to_isbn10(isbn)[-5, 5]
        @suffix   = ".html"
        @link     = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix

        rba = Scraper.define do
          process "tr>td>h4", :title => :text
          process "img[width=120][height=180]", :cover => "@src"
          result :title, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        return nil if result.title.nil?

        info = {}
        info[:isbn]  = isbn
        info[:title] = result.title.gsub("Details for ", "")
        # Only build the cover URL when an image matched; String + nil would
        # otherwise raise a TypeError for pages that have a title but no cover.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Pearson Education Australia catalogue
    # (www.pearsoned.com.au).
    class PearsonAUScraper < Base

      SCRAPER_ID   = :pearson_au
      SCRAPER_NAME = "Pearson Education Australia".freeze
      SCRAPER_SITE = "http://www.pearsoned.com.au/".freeze

      #add_publisher( self, "9780130000000", "9780139999999" )
      #add_publisher( self, "9780321000000", "9780321999999" )
      #add_publisher( self, "9780201000000", "9780201999999" )
      add_scraper( self )

      # Fetches and scrapes the Pearson AU title-details page for +isbn+.
      #
      # The ISBN is converted to ISBN-10 for the URL only; the value returned
      # under :isbn is the argument exactly as supplied.
      #
      # Returns nil when no price label is found, otherwise a Hash with
      # :isbn, :pubdate, :rrp, :link, :from_name and :from_url, plus :title
      # and :cover_thumb when those elements were found.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.pearsoned.com.au"
        @path     = "/Catalogue/TitleDetails.aspx?isbn="
        @link     = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        rba = Scraper.define do
          process "span.PageHeading", :title => :text
          process "span#_publishedLabel", :pubdate => :text
          process "span#_priceLabel", :rrp => :text
          process "img.CoverImage", :cover_thumb => "@src"
          result :title, :pubdate, :rrp, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        return nil if result.rrp.nil?

        info = {}
        info[:isbn] = isbn
        # The title was scraped but previously never returned.
        info[:title] = result.title unless result.title.nil?
        info[:pubdate] = result.pubdate
        info[:rrp]     = result.rrp
        # Skip the cover when no image matched; String + nil raises TypeError.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb unless result.cover_thumb.nil?
        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Penguin Books Australia catalogue (www.penguin.com.au).
    class PenguinScraper < Base

      SCRAPER_ID   = :penguin
      SCRAPER_NAME = "Penguin Books Australia".freeze
      SCRAPER_SITE = "http://www.penguin.com.au/".freeze

      #add_publisher( self, "9781857200000", "9781857299999" )
      add_scraper( self )

      # Fetches and scrapes the Penguin AU title-details page for +isbn+.
      #
      # The ISBN is converted to ISBN-10 for the URL only; the value returned
      # under :isbn is the argument exactly as supplied.
      #
      # Returns nil when no cover image is found, otherwise a Hash with
      # :isbn, :cover_thumb, :link, :from_name and :from_url, plus :title
      # when a title element was found.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.penguin.com.au"
        @path     = "/catalog/search-title-details.cfm?SBN="
        @link     = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        rba = Scraper.define do
          process "font>b", :title => :text
          process "img[alt=cover]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        return nil if result.cover_thumb.nil?

        info = {}
        info[:isbn] = isbn
        # The title was scraped but previously never returned.
        info[:title] = result.title unless result.title.nil?
        # Removes every literal ".." from the image path (String argument, so
        # not a regex) before appending it to the host. Presumably the src is
        # a "../"-style relative path -- TODO confirm against the live markup.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb.gsub("..", "")
        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module RBook
  module WWW

    # Scraper for the Random House Australia catalogue
    # (www.randomhouse.com.au).
    class RandomAUScraper < Base

      SCRAPER_ID   = :random_au
      SCRAPER_NAME = "Random AU".freeze
      SCRAPER_SITE = "http://www.randomhouse.com.au/".freeze

      #add_publisher( self, "9781400000000", "9781400099999" )
      #add_publisher( self, "9781863200000", "9781863299999" )
      #add_publisher( self, "9781904900000", "9781904999999" )
      add_scraper( self )

      # Downloads the best available cover image for +isbn+.
      #
      # Uses get_info to find a cover URL, preferring :cover_large, then
      # :cover_medium, then :cover_thumb.
      #
      # Returns nil when no info or cover URL is available, when the image
      # request does not answer with status 200, or when the download raises
      # a StandardError. Otherwise returns a Hash with :data (the response
      # body) and :content_type (always "image/jpeg").
      def get_cover(isbn)
        info = get_info(isbn)
        # kind_of? is false for nil as well, so this covers "no result" too.
        return nil unless info.kind_of?(Hash)

        link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
        return nil if link.nil?

        begin
          response = Net::HTTP.get_response URI.parse(link)
          # Previously this raised a string that the rescue below swallowed
          # immediately; returning nil directly has the same outcome.
          return nil unless response.code == "200"

          { :data => response.body, :content_type => "image/jpeg" }
        rescue StandardError
          # Best effort: a failed download is reported as "no cover".
          nil
        end
      end

      # Fetches and scrapes the Random House AU book page for +isbn+.
      #
      # +isbn+ is appended to the URL as given, without validation or
      # conversion.
      #
      # Returns nil when the page has no ISBN-13 label, otherwise a Hash with
      # :isbn (the argument as supplied), :title, :author, :form, :imprint,
      # :rrp, :pubdate, :description, :cover_thumb, :cover_large, :link,
      # :from_name and :from_url. Scraped values may be nil when the matching
      # element is absent.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.randomhouse.com.au"
        @path     = "/Books/Default.aspx?Page=Book&ID="
        @link     = @protocol + @host + @path + isbn

        random = Scraper.define do
          process "td.bookTitle", :title => :text
          process "span#rBodyModule__ctl0_lblISBN13", :isbn => :text
          process "span#rBodyModule__ctl0_lblAuthor", :author => :text
          process "span#rBodyModule__ctl0_lblFormat", :form => :text
          process "span#rBodyModule__ctl0_lblImprint", :imprint => :text
          process "span#rBodyModule__ctl0_lblRRP", :rrp => :text
          process "span#rBodyModule__ctl0_lblRelease", :pubdate => :text
          process "td.detBook>table>tr>td.standard[colspan=2]", :description => :text
          process "img#rBodyModule__ctl0_imgBook", :cover_thumb => "@src"
          process "a#rBodyModule__ctl0_hl300Image", :cover_large => "@href"
          result :title, :isbn, :author, :form, :imprint, :rrp, :pubdate, :description, :cover_thumb, :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = random.scrape(content)

        return nil if result.isbn.nil?

        info = {}
        info[:isbn]        = isbn
        info[:title]       = result.title
        info[:author]      = result.author
        info[:form]        = result.form
        info[:imprint]     = result.imprint
        info[:rrp]         = result.rrp
        info[:pubdate]     = result.pubdate
        info[:description] = result.description
        info[:cover_thumb] = result.cover_thumb
        info[:cover_large] = result.cover_large
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module RBook
  module WWW

    # Scraper for the Random House US catalogue (www.randomhouse.com).
    class RandomUSScraper < Base

      SCRAPER_ID   = :random_us
      SCRAPER_NAME = "Random US".freeze
      SCRAPER_SITE = "http://www.randomhouse.com/".freeze

      #add_publisher( self, "9780517300000", "9780517399999" )
      #add_publisher( self, "9780553800000", "9780553899999" )
      #add_publisher( self, "9780307000000", "9780307999999" )
      #add_publisher( self, "9780385500000", "9780385599999" )
      #add_publisher( self, "9780767900000", "9780767999999" )
      #add_publisher( self, "9780679600000", "9780679699999" )
      #add_publisher( self, "9781400000000", "9781400099999" )
      add_scraper( self )

      # Fetches and scrapes the Random House US catalogue page for +isbn+.
      #
      # +isbn+ is appended to the URL as given, without validation or
      # conversion.
      #
      # Returns nil when the page has no title heading, otherwise a Hash with
      # :isbn (the argument as supplied), :title, :author, :form,
      # :description, :link, :from_name and :from_url, plus :cover_large when
      # a cover image was found.
      def get_info(isbn)
        @protocol = "http://"
        @host     = "www.randomhouse.com"
        @path     = "/catalog/display.pperl?isbn="
        @link     = @protocol + @host + @path + isbn

        random = Scraper.define do
          process "div#catalog_display>h1", :title => :text
          process "meta[name=gsa.format]", :form => "@content"
          process "meta[name=gsa.author]", :author => "@content"
          process "div#catalog_content>p", :description => :text
          process "div.rhbw_cover>img", :cover_large => "@src"
          result :title, :author, :form, :description, :cover_large
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = random.scrape(content)

        return nil if result.title.nil?

        info = {}
        info[:isbn]        = isbn
        info[:title]       = result.title
        info[:author]      = result.author
        info[:form]        = result.form
        info[:description] = result.description
        unless result.cover_large.nil?
          # Decode "&amp;" left in the src attribute (the previous
          # gsub("&", "&") replaced the ampersand with itself and did nothing).
          # The "=150" -> "=600" rewrite presumably requests a larger image
          # size -- TODO confirm against the site's image URL scheme.
          cover_path = result.cover_large.gsub("&amp;", "&").gsub("=150", "=600")
          info[:cover_large] = @protocol + @host + cover_path
        end
        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Simon and Schuster US site (www.simonsays.com).
    class SASScraper < Base

      SCRAPER_ID = :sas_us
      SCRAPER_NAME = "Simon and Schuster US".freeze
      SCRAPER_SITE = "http://www.simonsays.com/".freeze

      #add_publisher( self, "9780671700000", "9780671799999" )
      #add_publisher( self, "9780731800000", "9780731899999" )
      #add_publisher( self, "9780743200000", "9780743299999" )
      #add_publisher( self, "9781416500000", "9781416599999" )
      add_scraper( self )

      # Looks +isbn+ up on the Simon and Schuster US book page.
      #
      # Raises ArgumentError when +isbn+ does not pass ISBN::valid_isbn?.
      #
      # Returns nil when the page yields no title, otherwise a Hash holding
      # :isbn (as ISBN-13), :title, :link, :from_name and :from_url, with
      # :cover_thumb included only when a cover image was found.
      def get_info(isbn)
        unless ISBN::valid_isbn?(isbn)
          raise ArgumentError, 'Supplied isbn is not valid'
        end

        # Normalise to ISBN-13 for the returned record.
        if !ISBN::valid_isbn13?(isbn)
          isbn = ISBN::convert_to_isbn13(isbn)
        end

        @protocol = "http://"
        @host = "www.simonsays.com"
        @path = "/subs/book.cfm?areaid=288&isbn="
        # The site is queried with the ISBN-10 form.
        @link = @protocol + @host + @path + ISBN::convert_to_isbn10(isbn)

        page_scraper = Scraper.define do
          process "font[face=Verdana, Arial, Helvetica]>b", :title => :text
          process "td[rowspan=3]>img[border=0]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        html = Net::HTTP.get(URI.parse(@link))
        scraped = page_scraper.scrape(html)

        return nil if scraped.title.nil?

        details = { :isbn => isbn, :title => scraped.title }
        unless scraped.cover_thumb.nil?
          details[:cover_thumb] = @protocol + @host + scraped.cover_thumb
        end
        details[:link] = @link
        details[:from_name] = SCRAPER_NAME
        details[:from_url] = SCRAPER_SITE
        details
      end

    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
module RBook
  module WWW

    # Scraper for the Unireps site (www.unireps.com.au).
    class UnirepsScraper < Base

      SCRAPER_ID = :unireps
      SCRAPER_NAME = "Unireps".freeze
      SCRAPER_SITE = "http://www.unireps.com.au/".freeze

      #add_publisher( self, "9780643000000", "9780643099999" )
      #add_publisher( self, "9780868400000", "9780868499999" )
      #add_publisher( self, "9780908800000", "9780908999999" )
      #add_publisher( self, "9780909600000", "9780909699999" )
      #add_publisher( self, "9781877000000", "9781877099999" )
      #add_publisher( self, "9781920700000", "9781920799999" )
      add_scraper( self )

      # Looks +isbn+ up on the Unireps per-ISBN page.
      #
      # Raises ArgumentError when +isbn+ does not pass ISBN::valid_isbn?.
      #
      # Returns nil when the page yields no title, otherwise a Hash holding
      # :isbn (as ISBN-13), :title, :link, :from_name and :from_url, with
      # :cover_thumb included only when a cover image was found.
      def get_info(isbn)
        unless ISBN::valid_isbn?(isbn)
          raise ArgumentError, 'Supplied isbn is not valid'
        end

        # ISBN-13 goes into the returned record; ISBN-10 names the page and
        # is the alt text used to locate the cover image.
        if !ISBN::valid_isbn13?(isbn)
          isbn = ISBN::convert_to_isbn13(isbn)
        end
        isbn10 = ISBN::convert_to_isbn10(isbn)

        @protocol = "http://"
        @host = "www.unireps.com.au"
        @path = "/isbn/"
        @suffix = ".htm"
        @link = @protocol + @host + @path + isbn10 + @suffix

        page_scraper = Scraper.define do
          process "h4", :title => :text
          process "img[alt=#{isbn10}]", :cover_thumb => "@src"
          result :title, :cover_thumb
        end

        html = Net::HTTP.get(URI.parse(@link))
        scraped = page_scraper.scrape(html)

        return nil if scraped.title.nil?

        # The title is known to be present past the guard above.
        details = { :isbn => isbn, :title => scraped.title }
        unless scraped.cover_thumb.nil?
          # The image src is appended to the /isbn/ path, not the site root.
          details[:cover_thumb] = @protocol + @host + @path + scraped.cover_thumb
        end
        details[:link] = @link
        details[:from_name] = SCRAPER_NAME
        details[:from_url] = SCRAPER_SITE
        details
      end

    end
  end
end
|