rbook 0.4.3 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +13 -176
- metadata +57 -117
- data/COPYING +0 -340
- data/LICENSE +0 -13
- data/README +0 -17
- data/examples/gbip.rb +0 -12
- data/examples/onix/stream_reader.rb +0 -13
- data/examples/pacstream.rb +0 -13
- data/examples/titlepage.rb +0 -14
- data/examples/titlepage_with_proxy.rb +0 -14
- data/examples/titlepage_www.rb +0 -18
- data/examples/www/find_all.rb +0 -23
- data/examples/www/find_cover_from_amazon.rb +0 -16
- data/examples/www/find_url_from_rainbow.rb +0 -12
- data/examples/www/list.rb +0 -13
- data/lib/rbook/bisac.rb +0 -31
- data/lib/rbook/bisac/message.rb +0 -99
- data/lib/rbook/bisac/po.rb +0 -97
- data/lib/rbook/bisac/po_line_item.rb +0 -33
- data/lib/rbook/bisac/product.rb +0 -176
- data/lib/rbook/errors.rb +0 -8
- data/lib/rbook/gbip.rb +0 -21
- data/lib/rbook/gbip/pos.rb +0 -118
- data/lib/rbook/gbip/title.rb +0 -36
- data/lib/rbook/gbip/warehouse.rb +0 -27
- data/lib/rbook/isbn.rb +0 -255
- data/lib/rbook/onix.rb +0 -70
- data/lib/rbook/onix/contributor.rb +0 -60
- data/lib/rbook/onix/lists.rb +0 -2
- data/lib/rbook/onix/lists/contributor_role.rb +0 -10
- data/lib/rbook/onix/lists/product_form.rb +0 -100
- data/lib/rbook/onix/message.rb +0 -112
- data/lib/rbook/onix/product.rb +0 -189
- data/lib/rbook/onix/sales_restriction.rb +0 -51
- data/lib/rbook/onix/stream_reader.rb +0 -120
- data/lib/rbook/onix/stream_writer.rb +0 -40
- data/lib/rbook/onix/supply_detail.rb +0 -68
- data/lib/rbook/onix/xchar.rb +0 -98
- data/lib/rbook/pacstream.rb +0 -64
- data/lib/rbook/titlepage.rb +0 -37
- data/lib/rbook/titlepage/client.rb +0 -126
- data/lib/rbook/titlepage/titlepage_driver.rb +0 -137
- data/lib/rbook/titlepage/titlepage_utils.rb +0 -379
- data/lib/rbook/titlepage/wwwclient.rb +0 -96
- data/lib/rbook/www.rb +0 -172
- data/lib/rbook/www/aau_scraper.rb +0 -76
- data/lib/rbook/www/amazon_uk_scraper.rb +0 -44
- data/lib/rbook/www/ban_scraper.rb +0 -62
- data/lib/rbook/www/base.rb +0 -87
- data/lib/rbook/www/harper_au_scraper.rb +0 -56
- data/lib/rbook/www/harper_us_scraper.rb +0 -55
- data/lib/rbook/www/hha_scraper.rb +0 -50
- data/lib/rbook/www/macmillan_scraper.rb +0 -62
- data/lib/rbook/www/orbis_scraper.rb +0 -48
- data/lib/rbook/www/oup_scraper.rb +0 -64
- data/lib/rbook/www/paulist_scraper.rb +0 -53
- data/lib/rbook/www/pearson_au_scraper.rb +0 -52
- data/lib/rbook/www/penguin_scraper.rb +0 -45
- data/lib/rbook/www/random_au_scraper.rb +0 -90
- data/lib/rbook/www/random_us_scraper.rb +0 -59
- data/lib/rbook/www/sas_scraper.rb +0 -54
- data/lib/rbook/www/unireps_scraper.rb +0 -58
- data/lib/rbook/www/wiley_us_scraper.rb +0 -54
- data/test/data/abingdon.xml +0 -38931
- data/test/data/augsburg.xml +0 -39009
- data/test/data/bisac_po.txt +0 -112
- data/test/data/chalice.xml +0 -10851
- data/test/data/eerdsman.xml +0 -36942
- data/test/data/invalid_no_product.xml +0 -9
- data/test/data/not_xml.csv +0 -1
- data/test/data/single_product.xml +0 -50
- data/test/data/valid_bisac.txt +0 -213
- data/test/data/xml_not_onix.xml +0 -7
- data/test/mocks/titlepage_driver.rb +0 -111
- data/test/unit/bisac/bisac_test.rb +0 -96
- data/test/unit/bisac/po_line_item_test.rb +0 -38
- data/test/unit/bisac/po_test.rb +0 -82
- data/test/unit/isbn_test.rb +0 -153
- data/test/unit/onix/contributor_test.rb +0 -50
- data/test/unit/onix/message_test.rb +0 -119
- data/test/unit/onix/product_test.rb +0 -101
- data/test/unit/onix/sales_restriction_test.rb +0 -48
- data/test/unit/onix/stream_reader_test.rb +0 -22
- data/test/unit/onix/stream_writer_test.rb +0 -32
- data/test/unit/onix/supply_detail_test.rb +0 -53
- data/test/unit/onix/xchar_test.rb +0 -37
- data/test/unit/titlepage_test.rb +0 -140
data/lib/rbook/www/base.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'uri'
|
3
|
-
|
4
|
-
module RBook
  module WWW

    # Common superclass for all website scrapers, and the registry that
    # keeps track of every scraper class that has been loaded.
    #
    # Subclasses are expected to define a SCRAPER_ID constant, call
    # add_scraper(self) from their class body, and override get_info
    # (and optionally get_link / get_cover).
    class Base

      # Registry of scraper classes. A class variable is used on purpose:
      # subclasses call add_scraper from their own class body and must all
      # append to the same list.
      @@scrapers = []

      # Registers a new scraper with the library.
      # classname - the class to add
      def self.add_scraper(classname)
        @@scrapers << classname
      end

      # Finds the scraper matching the requested id.
      # id - a scraper id as a symbol
      # Returns the first registered class whose SCRAPER_ID equals id, or nil.
      def self.find_scraper(id)
        @@scrapers.find { |scraper| scraper::SCRAPER_ID == id }
      end

      # Finds any scrapers matching the requested ids.
      # ids - an array of scraper ids as symbols
      # Returns an array of matching classes in registration order
      # (empty when nothing matches).
      def self.find_scrapers(ids)
        @@scrapers.select { |scraper| ids.include?(scraper::SCRAPER_ID) }
      end

      # Returns the array of all registered scraper classes.
      def self.scrapers
        @@scrapers
      end

      # This method can be overwritten in each scraper. It returns a hash
      # containing the binary data (:data) and mimetype (:content_type) of
      # the largest cover image it can find for the requested isbn, or nil
      # when no cover could be retrieved.
      def get_cover(isbn)
        info = get_info(isbn)
        # kind_of?(Hash) is false for nil as well, so this covers both cases.
        return nil unless info.kind_of?(Hash)

        # Prefer the largest image that the scraper reported.
        link = info[:cover_large] || info[:cover_medium] || info[:cover_thumb]
        return nil if link.nil?

        begin
          response = Net::HTTP.get_response(URI.parse(link))
        rescue StandardError
          # Best effort: a malformed link or a failed request means "no cover".
          return nil
        end
        return nil unless response.code == "200"

        # NOTE(review): the mimetype is hard-coded rather than read from the
        # response headers - confirm that every source only serves JPEGs.
        { :data => response.body, :content_type => "image/jpeg" }
      end

      # This method can be overwritten in each scraper. It should return a
      # hash of any information on the requested isbn it can find.
      # The default implementation finds nothing and returns nil.
      def get_info(isbn)
        nil
      end

      # This method can be overwritten in each scraper. It should return a
      # link to the requested isbn on the target's website.
      # The default implementation returns nil.
      def get_link(isbn)
        nil
      end

      # Returns the symbol used to uniquely identify each scraper.
      #
      # The constant is looked up through self.class because SCRAPER_ID is
      # defined on the subclasses; a bare SCRAPER_ID would be resolved
      # against Base itself and raise NameError.
      def scraper_id
        self.class::SCRAPER_ID
      end
    end

  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper that looks up title information on the Harper Collins
    # Australia website.
    class HarperCollinsAUScraper < Base

      SCRAPER_ID   = :harper_au
      SCRAPER_NAME = "Harper Collins Australia".freeze
      SCRAPER_SITE = "http://www.harpercollins.com.au/".freeze

      #add_publisher( self, "9780006400000", "9780006499999" )
      #add_publisher( self, "9780007100000", "9780007199999" )
      add_scraper( self )

      # Fetches the catalogue page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :subtitle, :author,
      # :cover, :link, :from_name and :from_url, or nil when no title could
      # be scraped from the page.
      # Raises ArgumentError if ISBN::valid_isbn? rejects the supplied isbn.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        # The lookup URL is built from the ISBN-10 form.
        # presumably nil when no ISBN-10 equivalent exists - TODO confirm
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn10.nil?

        @protocol = "http://"
        @host     = "www.harpercollins.com.au"
        @path     = "/global_scripts/product_catalog/book_xml.asp?isbn="
        @link     = @protocol + @host + @path + isbn10

        rba = Scraper.define do
          process "div.header",     :title    => :text
          process "div.subtitle",   :subtitle => :text
          process "div.byline",     :author   => :text
          process "img.bookJacket", :cover    => "@src"
          result :title, :subtitle, :author, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # Without a title the page is treated as "not found".
        return nil if result.nil? || result.title.nil?

        # The byline may be missing even when a title was found; only strip
        # the leading "by " when there is a byline to work on.
        author = result.author
        author = author.gsub("by ", "") unless author.nil?

        info = {}
        info[:isbn]      = isbn
        info[:title]     = result.title
        info[:subtitle]  = result.subtitle
        info[:author]    = author
        info[:cover]     = result.cover
        info[:link]      = @link
        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -1,55 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper that looks up title information on the Harper Collins
    # United States website.
    class HarperCollinsUSScraper < Base

      SCRAPER_ID   = :harper_us
      SCRAPER_NAME = "Harper Collins United States".freeze
      SCRAPER_SITE = "http://www.harpercollins.com/".freeze

      #add_publisher( self, "9780060000000", "9780060999999" )
      add_scraper( self )

      # Fetches the book page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :subtitle, :author,
      # :cover_thumb, :cover_medium, :link, :from_name and :from_url, or nil
      # when no title could be scraped from the page.
      # Raises ArgumentError if ISBN::valid_isbn? rejects the supplied isbn.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        @protocol = "http://"
        @host     = "www.harpercollins.com"
        @path     = "/book/index.aspx?isbn="
        @link     = @protocol + @host + @path + isbn

        rba = Scraper.define do
          process "h1.bookTitle",    :title    => :text
          process "h2.bookSubTitle", :subtitle => :text
          process "h3.byLine",       :author   => :text
          process "img.bookJacket",  :cover    => "@src"
          result :title, :subtitle, :author, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # Without a title the page is treated as "not found".
        return nil if result.nil? || result.title.nil?

        # Byline and jacket image are optional on the page; only transform
        # them when they were actually scraped.
        author = result.author
        author = author.gsub("by ", "") unless author.nil?

        cover  = result.cover
        # NOTE(review): the "medium" -> "large" substitution is stored under
        # :cover_medium - confirm the intended size labels with callers.
        bigger = cover.nil? ? nil : cover.gsub("medium", "large")

        info = {}
        info[:isbn]         = isbn
        info[:title]        = result.title
        info[:subtitle]     = result.subtitle
        info[:author]       = author
        info[:cover_thumb]  = cover
        info[:cover_medium] = bigger
        info[:link]         = @link
        info[:from_name]    = SCRAPER_NAME
        info[:from_url]     = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper that looks up title information on the Hodder Headline
    # Australia website.
    class HHAScraper < Base

      SCRAPER_ID   = :hha
      SCRAPER_NAME = "Hodder Headline Australia".freeze
      SCRAPER_SITE = "http://www.hha.com.au/".freeze

      #add_publisher( self, "9780340800000", "9780340899999" )
      #add_publisher( self, "9780755300000", "9780755399999" )
      #add_publisher( self, "9780733600000", "9780733699999" )
      add_scraper( self )

      # Fetches the book page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :author, :cover_thumb,
      # :link, :from_name and :from_url, or nil when no cover thumbnail
      # could be scraped from the page.
      def get_info(isbn)
        # The page URL is built from the ISBN-10 form.
        # presumably nil when the isbn cannot be converted - TODO confirm
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn10.nil?

        @protocol = "http://"
        @host     = "www.hha.com.au"
        @path     = "/books/"
        @suffix   = ".html"
        @link     = @protocol + @host + @path + isbn10 + @suffix

        rba = Scraper.define do
          process "h1.fiction",  :title       => :text
          process "p.author",    :author      => :text
          process "p.thumb>img", :cover_thumb => "@src"
          result :title, :author, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # A missing thumbnail is treated as "book not found".
        return nil if result.nil? || result.cover_thumb.nil?

        info = {}
        info[:isbn]        = isbn
        # The title is scraped above, so hand it back with the other fields.
        info[:title]       = result.title
        info[:author]      = result.author
        # The scraped src is appended to the site root to build the URL.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook

  module WWW

    # Scraper that looks up title information on the Pan Macmillan
    # Australia website.
    class MacmillanScraper < Base

      SCRAPER_ID   = :macmillan
      SCRAPER_NAME = "Pan Macmillan".freeze
      SCRAPER_SITE = "http://www.panmacmillan.com.au/".freeze

      #add_publisher( self, "9780312900000", "9780312999999" )
      #add_publisher( self, "9780330400000", "9780330499999" )
      #add_publisher( self, "9781403000000", "9781405099999" )
      add_scraper( self )

      # Stores the fixed pieces that title URLs are assembled from.
      def initialize
        @url_protocol = "http://"
        @url_host     = "www.panmacmillan.com.au"
        @url_path     = "/display_title.asp?ISBN="
        # NOTE(review): the same fixed Author value is sent for every lookup -
        # confirm why the site needs it.
        @url_suffix   = "&Author=Barker,%20Robin"
      end

      # Fetches the title page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :link, :from_name and
      # :from_url, plus :cover_medium / :cover_large when those images were
      # found. Returns nil when the isbn cannot be converted, no link can be
      # built, or no title could be scraped.
      def get_info(isbn)
        isbn = ISBN::convert_to_isbn13(isbn)
        return nil if isbn.nil?

        # Build the link once and reuse it for the request and the result.
        link = get_link(isbn)
        return nil if link.nil?

        mac = Scraper.define do
          process "div.titlecontent>div.isbn>span", :isbn => :text
          process "td[width=70%]>h1", :title => :text
          process "a[title=Click on image to view a larger version]>img", :cover_medium => "@src"
          process "a[title=Click on image to view a larger version]", :cover_large => "@href"
          result :isbn, :title, :cover_medium, :cover_large
        end

        content = Net::HTTP.get URI.parse(link)
        result  = mac.scrape(content)

        # Without a title the page is treated as "not found".
        return nil if result.nil? || result.title.nil?

        info = {}
        info[:isbn]         = isbn
        info[:title]        = result.title
        info[:cover_medium] = absolute_url(result.cover_medium) unless result.cover_medium.nil?
        info[:cover_large]  = absolute_url(result.cover_large) unless result.cover_large.nil?
        info[:link]         = link
        info[:from_name]    = SCRAPER_NAME
        info[:from_url]     = SCRAPER_SITE
        info
      end

      # Builds the URL of the title page for the supplied isbn.
      #
      # Returns nil when ISBN::valid_isbn? rejects the isbn or when no
      # ISBN-10 form is available for it.
      def get_link(isbn)
        return nil unless ISBN::valid_isbn?(isbn)

        # presumably nil when no ISBN-10 equivalent exists - TODO confirm
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn10.nil?

        @url_protocol + @url_host + @url_path + isbn10 + @url_suffix
      end

      private

      # Turns a scraped image path into a full URL by removing every ".."
      # from it and prefixing the site root.
      def absolute_url(path)
        @url_protocol + @url_host + path.gsub("..", "")
      end
    end
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper for Orbis Books titles. The product pages themselves are
    # fetched from www.maryknollmall.org.
    class OrbisScraper < Base

      SCRAPER_ID   = :orbis
      SCRAPER_NAME = "Orbis Books".freeze
      SCRAPER_SITE = "http://www.orbisbooks.com/".freeze

      #add_publisher( self, "978157070000", "9781570799999")
      add_scraper( self )

      # Fetches the description page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :cover_thumb, :link, :from_name
      # and :from_url, or nil when no cover thumbnail could be scraped.
      def get_info(isbn)
        # The page is addressed by the grouped ISBN-10 form.
        # presumably nil when the isbn cannot be converted - TODO confirm
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn10.nil?

        @grouped_isbn = ISBN::add_groups(isbn10)
        return nil if @grouped_isbn.nil?

        @protocol = "http://"
        @host     = "www.maryknollmall.org"
        @path     = "/description.cfm?ISBN="
        @link     = @protocol + @host + @path + @grouped_isbn

        orbis = Scraper.define do
          process "tr>td[colspan=4]>font[size=3]", :description => :text # doesn't currently work
          process "table>tr>td[rowspan=2]>img", :cover_thumb => "@src"
          result :description, :cover_thumb
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = orbis.scrape(content)

        # A missing thumbnail is treated as "book not found".
        return nil if result.nil? || result.cover_thumb.nil?

        info = {}
        info[:isbn]        = isbn
        # The scraped src is appended to the site root to build the URL.
        info[:cover_thumb] = @protocol + @host + result.cover_thumb
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -1,64 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper that looks up title information on the Oxford University
    # Press (Australia) website.
    class OUPScraper < Base

      SCRAPER_ID   = :oup
      SCRAPER_NAME = "Oxford University Press".freeze
      SCRAPER_SITE = "http://au.oup.com/".freeze

      #add_publisher( self, "978019200000", "978019999999")
      add_scraper( self )

      # Fetches the search result page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :author, :description,
      # :link, :from_name and :from_url, plus :cover_thumb when an image was
      # found and :published, :pages, :format and :rrp when the book-info
      # text matches the expected layout. Returns nil when no ISBN could be
      # scraped from the page.
      def get_info(isbn)
        # The site is queried with the ISBN-10 form.
        # presumably nil when the isbn cannot be converted - TODO confirm
        isbn = ISBN::convert_to_isbn10(isbn)
        return nil if isbn.nil?

        @protocol = "http://"
        @host     = "au.oup.com"
        @path     = "/searchbuy/SearchBook.asp?isbn="
        @link     = @protocol + @host + @path + isbn

        oup = Scraper.define do
          process "div.isbn13",        :isbn        => :text
          process "td.title",          :title       => :text
          process "td.author",         :author      => :text
          process "div.BookInfo",      :misc        => :text
          process "td>p.blurb",        :description => :text
          process "td>img[width=100]", :cover       => "@src"
          result :isbn, :title, :author, :misc, :description, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = oup.scrape(content)

        # A page without an ISBN is treated as "book not found".
        return nil if result.nil? || result.isbn.nil?

        info = {}
        info[:isbn]        = result.isbn.gsub(/\ ISBN-13:\n/, "")
        info[:title]       = result.title
        info[:author]      = result.author
        info[:description] = result.description
        # The scraped src is appended to the site root; skip when no image.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]        = @link

        # The book-info block is plain text with one value per line.
        # A successful match always carries the whole match plus all five
        # groups, so testing for a fixed length is unnecessary (the previous
        # "length == 5" test could never succeed).
        details = result.misc.nil? ? nil : result.misc.match(/\n(.+)\n(.+) pages\n(.+)\n(.+)\n(.+)/)
        if details
          info[:published] = details[1]
          info[:pages]     = details[2]
          info[:format]    = details[3]
          info[:rrp]       = details[4]
        end

        info[:from_name] = SCRAPER_NAME
        info[:from_url]  = SCRAPER_SITE
        info
      end

    end
  end
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
|
2
|
-
module RBook
  module WWW

    # Scraper that looks up title information on the Paulist Press website.
    class PaulistScraper < Base

      SCRAPER_ID   = :paulist
      SCRAPER_NAME = "Paulist Press".freeze
      SCRAPER_SITE = "http://www.paulistpress.com/".freeze

      #add_publisher( self, "9780809100000", "9780809199999" )
      add_scraper( self )

      # Fetches the product page for the supplied isbn and scrapes it.
      #
      # isbn - the ISBN to look up
      #
      # Returns a hash with the keys :isbn, :title, :link, :from_name and
      # :from_url, plus :cover_thumb when an image was found. Returns nil
      # when no title could be scraped from the page.
      # Raises ArgumentError if ISBN::valid_isbn? rejects the supplied isbn.
      def get_info(isbn)
        raise ArgumentError, 'Supplied isbn is not valid' unless ISBN::valid_isbn?(isbn)

        isbn = ISBN::convert_to_isbn13(isbn) unless ISBN::valid_isbn13?(isbn)

        # presumably nil when no ISBN-10 equivalent exists - TODO confirm
        isbn10 = ISBN::convert_to_isbn10(isbn)
        return nil if isbn10.nil?

        @protocol = "http://"
        @host     = "www.paulistpress.com"
        @path     = "/"
        # Page names are built from the last five characters of the ISBN-10:
        # the first four of them, a dash, then the final character.
        @abv_isbn = isbn10[-5, 5]
        @suffix   = ".html"
        @link     = @protocol + @host + @path + @abv_isbn[0,4] + "-" + @abv_isbn[-1,1] + @suffix

        rba = Scraper.define do
          process "tr>td>h4", :title => :text
          process "img[width=120][height=180]", :cover => "@src"
          result :title, :cover
        end

        content = Net::HTTP.get URI.parse(@link)
        result  = rba.scrape(content)

        # Without a title the page is treated as "not found".
        return nil if result.nil? || result.title.nil?

        info = {}
        info[:isbn]        = isbn
        info[:title]       = result.title.gsub("Details for ", "")
        # The scraped src is appended to the site root; skip when no image.
        info[:cover_thumb] = @protocol + @host + result.cover unless result.cover.nil?
        info[:link]        = @link
        info[:from_name]   = SCRAPER_NAME
        info[:from_url]    = SCRAPER_SITE
        info
      end

    end
  end
end
|