goodreads-books 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/goodreads_books/book.rb +9 -2
- data/lib/goodreads_books/cli.rb +1 -1
- data/lib/goodreads_books/scraper.rb +2 -2
- data/lib/goodreads_books/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89779d99588337f482f8d2d784e2af6917beeb3f620ec38ea345e1abbac6f621
|
4
|
+
data.tar.gz: d6fb61b1dd36ede436aa991c782cc39b2c091d20749f0025d77d2e5116cfd79c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4593aca0913571d19fb263dbdf769d7d3442f471dee736f298c90290a9bc59819701c096f4bd118c06b9c9d487296a7b183f9b9bb604e8125ff6adf8561982e4
|
7
|
+
data.tar.gz: ea0407c0d895aa8e1064f6334956531c3048d164e2a1e859c3de54d5aa7fec12cf7f4e63d19d4a719e043d3fb0182c068cefa667518fe37ac2685a80662f843b
|
data/lib/goodreads_books/book.rb
CHANGED
@@ -49,14 +49,21 @@ class GoodreadsBooks::Book
|
|
49
49
|
end #-- url --
|
50
50
|
|
51
51
|
def get_book_details
|
52
|
-
# Next level of scraping (get details of
|
52
|
+
# Next level of scraping (get details of best book within each category_url)
|
53
53
|
book_doc = Nokogiri::HTML(open(self.cate_url))
|
54
54
|
|
55
55
|
self.vote = book_doc.css(".gcaRightContainer .gcaWinnerHeader").text.split(" ")[1]
|
56
56
|
self.author = book_doc.css(".gcaRightContainer h3 .gcaAuthor a.authorName").text
|
57
57
|
self.url = "#{BASE_URL}#{book_doc.css(".gcaRightContainer h3 a.winningTitle").attr("href").text}"
|
58
|
-
self.description = book_doc.css(".gcaRightContainer .readable.stacked").text.strip
|
59
58
|
|
59
|
+
# goodreads description is encoded, so need to add .encode("ISO-8859-1") to print the special characters eg. â\u0080\u0099s in printable character of '
|
60
|
+
# if self.awards_year < 2017, use the span tag, else there's no span tag so don't check for it
|
61
|
+
descript = book_doc.css(".gcaRightContainer .readable.stacked span")[1]
|
62
|
+
if descript
|
63
|
+
self.description = book_doc.css(".gcaRightContainer .readable.stacked span")[1].text.encode("ISO-8859-1")
|
64
|
+
else
|
65
|
+
self.description = book_doc.css(".gcaRightContainer .readable.stacked").text.encode("ISO-8859-1")
|
66
|
+
end
|
60
67
|
#binding.pry
|
61
68
|
end #-- get_book_details --
|
62
69
|
|
data/lib/goodreads_books/scraper.rb
CHANGED
@@ -33,10 +33,11 @@ class GoodreadsBooks::Scraper
|
|
33
33
|
def self.create(awards_year = nil)
|
34
34
|
#choice_awards = new(awards_year)
|
35
35
|
#choice_awards.save
|
36
|
+
#replaced with one line of code below using .tap method
|
36
37
|
choice_awards = new(awards_year).tap { |s| s.save }
|
37
38
|
|
38
39
|
# if awards_year is missing from the url,
|
39
|
-
# goodreads.com defaults to latest choice awards
|
40
|
+
# goodreads.com defaults to latest choice awards year
|
40
41
|
# /best-books-#{latest awards year}"
|
41
42
|
if awards_year == nil
|
42
43
|
choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}"
|
@@ -58,7 +59,6 @@ class GoodreadsBooks::Scraper
|
|
58
59
|
cate_name = category.css("h4").text
|
59
60
|
cate_url = category.css("a").attr("href").text
|
60
61
|
cate_title = category.css("img").attr("alt").text
|
61
|
-
# cate_book_id = category.css("input")[2].attr("value") # don't need to keep book_id
|
62
62
|
|
63
63
|
# for each winner element, assemble the book_details hash
|
64
64
|
book_details = {
|