goodreads-books 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/goodreads_books/book.rb +4 -52
- data/lib/goodreads_books/cli.rb +35 -33
- data/lib/goodreads_books/scraper.rb +38 -58
- data/lib/goodreads_books/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e4b864502b272cfc85b099364e5620d33808a4a6de5da2f2fa961cd25754676
|
4
|
+
data.tar.gz: b12f7f449a2701de75e55c9675a423b7b478a3c6e86318c773ba7fa549ff1865
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9352d2861fab111920c9f8d35662412bf4babf16658df60baefcc5b7897380927c8684a4ab27b41c490c1feb351a16ca2a18fdb58b7cdcddc74d132a42aa98d
|
7
|
+
data.tar.gz: 166d78168a329e0fc6aab2a61962c040464b0d74fdb2cfcc1c6f4aa5278680762861cd72bc97482ef3035b2097581f10d0cccaab5b9d6c40ae518273d48f66f1
|
data/lib/goodreads_books/book.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
class GoodreadsBooks::Book
|
2
|
-
attr_accessor :awards_year, :category, :title, :author, :vote, :description, :
|
3
|
-
|
4
|
-
BASE_URL = "https://www.goodreads.com"
|
2
|
+
attr_accessor :awards_year, :category, :title, :author, :vote, :description, :category_url, :url
|
5
3
|
|
6
4
|
@@all = []
|
7
5
|
|
@@ -24,54 +22,8 @@ class GoodreadsBooks::Book
|
|
24
22
|
self.class.all << self
|
25
23
|
end #-- save --
|
26
24
|
|
27
|
-
def self.
|
28
|
-
all.select { |book| book.awards_year == awards_year }
|
29
|
-
end #--
|
30
|
-
|
31
|
-
def author
|
32
|
-
get_book_details if !@author
|
33
|
-
@author
|
34
|
-
end #-- author --
|
35
|
-
|
36
|
-
def vote
|
37
|
-
get_book_details if !@vote
|
38
|
-
@vote
|
39
|
-
end #-- vote --
|
40
|
-
|
41
|
-
def description
|
42
|
-
get_book_details if !@description
|
43
|
-
@description
|
44
|
-
end #-- description --
|
45
|
-
|
46
|
-
def url
|
47
|
-
get_book_details if !@url
|
48
|
-
@url
|
49
|
-
end #-- url --
|
50
|
-
|
51
|
-
def get_book_details
|
52
|
-
# Next level of scraping (get details of best book within each category_url)
|
53
|
-
book_doc = Nokogiri::HTML(open(self.cate_url))
|
54
|
-
|
55
|
-
self.vote = book_doc.css(".gcaRightContainer .gcaWinnerHeader").text.split(" ")[1]
|
56
|
-
self.author = book_doc.css(".gcaRightContainer h3 .gcaAuthor a.authorName").text
|
57
|
-
self.url = "#{BASE_URL}#{book_doc.css(".gcaRightContainer h3 a.winningTitle").attr("href").text}"
|
58
|
-
|
59
|
-
# goodreads description is encoded, so need to add .encode("ISO-8859-1") to print the special characters eg. â\u0080\u0099s in printable character of '
|
60
|
-
# if self.awards_year < 2017, use the span tag, else there's no span tag so don't check for it
|
61
|
-
descript = book_doc.css(".gcaRightContainer .readable.stacked span")[1]
|
62
|
-
if descript
|
63
|
-
self.description = book_doc.css(".gcaRightContainer .readable.stacked span")[1].text.encode("ISO-8859-1")
|
64
|
-
else
|
65
|
-
self.description = book_doc.css(".gcaRightContainer .readable.stacked").text.encode("ISO-8859-1")
|
66
|
-
end
|
67
|
-
#binding.pry
|
68
|
-
end #-- get_book_details --
|
69
|
-
|
70
|
-
def self.populate_book_details(award_year)
|
71
|
-
all_by_year(awards_year).each do |book|
|
72
|
-
book.get_book_details
|
73
|
-
end
|
74
|
-
#binding.pry
|
75
|
-
end #-- self.populate_book_details --
|
25
|
+
def self.find_all_by_year(awards_year)
|
26
|
+
self.all.select { |book| book.awards_year == awards_year }
|
27
|
+
end #-- self.find_all_by_year --
|
76
28
|
|
77
29
|
end
|
data/lib/goodreads_books/cli.rb
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
class GoodreadsBooks::CLI
|
2
2
|
|
3
|
-
# This application only works for year 2010 to current year - 1.
|
3
|
+
# This application only works for year 2010 to latest awards year (usually, current year - 1).
|
4
4
|
# Goodreads Choice Awards Winner 2009 page setup differs from 2010 onwards.
|
5
5
|
BASE_YEAR = 2010
|
6
|
-
END_YEAR = Time.now.year - 1
|
7
|
-
|
8
|
-
def initialize
|
9
|
-
@@choice_awards = nil
|
10
|
-
end
|
11
6
|
|
12
7
|
def call
|
13
8
|
system "clear"
|
@@ -16,46 +11,51 @@ class GoodreadsBooks::CLI
|
|
16
11
|
puts " ----------------------------------------"
|
17
12
|
puts ""
|
18
13
|
|
19
|
-
|
14
|
+
@latest_awards_year = GoodreadsBooks::Scraper.scrape_awards_year
|
15
|
+
load_choice_awards_books(@latest_awards_year)
|
20
16
|
|
21
17
|
main_menu
|
22
18
|
end #-- call --
|
23
19
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
20
|
+
def load_choice_awards_books(awards_year)
|
21
|
+
@awards_year = awards_year
|
22
|
+
puts "Loading The Winners of #{awards_year} Goodreads Choice Awards Books..."
|
23
|
+
if GoodreadsBooks::Book.find_all_by_year(awards_year).empty?
|
24
|
+
books = GoodreadsBooks::Scraper.scrape_books(awards_year)
|
29
25
|
end
|
30
|
-
|
31
|
-
@choice_awards = GoodreadsBooks::Scraper.find_or_create_by_year(awards_year)
|
32
|
-
|
33
|
-
@book_count = GoodreadsBooks::Book.all_by_year(@choice_awards.awards_year).count
|
34
|
-
end #-- load_choice_awards --
|
26
|
+
end #-- load_choice_awards_books --
|
35
27
|
|
36
28
|
def main_menu
|
37
|
-
|
38
|
-
|
29
|
+
valid_input = true
|
39
30
|
input = nil
|
40
31
|
while input != "exit"
|
32
|
+
book_count = GoodreadsBooks::Book.find_all_by_year(@awards_year).count
|
41
33
|
list_books
|
42
34
|
|
43
|
-
|
44
|
-
|
45
|
-
|
35
|
+
if !valid_input
|
36
|
+
puts ""
|
37
|
+
puts "Please enter a number between 1 and #{book_count}, or valid Choice Awards year, or 'exit' to end application.".colorize(:red)
|
38
|
+
valid_input = true
|
39
|
+
else
|
40
|
+
puts ""
|
41
|
+
puts "Enter a number to view details of the book, or select another Choice Awards year (2010 onwards).".colorize(:green)
|
42
|
+
puts "Type 'exit' to end the application.".colorize(:green)
|
43
|
+
end
|
44
|
+
|
46
45
|
input = gets.strip
|
47
46
|
|
48
47
|
if input.downcase == "exit"
|
49
48
|
break
|
50
|
-
elsif input.to_i.between?(1,
|
51
|
-
book = GoodreadsBooks::Book.
|
49
|
+
elsif input.to_i.between?(1, book_count)
|
50
|
+
book = GoodreadsBooks::Book.find_all_by_year(@awards_year)[input.to_i - 1]
|
51
|
+
if !book.author
|
52
|
+
GoodreadsBooks::Scraper.scrape_book_details(book)
|
53
|
+
end
|
52
54
|
view_book(book)
|
53
|
-
elsif input.to_i.between?(BASE_YEAR,
|
54
|
-
|
55
|
-
load_choice_awards(input.to_i)
|
55
|
+
elsif input.to_i.between?(BASE_YEAR, @latest_awards_year)
|
56
|
+
load_choice_awards_books(input.to_i)
|
56
57
|
else
|
57
|
-
|
58
|
-
puts "Please enter a number between 1 and #{@book_count} or a valid Choice Awards year".colorize(:red)
|
58
|
+
valid_input = false
|
59
59
|
end
|
60
60
|
end
|
61
61
|
|
@@ -64,18 +64,20 @@ class GoodreadsBooks::CLI
|
|
64
64
|
end #-- main_menu --
|
65
65
|
|
66
66
|
def list_books
|
67
|
+
system "clear"
|
67
68
|
puts ""
|
68
|
-
puts "---------- #{@
|
69
|
+
puts "---------- #{@awards_year} Goodreads Choice Awards Books ----------"
|
69
70
|
puts ""
|
70
71
|
|
71
|
-
GoodreadsBooks::Book.
|
72
|
+
GoodreadsBooks::Book.find_all_by_year(@awards_year).each.with_index(1) do |book, index|
|
72
73
|
puts "#{index}. #{book.category} - #{book.title}"
|
73
74
|
end
|
74
75
|
end #-- display_books --
|
75
76
|
|
76
77
|
def view_book(book)
|
78
|
+
system "clear"
|
77
79
|
puts ""
|
78
|
-
puts "---------- #{@
|
80
|
+
puts "---------- #{@awards_year} BEST #{book.category.upcase} Winner ----------"
|
79
81
|
puts ""
|
80
82
|
puts "Title: #{book.title}"
|
81
83
|
puts "Author: #{book.author}"
|
@@ -85,7 +87,7 @@ class GoodreadsBooks::CLI
|
|
85
87
|
puts "#{book.description}"
|
86
88
|
|
87
89
|
puts ""
|
88
|
-
puts "Would you like to
|
90
|
+
puts "Would you like to open Goodreads website to view this book? Enter Y to open the website.".colorize(:green)
|
89
91
|
input = gets.strip.downcase
|
90
92
|
|
91
93
|
if input.downcase == "y"
|
@@ -1,77 +1,57 @@
|
|
1
1
|
class GoodreadsBooks::Scraper
|
2
|
-
attr_accessor :awards_year, :main_url
|
3
|
-
|
4
2
|
BASE_URL = "https://www.goodreads.com"
|
5
3
|
PAGE_URL = "/choiceawards"
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
def initialize(awards_year = nil)
|
10
|
-
@awards_year = awards_year
|
11
|
-
end #-- initialize --
|
12
|
-
|
13
|
-
def self.all
|
14
|
-
@@all
|
15
|
-
end #-- self.all --
|
16
|
-
|
17
|
-
def save
|
18
|
-
self.class.all << self
|
19
|
-
end #-- save --
|
20
|
-
|
21
|
-
def self.find_or_create_by_year(awards_year = nil)
|
22
|
-
if !(choice_awards = find_by_year(awards_year))
|
23
|
-
choice_awards = create(awards_year)
|
24
|
-
choice_awards.scrape_books
|
25
|
-
end
|
26
|
-
find_by_year(choice_awards.awards_year)
|
27
|
-
end #-- self.find_or_create_by_year --
|
28
|
-
|
29
|
-
def self.find_by_year(awards_year = nil)
|
30
|
-
all.detect { |r| r.awards_year == awards_year }
|
31
|
-
end #-- self.find_by_year --
|
32
|
-
|
33
|
-
def self.create(awards_year = nil)
|
34
|
-
#choice_awards = new(awards_year)
|
35
|
-
#choice_awards.save
|
36
|
-
#replaced with one line of code below using .tap method
|
37
|
-
choice_awards = new(awards_year).tap { |s| s.save }
|
38
|
-
|
5
|
+
def self.scrape_awards_year
|
39
6
|
# if awards_year is missing from the url,
|
40
7
|
# goodreads.com defaults to latest choice awards year
|
41
8
|
# /best-books-#{latest awards year}"
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
else
|
47
|
-
choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}/best-books-#{awards_year}"
|
48
|
-
choice_awards.awards_year = awards_year
|
49
|
-
end
|
9
|
+
main_url = "#{BASE_URL}#{PAGE_URL}"
|
10
|
+
html = open(main_url)
|
11
|
+
html.base_uri.to_s.split("-").last.to_i
|
12
|
+
end #-- self.scrape_awards_year
|
50
13
|
|
51
|
-
|
52
|
-
|
14
|
+
def self.scrape_books(awards_year)
|
15
|
+
main_url = "#{BASE_URL}#{PAGE_URL}/best-books-#{awards_year}"
|
16
|
+
doc = Nokogiri::HTML(open(main_url))
|
53
17
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
# Category winners page
|
18
|
+
# Category winners page: iterate through the best book of each category
|
19
|
+
books = []
|
58
20
|
doc.css(".category.clearFix").each do |category|
|
59
|
-
|
60
|
-
|
61
|
-
|
21
|
+
category_name = category.css("h4").text
|
22
|
+
category_url = category.css("a").attr("href").text
|
23
|
+
category_title = category.css("img").attr("alt").text
|
62
24
|
|
63
25
|
# for each winner element, assemble the book_details hash
|
64
26
|
book_details = {
|
65
|
-
:awards_year =>
|
66
|
-
:category =>
|
67
|
-
:title =>
|
68
|
-
:
|
27
|
+
:awards_year => awards_year,
|
28
|
+
:category => category_name,
|
29
|
+
:title => category_title,
|
30
|
+
:category_url => "#{BASE_URL}#{category_url}"
|
69
31
|
}
|
70
32
|
|
71
|
-
GoodreadsBooks::Book.new_from_web_page(book_details)
|
33
|
+
books = GoodreadsBooks::Book.new_from_web_page(book_details)
|
72
34
|
end
|
73
35
|
|
74
|
-
|
75
|
-
end #-- scrape_books --
|
36
|
+
books
|
37
|
+
end #-- self.scrape_books --
|
38
|
+
|
39
|
+
def self.scrape_book_details(book)
|
40
|
+
# Next level of scraping (get details of best book within each category_url)
|
41
|
+
book_doc = Nokogiri::HTML(open(book.category_url))
|
42
|
+
|
43
|
+
book.vote = book_doc.css(".gcaRightContainer .gcaWinnerHeader").text.split(" ")[1]
|
44
|
+
book.author = book_doc.css(".gcaRightContainer h3 .gcaAuthor a.authorName").text
|
45
|
+
book.url = "#{BASE_URL}#{book_doc.css(".gcaRightContainer h3 a.winningTitle").attr("href").text}"
|
46
|
+
|
47
|
+
# goodreads description is encoded, so need to add .encode("ISO-8859-1") to print the special characters eg. â\u0080\u0099s in printable character of '
|
48
|
+
# if self.awards_year < 2017, use the span tag, else there's no span tag so don't check for it
|
49
|
+
description = book_doc.css(".gcaRightContainer .readable.stacked span")[1]
|
50
|
+
if description
|
51
|
+
book.description = book_doc.css(".gcaRightContainer .readable.stacked span")[1].text.encode("ISO-8859-1")
|
52
|
+
else
|
53
|
+
book.description = book_doc.css(".gcaRightContainer .readable.stacked").text.encode("ISO-8859-1")
|
54
|
+
end
|
55
|
+
end #-- self.scrape_book_details --
|
76
56
|
|
77
57
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goodreads-books
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ni Chia
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|