ebookdealinfo 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ebookdealinfo/deal_scraper.rb +8 -8
- data/lib/ebookdealinfo/info_scraper.rb +0 -1
- data/lib/ebookdealinfo/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d3af5eefa942b5a80ab2d50d7b877eb201f1cbc6
|
|
4
|
+
data.tar.gz: ee53d579e8d2c724eb4b07ea04b0318d6dc9b11a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fe6a8fb850d8b257e3f5b7f625907e68456886b443e2a5a8d62ad0e5f444c0f9961be6691e2bac542fb4640b6537fe62a3c684d1daaa48a574be90272f15e0f3
|
|
7
|
+
data.tar.gz: 6e891715c2bb8af19bacd1d16aa8331f3a1322aff9602af74863ce36fd2c1794536a58305dceb0e6c5f3eae88acabe586050827668e73eed01b0ed518867fc16
|
|
@@ -10,19 +10,19 @@ class DealScraper
|
|
|
10
10
|
if post.search("p.title").text.size > 0
|
|
11
11
|
if post.search("p.title").text.include?(";") #for posts formatted "#~Author~; ~Title; ~Price~"
|
|
12
12
|
author = post.search("p.title").text.split(";")[0].strip
|
|
13
|
-
#.search("p.title a").attribute("href").value
|
|
14
13
|
title = post.search("p.title").text.split(";")[1].strip
|
|
15
|
-
price = post.search("p.title").text.split(";")[2].strip
|
|
14
|
+
post.search("p.title").text.split(";")[2] != nil ? price = post.search("p.title").text.split(";")[2].strip : price = ""
|
|
16
15
|
Book.create(author, title, price)
|
|
17
|
-
puts "Loaded book ##{index+1} of
|
|
16
|
+
puts "Loaded book ##{index+1} of #{deals.search("div.link").size}"
|
|
18
17
|
else #for posts formatted "~Title~ by ~author~ (~Price~)"
|
|
19
|
-
if post.search("p.title").text.slice(
|
|
20
|
-
author = post.search("p.title").text.split("by")[1].slice(/\A[^(,$\/]+/).split(". Kindle")[0].split("-- Kindle")[0].strip
|
|
21
|
-
title = post.search("p.title").text.split("by").first.gsub(/\W+\z/, "").strip
|
|
22
|
-
price = post.search("p.title").text.slice(/[$]\d+[.]\d+/).strip
|
|
18
|
+
if post.search("p.title").text.slice(/[,.-[ ]]([Bb]y)/) != nil #ignore wacky formatted posts
|
|
19
|
+
author = post.search("p.title").text.split("by")[1].strip.slice(/\A[^(,$\/]+/).split(". Kindle")[0].split("-- Kindle")[0].strip
|
|
20
|
+
title = post.search("p.title").text.split("by").first.gsub(/[(].+[)]/,"").gsub(/\W+\z/, "").strip
|
|
21
|
+
post.search("p.title").text.slice(/[$]\d+[.]\d+/) != nil ? price = post.search("p.title").text.slice(/[$]\d+[.]\d+/).strip : price = ""
|
|
23
22
|
Book.create(author, title, price)
|
|
24
|
-
puts "Loaded book ##{index+1} of
|
|
23
|
+
puts "Loaded book ##{index+1} of #{deals.search("div.link").size}"
|
|
25
24
|
else
|
|
25
|
+
puts "Unable to load book ##{index+1} of #{deals.search("div.link").size}. Probably a bad post name."
|
|
26
26
|
Book.create("","","",0)
|
|
27
27
|
end
|
|
28
28
|
end
|
|
@@ -5,7 +5,6 @@ class InfoScraper
|
|
|
5
5
|
|
|
6
6
|
def info_scrape(book) #for each instance of book in the class collection, go get blurb, series, gr rating/rates and add them to that instance; also author to deal with last name only from scrape?
|
|
7
7
|
search_string = "#{book.title} #{book.author.gsub(".", ". ").gsub(/[^\w\s]/,"")}".gsub(/(\A|\s)\S\s/," ").gsub(/[^a-zA-Z0-9']+/, "+") #turns the author + title into a usable goodreads search string
|
|
8
|
-
#should remove anything joining multiple authors ("&", ",") that would break the search
|
|
9
8
|
search_page = Nokogiri::HTML(open("https://www.goodreads.com/search?q=#{search_string}&search_type=books",'User-Agent' => 'Ruby')) #uses the search string to pull an item's goodreads page
|
|
10
9
|
if search_page.css("table a").size != 0
|
|
11
10
|
determinant = search_page.css("span.minirating").map.with_index {|i,index| [index, i.text.strip.slice(/\s(\d|,)+/).strip.gsub(",","").to_i]}.sort! {|x,y| x[1].to_i <=> y[1].to_i}.last #the search result with the most rates (and presumably most legitimate) is an array [result_index, #rates]
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ebookdealinfo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kylek
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2017-09-
|
|
11
|
+
date: 2017-09-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|