amazon-review 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/amazon-review.rb +40 -0
- data/lib/amazon-review/review.rb +79 -0
- metadata +63 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module AmazonReview

  # Scrapes every review page for a product, oldest review first, and wraps
  # each review anchor found on the page in a Review.
  #
  # Pages are requested one after another until a page yields no reviews.
  # A failed page (fetch or parse error) is retried after a growing pause;
  # each successful page shortens the pause again.
  #
  # @param asin [String] product identifier; interpolated into the URL as-is,
  #   so it must already be URL-safe
  # @param max_retries [Integer] how many consecutive failures of the same
  #   page are tolerated before the last error is re-raised
  # @return [Array<Review>] the reviews collected from all pages
  # @raise [StandardError] the last fetch/parse error once a page has failed
  #   more than +max_retries+ times in a row
  def self.find_reviews(asin, max_retries = 5)
    reviews = []
    delay = 0.5
    page = 1
    failures = 0

    loop do
      url = "http://www.amazon.com/product-reviews/#{asin}/?ie=UTF8&showViewpoints=0&pageNumber=#{page}&sortBy=bySubmissionDateAscending"

      begin
        # URI#open (from open-uri) instead of Kernel#open: it can only ever
        # open a URL and keeps working on Ruby versions where Kernel#open no
        # longer accepts URLs.
        doc = Nokogiri::HTML(URI.parse(url).open)

        # Build the whole page before touching +reviews+ so that a failure
        # half-way through a page cannot leave duplicates behind on retry.
        page_reviews = doc.css("#productReviews td > a[name]").map do |review_html|
          Review.new(review_html)
        end
      rescue StandardError
        # StandardError only: Interrupt, SystemExit etc. must still propagate.
        failures += 1
        raise if failures > max_retries

        delay += 0.5 # back off before asking for the same page again
        sleep delay
        next
      end

      # an empty page means we ran past the last page of reviews
      break if page_reviews.empty?

      reviews.concat(page_reviews)
      failures = 0
      page += 1

      # ease the pause back down after a success
      delay = [0, delay - 0.1].max
      sleep delay
    end

    reviews
  end

end
|
39
|
+
|
40
|
+
require_relative "amazon-review/review"
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'date' # Review#date relies on Date.parse

module AmazonReview
  # One review scraped from a product-reviews page.
  #
  # The object is built from the review's anchor node; the review markup
  # itself is taken to be the second element sibling after that anchor.
  # Every scraped attribute is read lazily and returns nil when the piece of
  # markup it depends on is not present.
  class Review
    # attributes exported by #to_hash, in output order
    ATTRIBUTES = [:id, :url, :user_id, :title, :date, :text, :rating,
                  :helpful_count, :helpful_ratio].freeze

    # @param html [#[], #next_element] the review's anchor node
    def initialize(html)
      @html = html
      @div = html.next_element.next_element
    end

    # @return [String] short representation showing only the review id
    def inspect
      "<Review: id=#{id}>"
    end

    # @return [String, nil] value of the anchor's +name+ attribute
    def id
      @id ||= @html['name']
    end

    # @return [String] review URL built from #id
    def url
      @url ||= "http://www.amazon.com/review/#{id}"
    end

    # @return [String, nil] first run of capitals/digits in the profile
    #   link's href, or nil when the review has no profile link
    def user_id
      @user_id ||= begin
        link = first_node('a[href^="/gp/pdp/profile"]')
        link && link["href"][/[A-Z0-9]+/]
      end
    end

    # @return [String, nil] stripped text of the first <b> element
    def title
      @title ||= begin
        node = first_node("b")
        node && node.text.strip
      end
    end

    # @return [Date, nil] date parsed from the first <nobr> element
    # @raise [ArgumentError] when the element's text is not a parseable date
    def date
      @date ||= begin
        node = first_node("nobr")
        node && Date.parse(node.text)
      end
    end

    # @return [String, nil] review body with surrounding whitespace removed
    def text
      @text ||= begin
        node = first_node(".reviewText")
        node && node.content.strip
      end
    end

    # @return [Float, nil] first number found in the star sprite's title
    def rating
      @rating ||= begin
        node = first_node("span.swSprite")
        label = node && node['title']
        number = label && label[/[0-9\.]+/]
        number && Float(number)
      end
    end

    # @return [Float, nil] the first number of "X of Y people", or nil when
    #   the review carries no such phrase
    def helpful_count
      match = helpful_match
      @helpful_count = match && Float(match.captures[0])
    end

    # @return [Float, nil] X / Y from "X of Y people"; nil when the phrase is
    #   missing or Y is zero (a plain division would give NaN or Infinity)
    def helpful_ratio
      match = helpful_match
      @helpful_ratio =
        if match
          total = Float(match.captures[1])
          total.zero? ? nil : Float(match.captures[0]) / total
        end
    end

    # @return [Hash{Symbol => Object}] every attribute in ATTRIBUTES
    def to_hash
      ATTRIBUTES.each_with_object({}) do |attr, hash|
        hash[attr] = public_send(attr)
      end
    end

    private

    # First node under the review markup matching +selector+, or nil.
    def first_node(selector)
      @div.css(selector).first
    end

    # MatchData for the "X of Y people" phrase, or nil. Checked with
    # defined? so that a miss is remembered too and the text is scanned once.
    def helpful_match
      return @helpful_match if defined?(@helpful_match)

      @helpful_match = @div.text.match(/(\d+) of (\d+) people/)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: amazon-review
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jeff Mekler
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-09-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>'
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.5.6
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>'
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.5.6
|
30
|
+
description: A simple gem to parse Amazon product reviews
|
31
|
+
email: contact@jeffmekler.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/amazon-review.rb
|
37
|
+
- lib/amazon-review/review.rb
|
38
|
+
homepage: http://rubygems.org/gems/amazon-review
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.8.24
|
60
|
+
signing_key:
|
61
|
+
specification_version: 3
|
62
|
+
summary: A simple gem to parse Amazon product reviews
|
63
|
+
test_files: []
|