amazon-review 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,40 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
module AmazonReview
  # Number of back-to-back failed page fetches tolerated before the last
  # error is re-raised instead of retrying again.
  MAX_CONSECUTIVE_FAILURES = 10

  # Collects the reviews of a product by walking the paginated review
  # listing, starting at page 1, until a page yields no review anchors.
  #
  # The pause between requests shrinks by 0.1s after every successful page
  # (never below zero) and grows by 0.5s after every failure.
  #
  # @param asin [String] product identifier interpolated into the listing URL
  # @return [Array<AmazonReview::Review>] one Review per anchor found, in page order
  # @raise [StandardError] the most recent fetch/parse error once
  #   MAX_CONSECUTIVE_FAILURES attempts in a row have failed
  def self.find_reviews(asin)
    reviews = []
    delay = 0.5
    page = 1
    failures = 0

    loop do
      begin
        url = "http://www.amazon.com/product-reviews/#{asin}/?ie=UTF8&showViewpoints=0&pageNumber=#{page}&sortBy=bySubmissionDateAscending"
        # URI#open (from open-uri) works on every Ruby version, unlike
        # Kernel#open; the block form closes the response IO once parsed.
        doc = URI.parse(url).open { |io| Nokogiri::HTML(io) }

        # Build the whole page first so a mid-page failure followed by a
        # retry cannot leave duplicate reviews in the result.
        page_reviews = doc.css("#productReviews td > a[name]").map do |review_html|
          Review.new(review_html)
        end
      rescue StandardError # error while fetching or parsing (likely a 503)
        failures += 1
        raise if failures >= MAX_CONSECUTIVE_FAILURES

        delay += 0.5 # back off, and actually wait before retrying the same page
        sleep delay
        next
      end

      # a page without reviews marks the end of the listing
      break if page_reviews.empty?

      reviews.concat(page_reviews)
      failures = 0
      page += 1

      # delay to prevent 503 errors, relaxed a little after each success
      delay = [0, delay - 0.1].max
      sleep delay
    end

    reviews
  end
end
39
+
40
+ require_relative "amazon-review/review"
@@ -0,0 +1,79 @@
1
require 'date' # Review#date relies on Date.parse

module AmazonReview
  # Read-only view over the markup of a single review.
  #
  # Built from the review's anchor node; every attribute is extracted from
  # the markup on first access and memoized afterwards.
  class Review
    # @param html [#[], #next_element] anchor node whose 'name' attribute is
    #   the review id; the node two siblings after it is searched for all
    #   other attributes
    def initialize(html)
      @html = html
      @div = html.next_element.next_element
    end

    # @return [String] short representation showing only the review id
    def inspect
      "<Review: id=#{id}>"
    end

    # @return [String, nil] value of the anchor's 'name' attribute
    def id
      @id ||= @html['name']
    end

    # @return [String] review URL built from the id
    def url
      @url ||= "http://www.amazon.com/review/#{id}"
    end

    # @return [String, nil] first run of capitals/digits in the href of the
    #   first profile link
    def user_id
      regex = /[A-Z0-9]+/
      @user_id ||= @div.css('a[href^="/gp/pdp/profile"]').first["href"][regex]
    end

    # @return [String] stripped text of the first <b> element
    def title
      @title ||= @div.css("b").first.text.strip
    end

    # @return [Date] date parsed from the text of the first <nobr> element
    def date
      @date ||= Date.parse(@div.css("nobr").first.text)
    end

    # @return [String] review body without leading/trailing whitespace
    def text
      @text ||= @div.css(".reviewText").first.content.strip
    end

    # @return [Float] first number found in the rating sprite's title attribute
    def rating
      regex = /[0-9\.]+/
      @rating ||= Float(@div.css("span.swSprite").first['title'][regex])
    end

    # @return [Float, nil] number of "helpful" votes, or nil when the
    #   "X of Y people" phrase is absent
    def helpful_count
      votes = helpful_votes
      @helpful_count = votes && votes.first
    end

    # @return [Float, nil] helpful votes divided by total votes, or nil when
    #   the "X of Y people" phrase is absent
    def helpful_ratio
      votes = helpful_votes
      @helpful_ratio = votes && votes.first / votes.last
    end

    # @return [Hash{Symbol => Object}] every public attribute keyed by name
    def to_hash
      attrs = [:id, :url, :user_id, :title, :date, :text, :rating, :helpful_count, :helpful_ratio]
      attrs.each_with_object({}) do |attr, result|
        result[attr] = send(attr)
      end
    end

    private

    # Matches "X of Y people"; the counts may carry thousands separators
    # ("1,234 of 1,500 people"), which a bare \d+ would fail to match.
    def helpful_match
      @helpful_match ||= @div.text.match(/(\d[\d,]*) of (\d[\d,]*) people/)
    end

    # Returns [helpful, total] as Floats with separators removed, or nil
    # when there is no vote phrase.
    def helpful_votes
      match = helpful_match
      return nil unless match

      @helpful_votes ||= match.captures.map { |count| Float(count.delete(',')) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: amazon-review
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jeff Mekler
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-09-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>'
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.6
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>'
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.6
30
+ description: A simple gem to parse Amazon product reviews
31
+ email: contact@jeffmekler.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/amazon-review.rb
37
+ - lib/amazon-review/review.rb
38
+ homepage: http://rubygems.org/gems/amazon-review
39
+ licenses:
40
+ - MIT
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.24
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: A simple gem to parse Amazon product reviews
63
+ test_files: []