amazon-review 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
module AmazonReview
  # Number of consecutive failed attempts after which find_reviews stops
  # retrying and re-raises the last error.
  MAX_CONSECUTIVE_FAILURES = 5

  # Collects the reviews of a product by requesting its review listing page by
  # page (the URL asks for ascending submission date) until a page yields no
  # review anchors.
  #
  # asin - product identifier; it is interpolated verbatim into the listing URL.
  #
  # Returns an Array of Review objects, one per matching anchor, in page order.
  # Raises the last StandardError once MAX_CONSECUTIVE_FAILURES attempts in a
  # row have failed (network error, HTTP error status, unparsable page).
  def self.find_reviews(asin)
    reviews = []
    delay = 0.5
    page = 1
    failures = 0

    loop do
      url = "http://www.amazon.com/product-reviews/#{asin}/?ie=UTF8&showViewpoints=0&pageNumber=#{page}&sortBy=bySubmissionDateAscending"

      begin
        # URI#open is provided by open-uri; Kernel#open no longer accepts URLs
        # on Ruby 3, while this form works on old and new interpreters alike.
        doc = Nokogiri::HTML(URI.parse(url).open)

        # Build the whole page before touching +reviews+ so that a failure in
        # the middle of a page cannot leave duplicates behind when it is retried.
        page_reviews = doc.css("#productReviews td > a[name]").map do |review_html|
          Review.new(review_html)
        end
      rescue StandardError
        # StandardError only: Interrupt/SystemExit must still stop the scrape.
        failures += 1
        raise if failures >= MAX_CONSECUTIVE_FAILURES

        # Back off before retrying the same page (e.g. after a 503 response).
        delay += 0.5
        sleep delay
        next
      end

      # A page without reviews marks the end of the listing.
      break if page_reviews.empty?

      failures = 0
      reviews.concat(page_reviews)
      page += 1

      # Successful request: shorten the pause a little, never below zero.
      delay = [0, delay - 0.1].max
      sleep delay
    end

    reviews
  end
end
39
+
40
+ require_relative "amazon-review/review"
@@ -0,0 +1,79 @@
1
require 'date' # Review#date calls Date.parse

module AmazonReview
  # A single review extracted from a product-review listing page.
  #
  # Every attribute is read lazily from the review's markup. Attributes whose
  # markup is missing are returned as nil instead of raising.
  class Review
    # Matches the "X of Y people" helpfulness summary. Both counts may contain
    # thousands separators, e.g. "1,234 of 1,500 people".
    HELPFUL_VOTES_PATTERN = /(\d[\d,]*) of (\d[\d,]*) people/

    # html - the anchor node (selected via "a[name]") that marks a review.
    #        The second element following it is used as the review container.
    def initialize(html)
      @html = html
      @div = html.next_element.next_element
    end

    # Returns a short String identifying the review by id.
    def inspect
      "<Review: id=#{id}>"
    end

    # Returns the value of the anchor's "name" attribute.
    def id
      @id ||= @html['name']
    end

    # Returns the review URL built from the id.
    def url
      @url ||= "http://www.amazon.com/review/#{id}"
    end

    # Returns the first run of capitals/digits in the href of the reviewer's
    # profile link, or nil when the container has no such link.
    def user_id
      return @user_id if @user_id

      link = first_match('a[href^="/gp/pdp/profile"]')
      href = link && link["href"]
      @user_id = href && href[/[A-Z0-9]+/]
    end

    # Returns the stripped text of the first <b> element, or nil.
    def title
      return @title if @title

      node = first_match("b")
      @title = node && node.text.strip
    end

    # Returns a Date parsed from the first <nobr> element, or nil when that
    # element is absent. Raises whatever Date.parse raises on unparsable text.
    def date
      return @date if @date

      node = first_match("nobr")
      @date = node && Date.parse(node.text)
    end

    # Returns the review body with surrounding whitespace removed, or nil.
    def text
      return @text if @text

      node = first_match(".reviewText")
      @text = node && node.content.strip
    end

    # Returns the first number in the rating sprite's title attribute as a
    # Float, or nil when the sprite, its title, or the number is missing.
    def rating
      return @rating if @rating

      sprite = first_match("span.swSprite")
      label = sprite && sprite['title']
      number = label && label[/\d+(?:\.\d+)?/]
      @rating = number && Float(number)
    end

    # Returns the number of "helpful" votes as a Float, or nil when the
    # container text has no "X of Y people" summary.
    def helpful_count
      votes = helpful_votes
      votes && votes.first
    end

    # Returns helpful votes divided by total votes as a Float, or nil when
    # there is no summary or the total is zero.
    def helpful_ratio
      helpful, total = helpful_votes
      return nil if total.nil? || total.zero?

      helpful / total
    end

    # Returns a Hash of all public attributes keyed by Symbol.
    def to_hash
      attrs = [:id, :url, :user_id, :title, :date, :text, :rating, :helpful_count, :helpful_ratio]
      attrs.each_with_object({}) do |attr, result|
        result[attr] = public_send(attr)
      end
    end

    private

    # First node inside the review container matching +selector+, or nil.
    def first_match(selector)
      @div.css(selector).first
    end

    # Returns [helpful, total] as Floats parsed from the container text, or
    # nil when no summary is present. The result (including nil) is cached so
    # the text is scanned only once.
    def helpful_votes
      return @helpful_votes if defined?(@helpful_votes)

      match = @div.text.match(HELPFUL_VOTES_PATTERN)
      @helpful_votes = match && match.captures.map { |digits| Float(digits.delete(",")) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: amazon-review
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jeff Mekler
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-09-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>'
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.6
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>'
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.6
30
+ description: A simple gem to parse Amazon product reviews
31
+ email: contact@jeffmekler.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/amazon-review.rb
37
+ - lib/amazon-review/review.rb
38
+ homepage: http://rubygems.org/gems/amazon-review
39
+ licenses:
40
+ - MIT
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.24
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: A simple gem to parse Amazon product reviews
63
+ test_files: []