ratebeer 0.0.4b → 0.0.5a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7c2006d937effedd123240471363f0ab74b490b1
4
- data.tar.gz: 16cb0a3c415564220941eb88435cf27df46f5a2b
3
+ metadata.gz: d001c76b26d45e168769fddcb3c69dcea3711479
4
+ data.tar.gz: 27390f84bc7f8272b893b1169e3d0e90bab1510d
5
5
  SHA512:
6
- metadata.gz: 029cf0b3964cf721c20a031e5c6f1fa8114f10ee38a4cc36f5a0a3e573e58131bd8657b6ac639c54380ebb746254df2cd8eeff84582e225164b1eab3bf38b36e
7
- data.tar.gz: e4a8efdb59630749439da12eb0a07abe43262c4e82a7c24a840455215996dbbaf2d16dad0ebc490b8ad666decc17db91c45bbb414727a4d4d4145bec8cf3d9d6
6
+ metadata.gz: 07e2009a0bc96bf6afb208345c0c0167a62de3969fd0a209b3428b63abf0572d3265611e15ad7f81d8742dbe00900bf601ef4fdf600fb74a2d76f494649ea9e6
7
+ data.tar.gz: d7a0c63bdc4bf43aa3050dd4fb022c6b6ab4ca7bbd55b2f48bac575e1e0cbcf38120e615492afb22d87710a80b2e24d55e8e5cbdc8bfdb5ed14f495d7d8224dd
data/Gemfile CHANGED
@@ -1,13 +1,13 @@
1
- source "https://rubygems.org"
2
- ruby "2.3.0"
1
+ source 'https://rubygems.org'
2
+ ruby '2.3.0'
3
3
 
4
4
  gemspec
5
5
 
6
- gem "i18n"
7
- gem "nokogiri"
6
+ gem 'i18n'
7
+ gem 'nokogiri'
8
8
 
9
9
  group :test do
10
- gem "rspec"
11
- gem "rake"
12
- gem "codeclimate-test-reporter"
10
+ gem 'rspec'
11
+ gem 'rake'
12
+ gem 'codeclimate-test-reporter'
13
13
  end
data/Gemfile.lock CHANGED
@@ -1,36 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
+ ratebeer (0.0.5)
4
5
 
5
6
  GEM
6
7
  remote: https://rubygems.org/
7
8
  specs:
8
- codeclimate-test-reporter (0.4.8)
9
+ codeclimate-test-reporter (0.6.0)
9
10
  simplecov (>= 0.7.1, < 1.0.0)
10
11
  diff-lcs (1.2.5)
11
12
  docile (1.1.5)
12
13
  i18n (0.7.0)
13
- json (1.8.3)
14
- mini_portile (0.6.2)
15
- nokogiri (1.6.6.2)
16
- mini_portile (~> 0.6.0)
17
- rake (10.4.2)
18
- rspec (3.3.0)
19
- rspec-core (~> 3.3.0)
20
- rspec-expectations (~> 3.3.0)
21
- rspec-mocks (~> 3.3.0)
22
- rspec-core (3.3.2)
23
- rspec-support (~> 3.3.0)
24
- rspec-expectations (3.3.1)
14
+ json (2.0.1)
15
+ mini_portile2 (2.1.0)
16
+ nokogiri (1.6.8)
17
+ mini_portile2 (~> 2.1.0)
18
+ pkg-config (~> 1.1.7)
19
+ pkg-config (1.1.7)
20
+ rake (11.2.2)
21
+ rspec (3.5.0)
22
+ rspec-core (~> 3.5.0)
23
+ rspec-expectations (~> 3.5.0)
24
+ rspec-mocks (~> 3.5.0)
25
+ rspec-core (3.5.1)
26
+ rspec-support (~> 3.5.0)
27
+ rspec-expectations (3.5.0)
25
28
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.3.0)
27
- rspec-mocks (3.3.2)
29
+ rspec-support (~> 3.5.0)
30
+ rspec-mocks (3.5.0)
28
31
  diff-lcs (>= 1.2.0, < 2.0)
29
- rspec-support (~> 3.3.0)
30
- rspec-support (3.3.0)
31
- simplecov (0.10.0)
32
+ rspec-support (~> 3.5.0)
33
+ rspec-support (3.5.0)
34
+ simplecov (0.12.0)
32
35
  docile (~> 1.1.0)
33
- json (~> 1.8)
36
+ json (>= 1.8, < 3)
34
37
  simplecov-html (~> 0.10.0)
35
38
  simplecov-html (0.10.0)
36
39
 
@@ -45,5 +48,8 @@ DEPENDENCIES
45
48
  ratebeer!
46
49
  rspec
47
50
 
51
+ RUBY VERSION
52
+ ruby 2.3.0p0
53
+
48
54
  BUNDLED WITH
49
- 1.11.2
55
+ 1.12.5
@@ -0,0 +1,156 @@
1
+ require_relative "brewery"
2
+ require_relative "review"
3
+ require_relative "style"
4
+ require_relative "scraping"
5
+ require_relative "urls"
6
+
7
+ module RateBeer
8
+ class Beer
9
+ # Each key represents an item of data accessible for each beer, and defines
10
+ # dynamically a series of methods for accessing this data.
11
+ #
12
+ def self.data_keys
13
+ [:name,
14
+ :brewery,
15
+ :style,
16
+ :glassware,
17
+ :availability,
18
+ :abv,
19
+ :calories,
20
+ :description,
21
+ :retired,
22
+ :rating]
23
+ end
24
+
25
+ include RateBeer::Scraping
26
+ include RateBeer::URLs
27
+
28
+ # Create RateBeer::Beer instance.
29
+ #
30
+ # Requires the RateBeer ID# for the beer in question.
31
+ #
32
+ # @param [Integer, String] id ID# of beer to retrieve
33
+ # @param [String] name Name of the beer to which ID# relates if known
34
+ # @param [hash] options Options hash for entity created
35
+ #
36
+ def initialize(id, name: nil, **options)
37
+ super
38
+ end
39
+
40
+ # Return reviews of this beer.
41
+ #
42
+ def reviews(order: :most_recent, limit: 10)
43
+ Review.retrieve(self, order: order, limit: limit)
44
+ end
45
+
46
+ private
47
+
48
+ # Retrieve details about this beer from the website.
49
+ #
50
+ # This method stores the retrieved details in instance variables
51
+ # of the beer instance.
52
+ #
53
+ def retrieve_details
54
+ doc = noko_doc(URI.join(BASE_URL, beer_url(id)))
55
+ root = doc.css('#container table').first
56
+ info_tbl = root.css('table').first
57
+
58
+ @name = doc.css("h1")
59
+ .text
60
+ .strip
61
+ @name = fix_characters(@name)
62
+ raise PageNotFoundError.new("Beer not found - #{id}") if name.empty?
63
+
64
+ # If this beer is an alias, change ID to that of "proper" beer and
65
+ # retrieve details of the proper beer instead.
66
+ alias_pattern = /Also known as(.|\n)*Proceed to the aliased beer\.{3}/
67
+ if root.css('tr')[1].css('div div').text =~ alias_pattern
68
+ alias_node = root.css('tr')[1]
69
+ .css('div div')
70
+ .css('a')
71
+ .first
72
+ alias_name = alias_node.text
73
+ alias_id = alias_node['href'].split('/').last.to_i
74
+ @id = alias_id
75
+ retrieve_details
76
+ return nil
77
+ end
78
+
79
+ @brewery = info_tbl.css('td')[1]
80
+ .css('div')
81
+ .first
82
+ .css('a')
83
+ .map { |a| [:id,
84
+ :name].zip([a['href'].split('/')
85
+ .last
86
+ .to_i, a.text]).to_h }.first
87
+ @brewery = Brewery.new(@brewery[:id], name: fix_characters(@brewery[:name]))
88
+ @style = info_tbl.css('td')[1]
89
+ .css('div')
90
+ .first
91
+ .css('a')
92
+ .select { |a| a['href'] =~ /beerstyles/ }
93
+ .map { |a| [:id,
94
+ :name].zip([a['href'].split('/')
95
+ .last
96
+ .to_i, a.text]).to_h }.first
97
+ @style = Style.new(@style[:id], name: fix_characters(@style[:name]))
98
+ @glassware = info_tbl.css('td')[1]
99
+ .css('div')[1]
100
+ .css('a')
101
+ .map { |a| [:id,
102
+ :name].zip([a['href'].split('GWID=')
103
+ .last
104
+ .to_i, a.text]).to_h }.first
105
+ misc = info_tbl.next_element
106
+ .first_element_child
107
+ .children
108
+ .map(&:text)
109
+ .flat_map { |x| x.gsub(nbsp, ' ').strip.split(':') }
110
+ .map(&:strip)
111
+ .reject(&:empty?)
112
+ .each_slice(2)
113
+ .map { |(k, v)| [symbolize_text(k),
114
+ v.to_f.zero? ? v : v.to_f] }
115
+ .to_h
116
+ @abv = misc[:abv]
117
+ @calories = misc[:est_calories]
118
+ @rating = [:overall,
119
+ :style].zip(info_tbl.css('div')
120
+ .select { |d| d['title'] =~ /This figure/ }
121
+ .map { |d| d['title'].split(':').first.to_f }).to_h
122
+ @rating.merge!({ ratings: misc[:ratings],
123
+ weighted_avg: misc[:weighted_avg],
124
+ mean: misc[:mean] })
125
+ @availability = info_tbl.css('td')[1]
126
+ .css('table')
127
+ .css('td')
128
+ .children
129
+ .children
130
+ .map(&:text)
131
+ .reject(&:empty?)
132
+ .each_slice(2)
133
+ .to_a
134
+ .tap { |a| a.last.unshift('distribution') }
135
+ .map { |(k, v)| [k =~ /bottl/ ?
136
+ :bottling :
137
+ symbolize_text(k), v] }
138
+ .to_h
139
+ @availability.merge!({ seasonal: misc[:seasonal] })
140
+ @description = info_tbl.next_element
141
+ .next_element
142
+ .children
143
+ .children
144
+ .map(&:text)
145
+ .map(&:strip)
146
+ .drop(1)
147
+ .reject(&:empty?)
148
+ .join("\n")
149
+ @description = fix_characters(@description)
150
+ @retired = !(root.css('span.beertitle2') &&
151
+ root.css('span.beertitle2').text =~ /RETIRED/).nil?
152
+
153
+ nil
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,167 @@
1
+ require_relative "scraping"
2
+ require_relative "urls"
3
+
4
+ module RateBeer
5
+ class Brewery
6
+ # Each key represents an item of data accessible for each brewery, and defines
7
+ # dynamically a series of methods for accessing this data.
8
+ #
9
+ def self.data_keys
10
+ [:name,
11
+ :type,
12
+ :address,
13
+ :telephone,
14
+ :beers]
15
+ end
16
+
17
+ include RateBeer::Scraping
18
+ include RateBeer::URLs
19
+
20
+ attr_reader :established, :location
21
+
22
# Create RateBeer::Brewery instance.
#
# Requires the RateBeer ID# for the brewery in question. Optionally accepts
# a name parameter where the name is already known.
#
# Scraping#initialize (reached through super) stores every supplied option
# in an instance variable of the same name. Only the attributes that are not
# lazily scraped are defaulted to nil here. @type is deliberately left
# undefined unless it was supplied, because the lazy +type+ reader only
# scrapes when the variable is not yet defined.
#
# @param [Integer, String] id ID# for the brewery
# @param [String] name The name of the specified brewery
# @param [hash] options Options hash for entity created
#
def initialize(id, name: nil, **options)
  super
  @established = options[:established]
  @location = options[:location]
  @status = options[:status]
end
40
+
41
+ private
42
+
43
# Retrieve details about this brewery from the website.
#
# Fetches the brewery page, scrapes the brewery's own details into instance
# variables, then collects its beers into @beers. When the page is
# paginated, each listing page is fetched in turn and its beers appended.
#
# @return [nil]
#
def retrieve_details
  @doc = noko_doc(URI.join(BASE_URL, brewery_url(id)))
  retrieve_brewery_info

  @beers = []
  if pagination?(@doc)
    # retrieve_brewery_beers reads @doc and appends to @beers, so the loop is
    # run purely for its side effects.
    (1..page_count(@doc)).each do |page_no|
      @doc = noko_doc(URI.join(BASE_URL, brewery_url(id), "0/", "#{page_no}/"))
      retrieve_brewery_beers
    end
  else
    retrieve_brewery_beers
  end
  nil
end
64
+
65
+ # Scrape brewery info from Nokogiri Doc for brewery page
66
+ #
67
+ def retrieve_brewery_info
68
+ root = @doc.css('#container table').first
69
+ contact_node = root.css('td').first
70
+
71
+ @name = fix_characters(root.css('h1').first.text)
72
+ raise PageNotFoundError.new("Brewery not found - #{id}") if @name.empty?
73
+
74
+ @type = root.css('span.beerfoot')
75
+ .select { |x| x.text =~ /Type: .*/ }
76
+ .first
77
+ .text
78
+ .strip
79
+ .split("Type: ")
80
+ .last
81
+ .split(/\s{2,}/)
82
+ .first
83
+ @address = root.css('div[itemprop="address"] b span')
84
+ .map { |elem| key = case elem.attributes['itemprop'].value
85
+ when 'streetAddress' then :street
86
+ when 'addressLocality' then :city
87
+ when 'addressRegion' then :state
88
+ when 'addressCountry' then :country
89
+ when 'postalCode' then :postcode
90
+ else raise "unrecognised attribute"
91
+ end
92
+ [key, elem.text.strip] }
93
+ .to_h
94
+
95
+ @telephone = root.css('span[itemprop="telephone"]').first &&
96
+ root.css('span[itemprop="telephone"]').first.text
97
+
98
+ end
99
+
100
+ # Scrape beer details from Nokogiri Doc for brewery page
101
+ #
102
+ def retrieve_brewery_beers
103
+ location, brewer = nil # Variables used in the map below
104
+ root = @doc.css('table.maintable.nohover').first
105
+ @beers += root.css('tr').drop(1).map do |row|
106
+ if row.text =~ /^Brewed at (?<location>.+?)(?: by\/for (?<brewer>.+))?$/
107
+ location = Regexp.last_match['location']
108
+ brewer = Regexp.last_match['brewer']
109
+ nil
110
+ else
111
+ process_beer_row(row, location, brewer)
112
+ end
113
+ end.reject(&:nil?)
114
+ end
115
+
116
# Process a row of data representing one beer brewed by/at a brewery.
#
# The name cell is cleaned with fix_characters, ABV and average rating are
# read as floats, and the remaining rating columns as integers. A column
# missing from the row yields nil for the name and zero for numeric fields.
#
# @param [Nokogiri::XML::Element] row HTML TR row wrapped as a Nokogiri
#   element
# @param [String] location the location at which a brewery's beer is brewed
#   where this location differs from the brewery's regular brewsite/venue
# @param [String] brewer the client for whom this brewery brewed the beer,
#   where the brewery is brewing for a different company/brewery
# @return [RateBeer::Beer] a beer object representing the scraped beer,
#   containing scraped attributes
#
def process_beer_row(row, location=nil, brewer=nil)
  # Attributes stored in each table row, with indices representing their
  # position in each row
  columns = { name: 0,
              abv: 2,
              avg_rating: 3,
              overall_rating: 4,
              style_rating: 5,
              num_ratings: 6 }
  cells = row.css('td')

  beer = columns.each_with_object({}) do |(attr, index), beer_hash|
    cell = cells[index]
    text = cell && cell.text.gsub(nbsp, ' ').strip
    beer_hash[attr] = case attr
                      when :name then text && fix_characters(text)
                      when :abv, :avg_rating then text.to_f
                      else text.to_i
                      end
  end
  beer[:url] = cells.first.css('a').first['href']
  beer_id = beer[:url].split('/').last.to_i

  # Apply additional location and brewer information if scraped
  beer[:brewed_at] = location unless location.nil?
  beer[:brewed_by_for] = brewer unless brewer.nil?

  # Transform ratings into correct format
  beer[:rating] = { overall: beer[:overall_rating],
                    style: beer[:style_rating],
                    ratings: beer[:num_ratings],
                    weighted_avg: beer[:avg_rating] }

  # Create beer instance from scraped data. The hash is splatted so it is
  # passed as keyword arguments rather than as a second positional argument.
  Beer.new(beer_id, **beer)
end
166
+ end
167
+ end
@@ -0,0 +1,9 @@
1
+ require_relative 'location'
2
+
3
+ module RateBeer
4
+ class Country < Location
5
+ def initialize(id, name: nil)
6
+ super(id, location_type: :country, name: name)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,104 @@
1
+ require_relative "brewery"
2
+ require_relative "style"
3
+ require_relative "urls"
4
+
5
+ module RateBeer
6
+ class Location
7
+ # Keys for fields scraped on RateBeer
8
+ def self.data_keys
9
+ [:name,
10
+ :num_breweries,
11
+ :breweries]
12
+ end
13
+
14
+ include RateBeer::Scraping
15
+ include RateBeer::URLs
16
+
17
+ # Initialize a RateBeer::Location instance.
18
+ #
19
+ # Locations may be either Regions or Countries. This must be specified to
20
+ # the constructor.
21
+ #
22
+ # @param [Integer] id ID# for this location
23
+ # @param [Symbol] location_type Symbol representing either country or region
24
+ # @param [String] name Name of this location
25
+ #
26
+ def initialize(id, location_type: nil, name: nil, **options)
27
+ super
28
+ if location_type.nil? || !([:country, :region].include?(location_type))
29
+ raise ArgumentError.new("location_type must be supplied and must be "\
30
+ "either country or region")
31
+ end
32
+ @location_type = location_type
33
+ end
34
+
35
+ private
36
+
37
# Retrieve details about this location from the website.
#
# This method stores the retrieved details (@name, @num_breweries and
# @breweries) in instance variables of the location instance.
#
# @raise [PageNotFoundError] if the page heading shows that no such
#   location exists
# @return [nil]
#
def retrieve_details
  doc = noko_doc(url)
  heading = doc.css('.col-lg-9').first
  brewery_info = doc.css('#tabs table')

  @name = heading.at_css('h1')
                 .text
                 .split('Breweries')
                 .first
                 .strip
  if @name == "n/a" || @name == "RateBeer Robot Oops!"
    raise PageNotFoundError.new("#{self.class.name} not found - #{id}")
  end

  @num_breweries = heading.at_css('li.active')
                          .text
                          .scan(/Active \((\d*)\)/)
                          .first
                          .first
                          .to_i

  # The first table is treated as active breweries and any later table as
  # breweries that are out of business.
  @breweries = brewery_info.flat_map.with_index do |tbl, i|
    status = i == 0 ? 'Active' : 'Out of Business'
    # Rows without cells yield nil and are dropped, so @breweries only ever
    # holds Brewery instances.
    tbl.css('tr').drop(1).map { |row| brewery_from_row(row, status) }.compact
  end
  nil
end

# Build a Brewery from one row of a location's brewery table.
#
# @param [Nokogiri::XML::Element] row Table row to read
# @param [String] status 'Active' or 'Out of Business'
# @return [RateBeer::Brewery, nil] the brewery, or nil if the row has no cells
#
def brewery_from_row(row, status)
  cells = row.css('td')
  return nil if cells.empty?

  label = cells[0].text
  Brewery.new(cells[0].at_css('a')['href'].split('/').last.to_i,
              name: label.split('-').first.strip,
              location: label.split('-').last.sub('(Out of Business)', '').strip,
              type: cells[1].text.strip,
              established: status == 'Active' ? cells[3].text.to_i : nil,
              status: status)
end
88
+
89
+ # Return URL for page containing information on this location.
90
+ #
91
+ # Result depends on whether this is a country or a region.
92
+ #
93
+ def url
94
+ @url ||= case @location_type
95
+ when :country
96
+ URI.join(BASE_URL, country_url(id))
97
+ when :region
98
+ URI.join(BASE_URL, region_url(id))
99
+ else
100
+ raise "invalid location type: #{@location_type.to_s}"
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,9 @@
1
+ require_relative 'location'
2
+
3
+ module RateBeer
4
+ class Region < Location
5
+ def initialize(id, name: nil)
6
+ super(id, location_type: :region, name: name)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,157 @@
1
+ require_relative "beer"
2
+ require_relative "scraping"
3
+ require_relative "urls"
4
+
5
+ module RateBeer
6
+ # The Review class contains reviews of Beers posted to RateBeer.com. It
7
+ # also provides some scraping functionality for obtaining reviews.
8
+ #
9
+ class Review
10
+ extend RateBeer::URLs
11
+
12
+ class << self
13
+ attr_reader :review_limit
14
+ attr_reader :review_order
15
+
16
# Work out how many review pages must be fetched.
#
# A page holds ten reviews, so the limit is divided by ten and rounded up.
#
# @param [Integer] limit The number of reviews to be retrieved
# @return [Integer] Number of pages to be retrieved for number of reviews
#
def num_pages(limit)
  pages = limit.fdiv(10)
  pages.ceil
end
27
+
28
# Look up the URL suffix that selects a particular sort order.
#
# @param [Symbol] order The desired sorting order
# @return [String] The URL suffix required to obtain reviews sorted in
#   the desired order
# @raise [ArgumentError] if the ordering is not one of :most_recent,
#   :top_raters or :highest_score
#
def url_suffix(order)
  suffixes = { most_recent: "1",
               top_raters: "2",
               highest_score: "3" }
  suffixes.fetch(order) do
    raise ArgumentError.new("unknown ordering: #{order}")
  end
end
49
+
50
# Retrieve reviews for a given beer.
#
# @param [Integer, RateBeer::Beer] beer The beer for which to download
#   reviews
# @param [Symbol] order The order by which to list reviews
# @param [Integer] limit The number of reviews to retrieve
# @return [Array<RateBeer::Review>] A list of reviews for the passed beer,
#   at most +limit+ long
# @raise [RuntimeError] if beer is neither a RateBeer::Beer nor an Integer
#
def retrieve(beer, order: :most_recent, limit: 10)
  if beer.is_a?(RateBeer::Beer)
    beer_id = beer.id
  elsif beer.is_a?(Integer)
    beer_id = beer
    beer = RateBeer::Beer.new(beer)
  else
    raise "unknown beer value: #{beer}"
  end

  rating_pattern = /^(?<total>\d+(\.\d+)?).+
                    AROMA\s(?<aroma>\d+\/10).+
                    APPEARANCE\s(?<appearance>\d+\/5).+
                    TASTE\s(?<taste>\d+\/10).+
                    PALATE\s(?<palate>\d+\/5).+
                    OVERALL\s(?<overall>\d+\/20)$/x
  # The closing parenthesis sits outside the rank group, so only the digits
  # of the reviewer's rank are captured.
  reviewer_pattern = /^(?<name>.+)\s\((?<rank>\d+)\)\s-\s?
                      (?<location>.+)?\s?-\s
                      (?<date>.+)$/x

  reviews = num_pages(limit).times.flat_map do |page_number|
    url = URI.join(BASE_URL, review_url(beer_id, url_suffix(order), page_number))
    doc = RateBeer::Scraping.noko_doc(url)
    root = doc.css("#container table table")[3]

    # All reviews are contained within the sole cell in the sole row of
    # the selected table. Each review consists of rating information,
    # details of the reviewer, and the text of the review itself.
    #
    # The components are contained within div, small, div tags
    # respectively. We need to scrape these specifically.
    root.css('td')
        .children
        .select { |x| x.name == 'div' || x.name == 'small' }
        .map(&:text)
        .reject { |x| x.empty? || x.include?("googleFillSlot") }
        .each_slice(3).map do |(rating_data, reviewer_data, review)|
      scores = rating_data.match(rating_pattern)
      reviewer = reviewer_data.gsub(RateBeer::Scraping.nbsp, ' ')
                              .match(reviewer_pattern)
      rating_breakdown = [:overall, :aroma, :appearance, :taste, :palate]
                         .map { |k| [k, Rational(scores[k])] }
                         .to_h
      # The location group is optional in the pattern; to_s keeps a missing
      # location from raising on nil.
      new(beer: beer,
          reviewer: reviewer[:name],
          reviewer_rank: reviewer[:rank],
          location: reviewer[:location].to_s.strip,
          date: Date.parse(reviewer[:date]),
          rating: scores[:total].to_f,
          rating_breakdown: rating_breakdown,
          comment: review)
    end
  end
  reviews.take(limit)
end
114
+ end
115
+
116
+ attr_reader :beer
117
+ attr_reader :reviewer
118
+ attr_reader :reviewer_rank
119
+ attr_reader :location
120
+ attr_reader :date
121
+ attr_reader :rating
122
+ attr_reader :rating_breakdown
123
+ attr_reader :comment
124
+
125
+ def initialize(**options)
126
+ @beer = if options[:beer].is_a?(RateBeer::Beer)
127
+ options[:beer]
128
+ elsif options[:beer].is_a?(Integer)
129
+ RateBeer::Beer.new(options[:beer])
130
+ else
131
+ raise ArgumentError.new("incorrect beer parameter: #{options[:beer]}")
132
+ end
133
+ [:reviewer, :reviewer_rank, :location, :date,
134
+ :rating, :rating_breakdown, :comment].each do |param|
135
+ if options[param].nil?
136
+ raise ArgumentError.new("#{param.to_s} parameter required")
137
+ end
138
+ instance_variable_set("@#{param.to_s}", options[param])
139
+ end
140
+ end
141
+
142
+ def inspect
143
+ var = "#<Review of #{self.beer} - #{@reviewer} on #{@date}>"
144
+ end
145
+
146
+ def to_s
147
+ inspect
148
+ end
149
+
150
+ def ==(other_review)
151
+ self.reviewer == other_review.reviewer &&
152
+ self.date == other_review.date &&
153
+ self.beer == other_review.beer &&
154
+ self.comment == other_review.comment
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,150 @@
1
+ require 'net/http'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+
5
+ module RateBeer
6
+
7
+ # The Scraping module contains a series of methods to assist with scraping
8
+ # pages from RateBeer.com, and dealing with the results.
9
+ module Scraping
10
+
11
+ class PageNotFoundError < StandardError; end
12
+
13
+ attr_reader :id
14
+
15
# Run method on inclusion in class.
#
# Defines one lazy reader per entry in +base.data_keys+. A reader calls
# +retrieve_details+ if its instance variable has not been defined yet,
# then returns the variable.
#
# The readers are defined on the including class rather than on this
# module, so each class only gains readers for its own data keys.
#
# @param [Class] base Class including this module; must respond to
#   +data_keys+
#
def self.included(base)
  base.data_keys.each do |attr|
    # define_method is sent because it is private on older Rubies.
    base.send(:define_method, attr) do
      retrieve_details unless instance_variable_defined?("@#{attr}")
      instance_variable_get("@#{attr}")
    end
  end
end
26
+
27
+ # Create RateBeer::Scraper instance.
28
+ #
29
+ # Requires an ID#, and optionally accepts a name and options parameters.
30
+ #
31
+ # @param [Integer, String] id ID# of the entity which is to be retrieved
32
+ # @param [String] name Name of the entity to which ID# relates if known
33
+ # @param [hash] options Options hash for entity created
34
+ #
35
+ def initialize(id, name: nil, **options)
36
+ @id = id
37
+ @name = name unless name.nil?
38
+ options.each do |k, v|
39
+ instance_variable_set("@#{k.to_s}", v)
40
+ end
41
+ end
42
+
43
+ def inspect
44
+ val = "#<#{self.class} ##{@id}"
45
+ val << " - #{@name}" if instance_variable_defined?("@name")
46
+ val << ">"
47
+ end
48
+
49
+ def to_s
50
+ inspect
51
+ end
52
+
53
+ def ==(other_entity)
54
+ other_entity.is_a?(self.class) && id == other_entity.id
55
+ end
56
+
57
+ def url
58
+ @url ||= if respond_to?("#{demodularized_class_name.downcase}_url", id)
59
+ send("#{demodularized_class_name.downcase}_url", id)
60
+ end
61
+ end
62
+
63
+ # Return full details of the scraped entity in a Hash.
64
+ #
65
+ def full_details
66
+ data = self.class
67
+ .data_keys
68
+ .map { |k| [k, send("#{k}")] }
69
+ .to_h
70
+ { id: id,
71
+ url: url }.merge(data)
72
+ end
73
+
74
+ # Determine if data is paginated, or not.
75
+ #
76
+ # @param [Nokogiri::Doc] doc Nokogiri document to test for pagination
77
+ # @return [Boolean] true, if paginated, else false
78
+ #
79
+ def pagination?(doc)
80
+ !page_count(doc).nil?
81
+ end
82
+
83
# Work out how many pages a paginated document spans.
#
# Reads the bold entries inside the '.pagination' element as integers and
# returns the largest one.
#
# @param [Nokogiri::Doc] doc Nokogiri document to test for pagination
# @return [Integer, nil] Number of pages in the document, or nil when the
#   document has no '.pagination' element or no bold entries within it
#
def page_count(doc)
  pager = doc.at_css('.pagination')
  pager && pager.css('b').map { |bold| bold.text.to_i }.max
end
95
+
96
# Create Nokogiri doc from url.
#
# The URL is opened through its URI object rather than Kernel#open, which
# no longer opens URLs on Ruby 3.0+ and would treat an arbitrary string as
# a file name or command. The block form closes the stream once it has been
# read.
#
# @param [URI, String] url Address of the page to fetch
# @return [Nokogiri::HTML::Document] Parsed page
# @raise [PageNotFoundError] if the server answers with an HTTP error
#
def noko_doc(url)
  html = URI.parse(url.to_s).open(&:read)
  Nokogiri::HTML(html)
rescue OpenURI::HTTPError
  raise PageNotFoundError, "Page not found - #{url}"
end
105
+
106
+ module_function :noko_doc
107
+
108
+ # Emulate &nbsp; character for stripping, substitution, etc.
109
+ #
110
+ def nbsp
111
+ Nokogiri::HTML("&nbsp;").text
112
+ end
113
+
114
+ module_function :nbsp
115
+
116
# Turn a scraped text label into a symbol key.
#
# The text is lower-cased, spaces become underscores and full stops are
# removed, e.g. "Est. Calories" becomes :est_calories.
#
# @param [String] text Label to convert
# @return [Symbol] Symbol form of the label
#
def symbolize_text(text)
  snake = text.downcase.tr(' ', '_')
  snake.delete('.').to_sym
end
121
+
122
# Fix characters in string scraped from website.
#
# Applies a fixed list of substitutions in order, then strips the result.
# The substitutions are made with gsub!, so the string passed in is itself
# modified; only the final strip produces a new string.
#
# @param [String] string Scraped text to clean (modified in place)
# @return [String] Cleaned, stripped copy of the text
#
def fix_characters(string)
  substitutions = [[nbsp, " "],
                   ["\u0093", "ž"],
                   ["\u0092", "'"],
                   ["\u0096", "–"],
                   [/ {2,}/, " "]]
  substitutions.each do |pattern, replacement|
    string.gsub!(pattern, replacement)
  end
  string.strip
end
136
+
137
+ # Make POST request to RateBeer form. Return a Nokogiri doc.
138
+ #
139
+ def post_request(url, params)
140
+ res = Net::HTTP.post_form(url, params)
141
+ Nokogiri::HTML(res.body)
142
+ end
143
+
144
+ private
145
+
146
+ def demodularized_class_name
147
+ self.class.name.split("::").last
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,224 @@
1
+ require "i18n"
2
+ require_relative "beer"
3
+ require_relative "brewery"
4
+ require_relative "scraping"
5
+ require_relative "urls"
6
+
7
+ module RateBeer
8
+
9
+ # Stop I18N from enforcing locale, to avoid error message
10
+ I18n.enforce_available_locales = false
11
+
12
+ # This class provides functionality for searching RateBeer.com for a
13
+ # specific beer or brewery.
14
+ #
15
+ class Search
16
+ # Keys for fields scraped on RateBeer
17
+ def self.data_keys
18
+ [:query,
19
+ :beers,
20
+ :breweries]
21
+ end
22
+
23
+ include RateBeer::Scraping
24
+ include RateBeer::URLs
25
+
26
+ class << self
27
+ # Create method which generates new search instance and immediately runs
28
+ # a search.
29
+ #
30
+ def search(query)
31
+ s = self.new(query)
32
+ { beers: s.beers,
33
+ breweries: s.breweries }
34
+ end
35
+ end
36
+
37
+ attr_reader :query
38
+
39
+ # Create a RateBeer::Search instance.
40
+ #
41
+ # @param [String] query Term to use to search RateBeer
42
+ #
43
+ def initialize(query)
44
+ self.query = query
45
+ end
46
+
47
+ # Setter for query instance variable.
48
+ #
49
+ def query=(qry)
50
+ clear_cached_data
51
+ @query = fix_query_param(qry)
52
+ end
53
+
54
+ def inspect
55
+ num_beers = @beers && @beers.count || 0
56
+ num_breweries = @breweries && @breweries.count || 0
57
+ val = "#<#{self.class} - #{@query}"
58
+ val << " - #{num_beers} beers / #{num_breweries} breweries" if @beers || @breweries
59
+ val << ">"
60
+ end
61
+
62
# Search RateBeer for beers, brewers, etc.
#
# Each 'h2' heading on the results page is paired with the 'table' at the
# same position. Tables headed 'brewers' and 'beers' are processed into
# @breweries and @beers; any other table is ignored.
#
# @return [RateBeer::Search] this search instance, with any matching beers
#   and breweries stored on it (nil where no table was returned)
#
def run_search
  @beers = nil
  @breweries = nil
  doc = post_request(URI.join(BASE_URL, SEARCH_URL), post_params)
  headings = doc.css('h2').map(&:text)
  headings.zip(doc.css('table')).each do |heading, table|
    case heading
    when 'brewers'
      @breweries = process_breweries_table(table)
    when 'beers'
      @beers = process_beers_table(table)
    end
  end

  # RateBeer is inconsistent with searching for IPAs. If IPA is in the name
  # of the beer, replace IPA with India Pale Ale, and add the additional
  # results to these results.
  if query.downcase.include?(" ipa")
    alt_query = query.downcase.gsub(" ipa", " india pale ale")
    extra_beers = self.class.new(alt_query).run_search.beers
    # De-duplicate on ID: uniq without a block compares with hash/eql?,
    # which the scraped entities do not override.
    @beers = ((@beers || []) + (extra_beers || [])).uniq(&:id)
  end
  self
end
95
+
96
+ alias retrieve_details run_search
97
+
98
+ private
99
+
100
+ # Generate parameters to use in POST request.
101
+ #
102
+ def post_params
103
+ { "BeerName" => @query }
104
+ end
105
+
106
+ # Process breweries table returned in search.
107
+ #
108
+ # The breweries table (if returned) consists of a series of rows each
109
+ # containing two cells: the first is the name (and hyperlink) to the
110
+ # brewery; and the second is the full location of the brewery.
111
+ #
112
+ # @param [Nokogiri::XML::Element] table An HTML table containing breweries
113
+ # information
114
+ # @return [Array<RateBeer::Brewery>] One brewery per table row, created
115
+ # with the scraped ID and name
116
+ #
117
+ def process_breweries_table(table)
118
+ table.css('tr').map do |row|
119
+ result = [:id, :name, :location, :url].zip([nil]).to_h
120
+ result[:name], result[:location] = row.element_children.map { |x|
121
+ fix_characters(x.text)
122
+ }
123
+ result[:url] = row.at_css('a')['href']
124
+ result[:id] = result[:url].split('/').last.to_i
125
+ Brewery.new(result[:id], name: result[:name])
126
+ end
127
+ end
128
+
129
+ # Process beers table returned in search.
130
+ #
131
+ # The beers table (if returned) consists of a series of rows each of which
132
+ # contains five cells: the first is the name (and hyperlink) to the beer;
133
+ # the second and third relate to features of the RateBeer.com site, and are
134
+ # ignored; the fourth provides the rating of the beer (if any); and the
135
+ # fifth contains the number of ratings submitted for this beer.
136
+ #
137
+ # The first row in the table contains headings, and is disregarded.
138
+ #
139
+ # @param [Nokogiri::XML::Element] table An HTML table containing beers
140
+ # information
141
+ # @return [Array<RateBeer::Beer>] One beer per table row, created with
142
+ # the scraped ID and name
143
+ #
144
+ def process_beers_table(table)
145
+ table.css('tr').drop(1).map do |row|
146
+ result = [:id, :name, :score, :ratings, :url].zip([nil]).to_h
147
+ content = row.element_children.map { |x| fix_characters(x.text) }
148
+ result[:name] = content.first
149
+ result[:score], result[:ratings] = content.values_at(3, 4)
150
+ .map { |n|
151
+ n.nil? || n.empty? ? nil : n.to_i
152
+ }
153
+ result[:url] = row.at_css('a')['href']
154
+ result[:id] = result[:url].split('/').last.to_i
155
+ Beer.new(result[:id], name: result[:name])
156
+ end
157
+ end
158
+
159
+ # Amend search query string for better results
160
+ #
161
+ # RateBeer is a little finicky about finding search results. It does not
162
+ # provide results on abbreviations, and a passed query including special
163
+ # characters will return no hits. Often searching using a generic term such
164
+ # as Co, Brewers, Brewery, etc. will not return any results. This method
165
+ # strips out such generic terms from a query.
166
+ #
167
+ # This method attempts to deal with these issues.
168
+ #
169
+ # @param [String] query Raw query parameter
170
+ # @return [String] Query parameter amended to improve results
171
+ #
172
+ def fix_query_param(query)
173
+ query = strip_generic_terms(query)
174
+ query = substitute_known_terms(query)
175
+ I18n.transliterate(query)
176
+ end
177
+
178
# Strip defined generic terms from query.
#
# This method removes all generic terms which may refer to a brewery, but
# which may not appear in the brewery's proper name, e.g. brewers. A term
# is only removed when it stands as a whole word, delimited by a space or
# by the start or end of a line; matching is case-insensitive.
#
# The query passed in is left untouched, so frozen strings are accepted.
#
# @param [String] query Raw query parameter
# @return [String] Query parameter with generics stripped out
#
def strip_generic_terms(query)
  # Single-quoted so the backslash reaches the regexp and "\." matches a
  # literal full stop rather than any character.
  generic_words = ['Brew',
                   'Brewers',
                   'Brewery',
                   'Brewing',
                   'Brewhouse',
                   'Company',
                   'Co\.?',
                   'Inc\.?',
                   'Ltd\.?',
                   'Limited']
  # Terms are removed one after another so that adjacent generic words,
  # which share a delimiting space, are all stripped.
  generic_words
    .map { |word| /(^| )#{word}( |$)/i }
    .reduce(query) { |text, pattern| text.gsub(pattern, ' ') }
    .strip
end
201
+
202
# Substitute known problematic terms in query.
#
# This method will replace terms which are known to cause problems in the
# search with different terms which do not cause the same problem.
#
# The query passed in is left untouched, so frozen strings are accepted.
#
# @param [String] query Raw query parameter
# @return [String] Query parameter with terms substituted
#
def substitute_known_terms(query)
  # List of problem terms - key can be a string or regexp
  problem_terms = { "six°north" => "Six Degrees North",
                    /[\/:]/ => " " }
  problem_terms
    .reduce(query) { |text, (term, substitute)| text.gsub(term, substitute) }
    .strip
end
217
+
218
# Forget any cached search results.
#
# Removes the @beers and @breweries instance variables where they are
# defined; variables that are not defined are skipped.
#
def clear_cached_data
  %w[@beers @breweries].each do |ivar|
    next unless instance_variable_defined?(ivar)
    remove_instance_variable(ivar)
  end
end
223
+ end
224
+ end
@@ -0,0 +1,104 @@
1
+ require_relative 'beer'
2
+ require_relative 'scraping'
3
+ require_relative 'urls'
4
+
5
module RateBeer
  # A beer style, scraped from the "beerstyles" pages.
  #
  class Style
    # Each key represents an item of data accessible for each style, and
    # defines dynamically a series of methods for accessing this data.
    #
    def self.data_keys
      [:name,
       :description,
       :glassware,
       :beers]
    end

    include RateBeer::Scraping
    include RateBeer::URLs

    # Name of the category heading under which this style is listed on the
    # styles landing page. Only set by all_styles for listed styles.
    attr_accessor :category

    class << self
      include RateBeer::URLs

      # Scrape all styles.
      #
      # RateBeer provides a styles landing page, with links through to info on
      # each style listed thereon. This method scrapes style info with links
      # to the more detailed pages.
      #
      # @param [Boolean] include_hidden Flag for whether to include hidden
      #   styles.
      # @return [Array<RateBeer::Style>] List of styles with links etc. to
      #   detailed pages
      #
      def all_styles(include_hidden = false)
        doc = Scraping.noko_doc(URI.join(BASE_URL, '/beerstyles/'))
        root = doc.at_css('div.container-fluid table')

        categories = root.css('.groupname').map(&:text)
        style_node = root.css('.styleGroup')

        # The i-th style group is paired with the i-th category heading.
        styles = style_node.flat_map.with_index do |list, i|
          category = categories[i]
          list.css('a').map do |x|
            # The style id is the last path segment of the link target.
            Style.new(x['href'].split('/').last.to_i, name: x.text).tap do |s|
              s.category = category
            end
          end
        end

        include_hidden ? styles + hidden_styles : styles
      end

      # Scrape hidden style information
      #
      # RateBeer has a number of styles not accessible from the "beerstyles"
      # landing page. This method builds a style for each of their known ids.
      #
      # @return [Array<RateBeer::Style>] List of hidden styles
      #
      def hidden_styles
        hidden_ids = [40, 41, 57, 59, 66, 67, 68, 69, 70,
                      75, 83, 99, 104, 106, 116, 119, 120]
        hidden_ids.map { |id| Style.new(id) }
      end
    end

    private

    # Retrieve details about this style from the website.
    #
    # This method stores the retrieved details in instance variables of
    # this style instance (@name, @description, @glassware and @beers, the
    # latter being a Hash keyed by the integer in the first table column).
    #
    # @raise [PageNotFoundError] if the style page has no main container
    # @return [nil]
    #
    def retrieve_details
      doc = noko_doc(URI.join(BASE_URL, style_url(id)))
      root = doc.at_css('.container-fluid')
      # Bail out before requesting the beer list when the style page itself
      # is missing, so no second request is made for a style that is absent.
      raise PageNotFoundError, "style not found - ##{id}" if root.nil?

      beer_list = noko_doc(URI.join(BASE_URL, style_beers_url(id)))

      @name = root.at_css('h1').text.strip
      @description = root.at_css('#styleDescription').text
      @glassware = root.css('.glassblurb').map { |x| x.text.strip }

      # Skip the first table row, then map column 0 => beer built from the
      # link in column 1.
      @beers = beer_list.css('tr').drop(1).map do |row|
        cells = row.css('td')
        url = cells[1].at_css('a')['href']
        [cells[0].text.to_i, Beer.new(url.split('/').last,
                                      name: fix_characters(cells[1].text))]
      end.to_h
      nil
    end
  end
end
@@ -0,0 +1,54 @@
1
module RateBeer
  # This module contains URLs or URL patterns for use throughout the Gem.
  #
  # Every helper can be called on the module itself (e.g.
  # RateBeer::URLs.beer_url(1)) and is also mixed into including classes.
  #
  module URLs
    BASE_URL = 'http://www.ratebeer.com'.freeze
    SEARCH_URL = '/findbeer.asp'.freeze

    # Return URL to info page for beer with id
    #
    def beer_url(id)
      "/beer/a/#{id}/"
    end

    # Return URL to page containing reviews for a given beer
    #
    def review_url(beer_id, sort_suffix, page_number)
      "/beer/a/#{beer_id}/#{sort_suffix}/#{page_number}/"
    end

    # Return URL to info page for brewery with id
    #
    def brewery_url(id)
      "/brewers/a/#{id}/"
    end

    # Return URL to info page for country with id
    #
    def country_url(id)
      "/breweries/a/0/#{id}/"
    end

    # Return URL to info page for region with id
    #
    def region_url(id)
      "/breweries/a/#{id}/0/"
    end

    # Return URL to info page for style with id
    #
    def style_url(id)
      "/beerstyles/a/#{id}/"
    end

    # Return URL to beers list page for style with id
    #
    def style_beers_url(id)
      "/ajax/top-beer-by-style.asp?style=#{id}"
    end

    module_function :beer_url,
                    :review_url,
                    :brewery_url,
                    :country_url,
                    :region_url,
                    :style_url,
                    :style_beers_url

    # module_function also turns the mixed-in copy private. review_url was a
    # public instance method before it was exposed on the module, so restore
    # that visibility for any existing caller.
    public :review_url
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ratebeer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4b
4
+ version: 0.0.5a
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Meakin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-03 00:00:00.000000000 Z
11
+ date: 2016-07-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: RateBeer provides a way to access beer & brewery data from RateBeer.com.
14
14
  email: dan@danmeakin.com
@@ -23,6 +23,16 @@ files:
23
23
  - Rakefile
24
24
  - bin/ratebeer
25
25
  - lib/ratebeer.rb
26
+ - lib/ratebeer/beer.rb
27
+ - lib/ratebeer/brewery.rb
28
+ - lib/ratebeer/country.rb
29
+ - lib/ratebeer/location.rb
30
+ - lib/ratebeer/region.rb
31
+ - lib/ratebeer/review.rb
32
+ - lib/ratebeer/scraping.rb
33
+ - lib/ratebeer/search.rb
34
+ - lib/ratebeer/style.rb
35
+ - lib/ratebeer/urls.rb
26
36
  - spec/lib/ratebeer/beer_spec.rb
27
37
  - spec/lib/ratebeer/brewery_spec.rb
28
38
  - spec/lib/ratebeer/country_spec.rb