ratebeer 0.0.8 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 89147b9ea6840ec56edb38f6d24341f2d15a43dd
4
- data.tar.gz: 5a3bbf6f0c78245d5c028f5c65a8dc2b0b5e5e99
3
+ metadata.gz: 45bf83b47740dbbdbed33401175d33db8917f672
4
+ data.tar.gz: f5eaa7e062b02b8391d094e8395c57265b3f92e2
5
5
  SHA512:
6
- metadata.gz: 55bb181d53621951d0a04e7c235723eef68f4be2174dd922d2108339a2b8294b2bdbafdac6908775a9ac6a164be2b1e1be12d1499bd38456787621222536fc84
7
- data.tar.gz: a27cacfa4adb464499e38911528af4bbdda4246cf3071237b1016f5edf0309f5052ff539613b9a489b96632a9e25617623ae3244858f8e752d1666f4f4e6449d
6
+ metadata.gz: fc34adee973d864596e83ee5570c5540a8ff0216c847fc2b9dd2a8b5e4121240ec13a443dfb346b69768be0e9ea0898b35eed18df8349879b0dbd221a3dda6f5
7
+ data.tar.gz: 3bcf0702e0cad27ef4b2a08da5000b8465043c30d11934d49191078e3df001c66900a09393826f9d42075e1d3381257f146a829bb6c9a3d1f21e5732f8878def
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ratebeer (0.0.6)
4
+ ratebeer (0.0.8)
5
+ i18n
5
6
  nokogiri
6
7
 
7
8
  GEM
data/lib/ratebeer.rb CHANGED
@@ -1,19 +1,18 @@
1
+ Dir[File.expand_path('../ratebeer/*.rb', __FILE__)].each { |f| require f }
2
+
1
3
  # RateBeer.com scraper
2
4
  #
3
- # Scrapes required information on beers, breweries, ratings, etc. from the
5
+ # Scrapes required information on beers, breweries, ratings, etc. from the
4
6
  # RateBeer.com beer database.
5
7
  #
6
- Dir[File.expand_path("../ratebeer/*.rb", __FILE__)].each { |f| require f }
7
-
8
8
  module RateBeer
9
-
10
9
  # Create new beer instance, using ID and name passed as arguments.
11
10
  #
12
11
  # @param [Integer, String] id ID# of beer to retrieve
13
12
  # @param [String] name Name of the beer to which ID# relates if known
14
13
  # @return [RateBeer::Beer] beer with passed ID#
15
14
  #
16
- def beer(id, name=nil)
15
+ def beer(id, name = nil)
17
16
  Beer.new(id, name: name)
18
17
  end
19
18
 
@@ -23,7 +22,7 @@ module RateBeer
23
22
  # @param [String] name Name of the brewery to which ID# relates if known
24
23
  # @return [RateBeer::Brewery] brewery with passed ID#
25
24
  #
26
- def brewery(id, name=nil)
25
+ def brewery(id, name = nil)
27
26
  Brewery.new(id, name: name)
28
27
  end
29
28
 
@@ -33,7 +32,7 @@ module RateBeer
33
32
  # @param [String] name Name of the style to which ID# relates if known
34
33
  # @return [RateBeer::Style] style with passed ID#
35
34
  #
36
- def style(id, name=nil)
35
+ def style(id, name = nil)
37
36
  Style.new(id, name: name)
38
37
  end
39
38
 
data/lib/ratebeer/beer.rb CHANGED
@@ -11,12 +11,12 @@ module RateBeer
11
11
  #
12
12
  def self.data_keys
13
13
  [:name,
14
- :brewery,
15
- :style,
16
- :glassware,
17
- :availability,
18
- :abv,
19
- :calories,
14
+ :brewery,
15
+ :style,
16
+ :glassware,
17
+ :availability,
18
+ :abv,
19
+ :calories,
20
20
  :description,
21
21
  :retired,
22
22
  :rating]
@@ -25,6 +25,12 @@ module RateBeer
25
25
  include RateBeer::Scraping
26
26
  include RateBeer::URLs
27
27
 
28
+ # CSS selector for the root element containing beer information.
29
+ ROOT_SELECTOR = '#container table'.freeze
30
+
31
+ # CSS selector for the beer information element.
32
+ INFO_SELECTOR = 'table'.freeze
33
+
28
34
  # Create RateBeer::Beer instance.
29
35
  #
30
36
  # Requires the RateBeer ID# for the beer in question.
@@ -37,6 +43,23 @@ module RateBeer
37
43
  super
38
44
  end
39
45
 
46
+ def doc
47
+ unless instance_variable_defined?("@doc")
48
+ @doc = noko_doc(URI.join(BASE_URL, beer_url(id)))
49
+ validate_beer
50
+ redirect_if_aliased
51
+ end
52
+ @doc
53
+ end
54
+
55
+ def root
56
+ @root ||= doc.at_css(ROOT_SELECTOR)
57
+ end
58
+
59
+ def info_root
60
+ @info_root ||= root.at_css(INFO_SELECTOR)
61
+ end
62
+
40
63
  # Return reviews of this beer.
41
64
  #
42
65
  def reviews(order: :most_recent, limit: 10)
@@ -45,99 +68,85 @@ module RateBeer
45
68
 
46
69
  private
47
70
 
48
- # Retrieve details about this beer from the website.
71
+ # Redirects this beer to the "proper" beer page if it represents an alias
72
+ # of another beer.
49
73
  #
50
- # This method stores the retrieved details in instance variables
51
- # of the beer instance.
52
- #
53
- def retrieve_details
54
- doc = noko_doc(URI.join(BASE_URL, beer_url(id)))
55
- root = doc.css('#container table').first
56
- info_tbl = root.css('table').first
57
-
58
- @name = doc.css("h1")
59
- .text
60
- .strip
61
- @name = fix_characters(@name)
62
- raise PageNotFoundError.new("Beer not found - #{id}") if name.empty?
63
-
64
- # If this beer is an alias, change ID to that of "proper" beer and
74
+ # This method overwrites the value of @doc, so that this will scrape the
75
+ # details of the proper beer, and not the alias.
76
+ def redirect_if_aliased
65
77
  # retrieve details of the proper beer instead.
66
78
  alias_pattern = /Also known as(.|\n)*Proceed to the aliased beer\.{3}/
67
- if root.css('tr')[1].css('div div').text =~ alias_pattern
68
- alias_node = root.css('tr')[1]
69
- .css('div div')
70
- .css('a')
71
- .first
72
- alias_name = alias_node.text
73
- alias_id = alias_node['href'].split('/').last.to_i
74
- @id = alias_id
75
- retrieve_details
76
- return nil
79
+ local_root = doc.at_css(ROOT_SELECTOR)
80
+ if local_root.css('tr')[1].css('div div').text =~ alias_pattern
81
+ scrape_name # Set the name to the original, non-aliased beer.
82
+ alias_node = local_root.css('tr')[1]
83
+ .css('div div')
84
+ .css('a')
85
+ .first
86
+ @alias_id = alias_node['href'].split('/').last.to_i
87
+ @doc = noko_doc(URI.join(BASE_URL, beer_url(@alias_id)))
88
+ end
89
+ end
90
+
91
+ def validate_beer
92
+ error_message = 'we didn\'t find this beer'
93
+ if name == error_message
94
+ raise PageNotFoundError.new("Beer not found - #{id}")
95
+ end
96
+ end
97
+
98
+ def scrape_name
99
+ @name ||= fix_characters(doc.css('h1').text.strip)
100
+ end
101
+
102
+ def scrape_brewery
103
+ brewery_element = doc.at_css("a[itemprop='brand']")
104
+ brewery_id = id_from_link(brewery_element)
105
+ brewery_name = fix_characters(brewery_element.text)
106
+ @brewery = Brewery.new(brewery_id, name: brewery_name)
107
+ end
108
+
109
+ def scrape_style
110
+ style_element = doc.at_css("a[href^='/beerstyles']")
111
+ style_id = id_from_link(style_element)
112
+ style_name = fix_characters(style_element.text)
113
+ @style = Style.new(style_id, name: style_name)
114
+ end
115
+
116
+ def scrape_glassware
117
+ glassware_elements = doc.css("a[href^='/ShowGlassware.asp']")
118
+ @glassware = glassware_elements.map do |el|
119
+ [:id, :name].zip([el['href'].split('GWID=').last.to_i, el.text]).to_h
77
120
  end
121
+ end
122
+
123
+ def scrape_availability
124
+ raw_info = info_root.css('td')[1]
125
+ .css('table')
126
+ .css('td')
127
+ .children
128
+ .children
129
+ .map(&:text)
130
+ .reject(&:empty?)
131
+ .each_slice(2)
132
+ .to_a
133
+ .tap { |a| a.last.unshift('distribution') }
134
+ .map do |(k, v)|
135
+ [k =~ /bottl/ ? :bottling : symbolize_text(k), v]
136
+ end
137
+ @availability = raw_info.to_h.merge(seasonal: scrape_misc[:seasonal])
138
+ end
139
+
140
+ def scrape_abv
141
+ @abv = scrape_misc[:abv]
142
+ end
143
+
144
+ def scrape_calories
145
+ @calories = scrape_misc[:est_calories]
146
+ end
78
147
 
79
- @brewery = info_tbl.css('td')[1]
80
- .css('div')
81
- .first
82
- .css('a')
83
- .map { |a| [:id,
84
- :name].zip([a['href'].split('/')
85
- .last
86
- .to_i, a.text]).to_h }.first
87
- @brewery = Brewery.new(@brewery[:id], name: fix_characters(@brewery[:name]))
88
- @style = info_tbl.css('td')[1]
89
- .css('div')
90
- .first
91
- .css('a')
92
- .select { |a| a['href'] =~ /beerstyles/ }
93
- .map { |a| [:id,
94
- :name].zip([a['href'].split('/')
95
- .last
96
- .to_i, a.text]).to_h }.first
97
- @style = Style.new(@style[:id], name: fix_characters(@style[:name]))
98
- @glassware = info_tbl.css('td')[1]
99
- .css('div')[1]
100
- .css('a')
101
- .map { |a| [:id,
102
- :name].zip([a['href'].split('GWID=')
103
- .last
104
- .to_i, a.text]).to_h }.first
105
- misc = info_tbl.next_element
106
- .first_element_child
107
- .children
108
- .map(&:text)
109
- .flat_map { |x| x.gsub(nbsp, ' ').strip.split(':') }
110
- .map(&:strip)
111
- .reject(&:empty?)
112
- .each_slice(2)
113
- .map { |(k, v)| [symbolize_text(k),
114
- v.to_f.zero? ? v : v.to_f] }
115
- .to_h
116
- @abv = misc[:abv]
117
- @calories = misc[:est_calories]
118
- @rating = [:overall,
119
- :style].zip(info_tbl.css('div')
120
- .select { |d| d['title'] =~ /This figure/ }
121
- .map { |d| d['title'].split(':').first.to_f }).to_h
122
- @rating.merge!({ ratings: misc[:ratings],
123
- weighted_avg: misc[:weighted_avg],
124
- mean: misc[:mean] })
125
- @availability = info_tbl.css('td')[1]
126
- .css('table')
127
- .css('td')
128
- .children
129
- .children
130
- .map(&:text)
131
- .reject(&:empty?)
132
- .each_slice(2)
133
- .to_a
134
- .tap { |a| a.last.unshift('distribution') }
135
- .map { |(k, v)| [k =~ /bottl/ ?
136
- :bottling :
137
- symbolize_text(k), v] }
138
- .to_h
139
- @availability.merge!({ seasonal: misc[:seasonal] })
140
- @description = info_tbl.next_element
148
+ def scrape_description
149
+ @description = info_root.next_element
141
150
  .next_element
142
151
  .children
143
152
  .children
@@ -147,10 +156,38 @@ module RateBeer
147
156
  .reject(&:empty?)
148
157
  .join("\n")
149
158
  @description = fix_characters(@description)
159
+ end
160
+
161
+ def scrape_retired
150
162
  @retired = !(root.css('span.beertitle2') &&
151
- root.css('span.beertitle2').text =~ /RETIRED/).nil?
163
+ root.css('span.beertitle2').text =~ /RETIRED/).nil?
164
+ end
165
+
166
+ def scrape_rating
167
+ raw_rating = [:overall,
168
+ :style].zip(info_root.css('div')
169
+ .select { |d| d['title'] =~ /This figure/ }
170
+ .map { |d| d['title'].split(':').first.to_f }).to_h
171
+ @rating = raw_rating.merge(ratings: scrape_misc[:ratings],
172
+ weighted_avg: scrape_misc[:weighted_avg],
173
+ mean: scrape_misc[:mean])
174
+ end
152
175
 
153
- nil
176
+ # Scrapes the miscellaneous information contained on the beer page.
177
+ #
178
+ # This information relates to various other specific types of information.
179
+ # As such, other scrapers rely on this method for information.
180
+ def scrape_misc
181
+ info_root.next_element
182
+ .first_element_child
183
+ .children
184
+ .map(&:text)
185
+ .flat_map { |x| x.gsub(nbsp, ' ').strip.split(':') }
186
+ .map(&:strip)
187
+ .reject(&:empty?)
188
+ .each_slice(2)
189
+ .map { |(k, v)| [symbolize_text(k), v.to_f.zero? ? v : v.to_f] }
190
+ .to_h
154
191
  end
155
192
  end
156
193
  end
@@ -1,7 +1,11 @@
1
- require_relative "scraping"
2
- require_relative "urls"
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'scraping'
4
+ require_relative 'urls'
3
5
 
4
6
  module RateBeer
7
+ # The brewery class represents one brewery found in RateBeer, with methods
8
+ # for accessing information found about the brewery on the site.
5
9
  class Brewery
6
10
  # Each key represents an item of data accessible for each brewery, and defines
7
11
  # dynamically a series of methods for accessing this data.
@@ -19,11 +23,14 @@ module RateBeer
19
23
 
20
24
  attr_reader :established, :location
21
25
 
26
+ # CSS selector for the brewery information element.
27
+ INFO_SELECTOR = "div[itemtype='http://schema.org/LocalBusiness']".freeze
28
+
22
29
  # Create RateBeer::Brewery instance.
23
30
  #
24
31
  # Requires the RateBeer ID# for the brewery in question. Optionally accepts
25
32
  # a name parameter where the name is already known.
26
- #
33
+ #
27
34
  # @param [Integer, String] id ID# for the brewery
28
35
  # @param [String] name The name of the specified brewery
29
36
  # @param [hash] options Options hash for entity created
@@ -38,87 +45,70 @@ module RateBeer
38
45
  end
39
46
  end
40
47
 
41
- def name
42
- @name ||= retrieve_brewery_info
48
+ def doc
49
+ @doc ||= noko_doc(URI.join(BASE_URL, brewery_url(id)))
50
+ validate_brewery
51
+ @doc
43
52
  end
44
53
 
45
- def beers
46
- @beers ||= retrieve_brewery_beers
54
+ def info_root
55
+ @info_root ||= doc.at_css(INFO_SELECTOR)
47
56
  end
48
57
 
49
58
  private
50
59
 
51
- # Retrieve details about this brewery from the website.
52
- #
53
- # This method stores the retrieved details in instance variables
54
- # of the brewery instance.
60
+ # Validates whether the brewery with the given ID exists.
55
61
  #
56
- def retrieve_details
57
- @doc = noko_doc(URI.join(BASE_URL, brewery_url(id)))
58
-
59
- brewery_info = retrieve_brewery_info
60
-
61
- @beers = []
62
- if pagination?(@doc)
63
- (1..page_count(@doc)).flat_map do |page_no|
64
- @doc = noko_doc(URI.join(BASE_URL, brewery_url(id), "0/", "#{page_no}/"))
65
- retrieve_brewery_beers
66
- end
67
- else
68
- retrieve_brewery_beers
62
+ # Throws an exception if the brewery does not exist.
63
+ def validate_brewery
64
+ error_message = "This brewer, ID##{id}, is no longer in the database. "\
65
+ 'RateBeer Home'
66
+ if @doc.at_css('body p').text == error_message
67
+ raise PageNotFoundError.new("Brewery not found - #{id}")
69
68
  end
70
- nil
71
69
  end
72
70
 
73
- # Scrape brewery info from Nokogiri Doc for brewery page
74
- #
75
- def retrieve_brewery_info
76
- root = @doc.css('#container table').first
77
- contact_node = root.css('td').first
78
-
79
- @name = fix_characters(root.css('h1').first.text)
80
- raise PageNotFoundError.new("Brewery not found - #{id}") if @name.empty?
81
-
82
- @type = root.css('span.beerfoot')
83
- .select { |x| x.text =~ /Type: .*/ }
84
- .first
85
- .text
86
- .strip
87
- .split("Type: ")
88
- .last
89
- .split(/\s{2,}/)
90
- .first
91
- @address = root.css('div[itemprop="address"] b span')
92
- .map { |elem| key = case elem.attributes['itemprop'].value
93
- when 'streetAddress' then :street
94
- when 'addressLocality' then :city
95
- when 'addressRegion' then :state
96
- when 'addressCountry' then :country
97
- when 'postalCode' then :postcode
98
- else raise "unrecognised attribute"
99
- end
100
- [key, elem.text.strip] }
101
- .to_h
102
-
103
- @telephone = root.css('span[itemprop="telephone"]').first &&
104
- root.css('span[itemprop="telephone"]').first.text
105
-
106
- end
107
-
108
- # Scrape beer details from Nokogiri Doc for brewery page
109
- #
110
- def retrieve_brewery_beers
111
- location, brewer = nil # Variables used in the map below
112
- root = @doc.css('table.maintable.nohover').first
113
- @beers += root.css('tr').drop(1).map do |row|
114
- if row.text =~ /^Brewed at (?<location>.+?)(?: by\/for (?<brewer>.+))?$/
115
- location = Regexp.last_match['location']
116
- brewer = Regexp.last_match['brewer']
117
- nil
118
- else
119
- process_beer_row(row, location, brewer)
120
- end
121
- end.reject(&:nil?)
71
+ # Scrapes the brewery's name.
72
+ def scrape_name
73
+ @name = fix_characters(info_root.css('h1').first.text)
74
+ end
75
+
76
+ # Scrapes the brewery's address.
77
+ def scrape_address
78
+ address_root = info_root.css('div[itemprop="address"] b span')
79
+ address_details = address_root.map { |e| extract_address_element(e) }
80
+ @address = address_details.to_h
81
+ end
82
+
83
+ # Extracts one element of address details from a node contained within the
84
+ # address div.
85
+ def extract_address_element(node)
86
+ key = case node.attributes['itemprop'].value
87
+ when 'streetAddress' then :street
88
+ when 'addressLocality' then :city
89
+ when 'addressRegion' then :state
90
+ when 'addressCountry' then :country
91
+ when 'postalCode' then :postcode
92
+ else raise 'unrecognised attribute'
93
+ end
94
+ [key, node.text.strip]
95
+ end
96
+
97
+ # Scrapes the telephone number of the brewery.
98
+ def scrape_telephone
99
+ @telephone = info_root.at_css('span[itemprop="telephone"]')
100
+ end
101
+
102
+ # Scrapes the type of brewery.
103
+ def scrape_type
104
+ @type = info_root.css('div')[1]
105
+ end
106
+
107
+ # Scrapes beers list for brewery.
108
+ def scrape_beers
109
+ beers_doc = noko_doc(URI.join(BASE_URL, brewery_beers_url(id)))
110
+ rows = beers_doc.css('table#brewer-beer-table tbody tr')
111
+ @beers = rows.map { |row| process_beer_row(row) }.reject(&:nil?)
122
112
  end
123
113
 
124
114
  # Process a row of data representing one beer brewed by/at a brewery.
@@ -129,47 +119,70 @@ module RateBeer
129
119
  # where this location differs from the brewery's regular brewsite/venue
130
120
  # @param [String] brewer the client for whom this brewery brewed the beer,
131
121
  # where the brewery is brewing for a different company/brewery
132
- # @return [RateBeer::Beer] a beer object representing the scraped beer,
122
+ # @return [RateBeer::Beer] a beer object representing the scraped beer,
133
123
  # containing scraped attributes
134
124
  #
135
- def process_beer_row(row, location=nil, brewer=nil)
136
- # Attributes stored in each table row, with indices representing their
137
- # position in each row
138
- attributes = { name: 0,
139
- abv: 2,
140
- avg_rating: 3,
141
- overall_rating: 4,
142
- style_rating: 5,
143
- num_ratings: 6 }
144
-
145
- beer = attributes.reduce({}) do |beer_hash, (attr, i)|
146
- val = row.css('td')[i].text.gsub(nbsp, ' ').strip rescue nil
147
- case attr
148
- when :name
149
- fix_characters(val)
150
- when :abv, :avg_rating
151
- val = val.to_f
152
- when :overall_rating, :style_rating, :num_ratings
153
- val = val.to_i
154
- end
155
- beer_hash[attr] = val
156
- beer_hash
157
- end
158
- beer[:url] = row.css('td').first.css('a').first['href']
159
- id = beer[:url].split('/').last.to_i
125
+ def process_beer_row(row)
126
+ beer = process_beer_name_cell(row.css('td').first)
127
+ beer[:abv] = row.css('td')[1].text.to_f
128
+ beer[:date_added] = Date.strptime(row.css('td')[2].text, '%m/%d/%Y')
129
+ Beer.new(id, beer.merge(process_rating_info(row)))
130
+ end
131
+
132
+ # Processes the cell containing the beer's name and other information.
133
+ #
134
+ # This cell contains information on the beer's name, its style, whether it
135
+ # is retired, and who it was brewed for or by.
136
+ def process_beer_name_cell(node)
137
+ beer_link = node.at_css('strong a')
138
+ name = fix_characters(beer_link.text)
139
+ id = id_from_link(beer_link)
140
+ info = node.at_css('em.real-small')
141
+ brewed_at_for = process_brewed_at_for(node)
142
+ style = process_style_info(node)
143
+ { id: id,
144
+ name: name,
145
+ style: style,
146
+ retired: info && info.text =~ /retired/ || false }.merge(brewed_at_for)
147
+ end
160
148
 
161
- # Apply additional location and brewer information if scraped
162
- beer[:brewed_at] = location unless location.nil?
163
- beer[:brewed_by_for] = brewer unless brewer.nil?
149
+ # Processes information on who the beer was brewed for or by, or at.
150
+ def process_brewed_at_for(node)
151
+ brewed_at_for_node = node.at_css('div.small em')
152
+ return {} if brewed_at_for_node.nil?
153
+ node_text = brewed_at_for_node.children.first.text
154
+ key = if node_text.include?('Brewed at')
155
+ :brewed_at
156
+ elsif node_text.include?('Brewed by/for')
157
+ :brewed_by_for
158
+ end
159
+ other_brewer_node = brewed_at_for_node.at_css('a')
160
+ { key => Brewery.new(id_from_link(other_brewer_node),
161
+ name: other_brewer_node.text) }
162
+ end
164
163
 
165
- # Transform ratings into correct format
166
- beer[:rating] = { overall: beer[:overall_rating],
167
- style: beer[:style_rating],
168
- ratings: beer[:num_ratings],
169
- weighted_avg: beer[:avg_rating] }
164
+ # Processes the style information contained within a beer name cell.
165
+ def process_style_info(node)
166
+ style_node = node.css('a').find do |n|
167
+ n.children.any? { |c| c.name == 'span' }
168
+ end
169
+ name = style_node.text
170
+ id = id_from_link(style_node)
171
+ Style.new(id, name: name)
172
+ end
170
173
 
171
- # Create beer instance from scraped data
172
- Beer.new(id, beer)
174
+ # Processes rating information from a beer row.
175
+ def process_rating_info(row)
176
+ cell_indices = { avg_rating: 4,
177
+ overall_rating: 5,
178
+ style_rating: 6,
179
+ num_ratings: 7 }
180
+ rating = cell_indices.map do |attr, i|
181
+ val = row.css('td')[i].text.gsub(nbsp, ' ').strip
182
+ conversion = attr == :avg_rating ? :to_f : :to_i
183
+ [attr, val.send(conversion)]
184
+ end
185
+ rating.to_h
173
186
  end
174
187
  end
175
188
  end