ratebeer 0.0.4b → 0.0.5a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -7
- data/Gemfile.lock +26 -20
- data/lib/ratebeer/beer.rb +156 -0
- data/lib/ratebeer/brewery.rb +167 -0
- data/lib/ratebeer/country.rb +9 -0
- data/lib/ratebeer/location.rb +104 -0
- data/lib/ratebeer/region.rb +9 -0
- data/lib/ratebeer/review.rb +157 -0
- data/lib/ratebeer/scraping.rb +150 -0
- data/lib/ratebeer/search.rb +224 -0
- data/lib/ratebeer/style.rb +104 -0
- data/lib/ratebeer/urls.rb +54 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d001c76b26d45e168769fddcb3c69dcea3711479
|
4
|
+
data.tar.gz: 27390f84bc7f8272b893b1169e3d0e90bab1510d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07e2009a0bc96bf6afb208345c0c0167a62de3969fd0a209b3428b63abf0572d3265611e15ad7f81d8742dbe00900bf601ef4fdf600fb74a2d76f494649ea9e6
|
7
|
+
data.tar.gz: d7a0c63bdc4bf43aa3050dd4fb022c6b6ab4ca7bbd55b2f48bac575e1e0cbcf38120e615492afb22d87710a80b2e24d55e8e5cbdc8bfdb5ed14f495d7d8224dd
|
data/Gemfile
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
source
|
2
|
-
ruby
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
ruby '2.3.0'
|
3
3
|
|
4
4
|
gemspec
|
5
5
|
|
6
|
-
gem
|
7
|
-
gem
|
6
|
+
gem 'i18n'
|
7
|
+
gem 'nokogiri'
|
8
8
|
|
9
9
|
group :test do
|
10
|
-
gem
|
11
|
-
gem
|
12
|
-
gem
|
10
|
+
gem 'rspec'
|
11
|
+
gem 'rake'
|
12
|
+
gem 'codeclimate-test-reporter'
|
13
13
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,36 +1,39 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
+
ratebeer (0.0.5)
|
4
5
|
|
5
6
|
GEM
|
6
7
|
remote: https://rubygems.org/
|
7
8
|
specs:
|
8
|
-
codeclimate-test-reporter (0.
|
9
|
+
codeclimate-test-reporter (0.6.0)
|
9
10
|
simplecov (>= 0.7.1, < 1.0.0)
|
10
11
|
diff-lcs (1.2.5)
|
11
12
|
docile (1.1.5)
|
12
13
|
i18n (0.7.0)
|
13
|
-
json (
|
14
|
-
|
15
|
-
nokogiri (1.6.
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
rspec-
|
22
|
-
|
23
|
-
rspec-
|
24
|
-
rspec-
|
14
|
+
json (2.0.1)
|
15
|
+
mini_portile2 (2.1.0)
|
16
|
+
nokogiri (1.6.8)
|
17
|
+
mini_portile2 (~> 2.1.0)
|
18
|
+
pkg-config (~> 1.1.7)
|
19
|
+
pkg-config (1.1.7)
|
20
|
+
rake (11.2.2)
|
21
|
+
rspec (3.5.0)
|
22
|
+
rspec-core (~> 3.5.0)
|
23
|
+
rspec-expectations (~> 3.5.0)
|
24
|
+
rspec-mocks (~> 3.5.0)
|
25
|
+
rspec-core (3.5.1)
|
26
|
+
rspec-support (~> 3.5.0)
|
27
|
+
rspec-expectations (3.5.0)
|
25
28
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
-
rspec-support (~> 3.
|
27
|
-
rspec-mocks (3.
|
29
|
+
rspec-support (~> 3.5.0)
|
30
|
+
rspec-mocks (3.5.0)
|
28
31
|
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
-
rspec-support (~> 3.
|
30
|
-
rspec-support (3.
|
31
|
-
simplecov (0.
|
32
|
+
rspec-support (~> 3.5.0)
|
33
|
+
rspec-support (3.5.0)
|
34
|
+
simplecov (0.12.0)
|
32
35
|
docile (~> 1.1.0)
|
33
|
-
json (
|
36
|
+
json (>= 1.8, < 3)
|
34
37
|
simplecov-html (~> 0.10.0)
|
35
38
|
simplecov-html (0.10.0)
|
36
39
|
|
@@ -45,5 +48,8 @@ DEPENDENCIES
|
|
45
48
|
ratebeer!
|
46
49
|
rspec
|
47
50
|
|
51
|
+
RUBY VERSION
|
52
|
+
ruby 2.3.0p0
|
53
|
+
|
48
54
|
BUNDLED WITH
|
49
|
-
1.
|
55
|
+
1.12.5
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require_relative "brewery"
|
2
|
+
require_relative "review"
|
3
|
+
require_relative "style"
|
4
|
+
require_relative "scraping"
|
5
|
+
require_relative "urls"
|
6
|
+
|
7
|
+
module RateBeer
|
8
|
+
class Beer
|
9
|
+
# Each key represents an item of data accessible for each beer, and defines
|
10
|
+
# dynamically a series of methods for accessing this data.
|
11
|
+
#
|
12
|
+
def self.data_keys
|
13
|
+
[:name,
|
14
|
+
:brewery,
|
15
|
+
:style,
|
16
|
+
:glassware,
|
17
|
+
:availability,
|
18
|
+
:abv,
|
19
|
+
:calories,
|
20
|
+
:description,
|
21
|
+
:retired,
|
22
|
+
:rating]
|
23
|
+
end
|
24
|
+
|
25
|
+
include RateBeer::Scraping
|
26
|
+
include RateBeer::URLs
|
27
|
+
|
28
|
+
# Create RateBeer::Beer instance.
|
29
|
+
#
|
30
|
+
# Requires the RateBeer ID# for the beer in question.
|
31
|
+
#
|
32
|
+
# @param [Integer, String] id ID# of beer to retrieve
|
33
|
+
# @param [String] name Name of the beer to which ID# relates if known
|
34
|
+
# @param [hash] options Options hash for entity created
|
35
|
+
#
|
36
|
+
def initialize(id, name: nil, **options)
|
37
|
+
super
|
38
|
+
end
|
39
|
+
|
40
|
+
# Return reviews of this beer.
|
41
|
+
#
|
42
|
+
def reviews(order: :most_recent, limit: 10)
|
43
|
+
Review.retrieve(self, order: order, limit: limit)
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
# Retrieve details about this beer from the website.
|
49
|
+
#
|
50
|
+
# This method stores the retrieved details in instance variables
|
51
|
+
# of the beer instance.
|
52
|
+
#
|
53
|
+
def retrieve_details
|
54
|
+
doc = noko_doc(URI.join(BASE_URL, beer_url(id)))
|
55
|
+
root = doc.css('#container table').first
|
56
|
+
info_tbl = root.css('table').first
|
57
|
+
|
58
|
+
@name = doc.css("h1")
|
59
|
+
.text
|
60
|
+
.strip
|
61
|
+
@name = fix_characters(@name)
|
62
|
+
raise PageNotFoundError.new("Beer not found - #{id}") if name.empty?
|
63
|
+
|
64
|
+
# If this beer is an alias, change ID to that of "proper" beer and
|
65
|
+
# retrieve details of the proper beer instead.
|
66
|
+
alias_pattern = /Also known as(.|\n)*Proceed to the aliased beer\.{3}/
|
67
|
+
if root.css('tr')[1].css('div div').text =~ alias_pattern
|
68
|
+
alias_node = root.css('tr')[1]
|
69
|
+
.css('div div')
|
70
|
+
.css('a')
|
71
|
+
.first
|
72
|
+
alias_name = alias_node.text
|
73
|
+
alias_id = alias_node['href'].split('/').last.to_i
|
74
|
+
@id = alias_id
|
75
|
+
retrieve_details
|
76
|
+
return nil
|
77
|
+
end
|
78
|
+
|
79
|
+
@brewery = info_tbl.css('td')[1]
|
80
|
+
.css('div')
|
81
|
+
.first
|
82
|
+
.css('a')
|
83
|
+
.map { |a| [:id,
|
84
|
+
:name].zip([a['href'].split('/')
|
85
|
+
.last
|
86
|
+
.to_i, a.text]).to_h }.first
|
87
|
+
@brewery = Brewery.new(@brewery[:id], name: fix_characters(@brewery[:name]))
|
88
|
+
@style = info_tbl.css('td')[1]
|
89
|
+
.css('div')
|
90
|
+
.first
|
91
|
+
.css('a')
|
92
|
+
.select { |a| a['href'] =~ /beerstyles/ }
|
93
|
+
.map { |a| [:id,
|
94
|
+
:name].zip([a['href'].split('/')
|
95
|
+
.last
|
96
|
+
.to_i, a.text]).to_h }.first
|
97
|
+
@style = Style.new(@style[:id], name: fix_characters(@style[:name]))
|
98
|
+
@glassware = info_tbl.css('td')[1]
|
99
|
+
.css('div')[1]
|
100
|
+
.css('a')
|
101
|
+
.map { |a| [:id,
|
102
|
+
:name].zip([a['href'].split('GWID=')
|
103
|
+
.last
|
104
|
+
.to_i, a.text]).to_h }.first
|
105
|
+
misc = info_tbl.next_element
|
106
|
+
.first_element_child
|
107
|
+
.children
|
108
|
+
.map(&:text)
|
109
|
+
.flat_map { |x| x.gsub(nbsp, ' ').strip.split(':') }
|
110
|
+
.map(&:strip)
|
111
|
+
.reject(&:empty?)
|
112
|
+
.each_slice(2)
|
113
|
+
.map { |(k, v)| [symbolize_text(k),
|
114
|
+
v.to_f.zero? ? v : v.to_f] }
|
115
|
+
.to_h
|
116
|
+
@abv = misc[:abv]
|
117
|
+
@calories = misc[:est_calories]
|
118
|
+
@rating = [:overall,
|
119
|
+
:style].zip(info_tbl.css('div')
|
120
|
+
.select { |d| d['title'] =~ /This figure/ }
|
121
|
+
.map { |d| d['title'].split(':').first.to_f }).to_h
|
122
|
+
@rating.merge!({ ratings: misc[:ratings],
|
123
|
+
weighted_avg: misc[:weighted_avg],
|
124
|
+
mean: misc[:mean] })
|
125
|
+
@availability = info_tbl.css('td')[1]
|
126
|
+
.css('table')
|
127
|
+
.css('td')
|
128
|
+
.children
|
129
|
+
.children
|
130
|
+
.map(&:text)
|
131
|
+
.reject(&:empty?)
|
132
|
+
.each_slice(2)
|
133
|
+
.to_a
|
134
|
+
.tap { |a| a.last.unshift('distribution') }
|
135
|
+
.map { |(k, v)| [k =~ /bottl/ ?
|
136
|
+
:bottling :
|
137
|
+
symbolize_text(k), v] }
|
138
|
+
.to_h
|
139
|
+
@availability.merge!({ seasonal: misc[:seasonal] })
|
140
|
+
@description = info_tbl.next_element
|
141
|
+
.next_element
|
142
|
+
.children
|
143
|
+
.children
|
144
|
+
.map(&:text)
|
145
|
+
.map(&:strip)
|
146
|
+
.drop(1)
|
147
|
+
.reject(&:empty?)
|
148
|
+
.join("\n")
|
149
|
+
@description = fix_characters(@description)
|
150
|
+
@retired = !(root.css('span.beertitle2') &&
|
151
|
+
root.css('span.beertitle2').text =~ /RETIRED/).nil?
|
152
|
+
|
153
|
+
nil
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require_relative "scraping"
|
2
|
+
require_relative "urls"
|
3
|
+
|
4
|
+
module RateBeer
|
5
|
+
class Brewery
|
6
|
+
# Each key represents an item of data accessible for each brewery, and defines
|
7
|
+
# dynamically a series of methods for accessing this data.
|
8
|
+
#
|
9
|
+
def self.data_keys
|
10
|
+
[:name,
|
11
|
+
:type,
|
12
|
+
:address,
|
13
|
+
:telephone,
|
14
|
+
:beers]
|
15
|
+
end
|
16
|
+
|
17
|
+
include RateBeer::Scraping
|
18
|
+
include RateBeer::URLs
|
19
|
+
|
20
|
+
attr_reader :established, :location
|
21
|
+
|
22
|
+
# Create RateBeer::Brewery instance.
#
# Requires the RateBeer ID# for the brewery in question. Optionally accepts
# a name parameter where the name is already known.
#
# Only options that were actually supplied are stored. The accessors
# generated from data_keys (e.g. #type) decide whether to scrape by checking
# whether their instance variable is defined, so assigning nil for an absent
# option would stop that value from ever being retrieved.
#
# @param [Integer, String] id ID# for the brewery
# @param [String] name The name of the specified brewery
# @param [hash] options Options hash for entity created; :established,
#   :location, :type and :status are stored when present
#
def initialize(id, name: nil, **options)
  super
  [:established, :location, :type, :status].each do |key|
    instance_variable_set("@#{key}", options[key]) if options.key?(key)
  end
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
# Retrieve details about this brewery from the website.
#
# Fetches the brewery page, scrapes the brewery information from it, and
# then collects the brewery's beers into @beers. When the page is paginated,
# each page (1..page_count) is fetched in turn and its beers appended.
#
# This method stores the retrieved details in instance variables
# of the brewery instance.
#
# @return [nil]
#
def retrieve_details
  @doc = noko_doc(URI.join(BASE_URL, brewery_url(id)))
  retrieve_brewery_info

  @beers = []
  if pagination?(@doc)
    # The page count is read from the first page before @doc is replaced.
    (1..page_count(@doc)).each do |page_no|
      @doc = noko_doc(URI.join(BASE_URL, brewery_url(id), "0/", "#{page_no}/"))
      retrieve_brewery_beers
    end
  else
    retrieve_brewery_beers
  end
  nil
end
|
64
|
+
|
65
|
+
# Scrape brewery info from Nokogiri Doc for brewery page
|
66
|
+
#
|
67
|
+
def retrieve_brewery_info
|
68
|
+
root = @doc.css('#container table').first
|
69
|
+
contact_node = root.css('td').first
|
70
|
+
|
71
|
+
@name = fix_characters(root.css('h1').first.text)
|
72
|
+
raise PageNotFoundError.new("Brewery not found - #{id}") if @name.empty?
|
73
|
+
|
74
|
+
@type = root.css('span.beerfoot')
|
75
|
+
.select { |x| x.text =~ /Type: .*/ }
|
76
|
+
.first
|
77
|
+
.text
|
78
|
+
.strip
|
79
|
+
.split("Type: ")
|
80
|
+
.last
|
81
|
+
.split(/\s{2,}/)
|
82
|
+
.first
|
83
|
+
@address = root.css('div[itemprop="address"] b span')
|
84
|
+
.map { |elem| key = case elem.attributes['itemprop'].value
|
85
|
+
when 'streetAddress' then :street
|
86
|
+
when 'addressLocality' then :city
|
87
|
+
when 'addressRegion' then :state
|
88
|
+
when 'addressCountry' then :country
|
89
|
+
when 'postalCode' then :postcode
|
90
|
+
else raise "unrecognised attribute"
|
91
|
+
end
|
92
|
+
[key, elem.text.strip] }
|
93
|
+
.to_h
|
94
|
+
|
95
|
+
@telephone = root.css('span[itemprop="telephone"]').first &&
|
96
|
+
root.css('span[itemprop="telephone"]').first.text
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
# Scrape beer details from Nokogiri Doc for brewery page
|
101
|
+
#
|
102
|
+
def retrieve_brewery_beers
|
103
|
+
location, brewer = nil # Variables used in the map below
|
104
|
+
root = @doc.css('table.maintable.nohover').first
|
105
|
+
@beers += root.css('tr').drop(1).map do |row|
|
106
|
+
if row.text =~ /^Brewed at (?<location>.+?)(?: by\/for (?<brewer>.+))?$/
|
107
|
+
location = Regexp.last_match['location']
|
108
|
+
brewer = Regexp.last_match['brewer']
|
109
|
+
nil
|
110
|
+
else
|
111
|
+
process_beer_row(row, location, brewer)
|
112
|
+
end
|
113
|
+
end.reject(&:nil?)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Process a row of data representing one beer brewed by/at a brewery.
#
# Missing cells yield nil for the name and zero for the numeric columns.
#
# @param [Nokogiri::XML::Element] row HTML TR row wrapped as a Nokogiri
#   element
# @param [String] location the location at which a brewery's beer is brewed
#   where this location differs from the brewery's regular brewsite/venue
# @param [String] brewer the client for whom this brewery brewed the beer,
#   where the brewery is brewing for a different company/brewery
# @return [RateBeer::Beer] a beer object representing the scraped beer,
#   containing scraped attributes
#
def process_beer_row(row, location = nil, brewer = nil)
  cells = row.css('td')

  # Attributes stored in each table row, with indices representing their
  # position in each row
  columns = { name:           0,
              abv:            2,
              avg_rating:     3,
              overall_rating: 4,
              style_rating:   5,
              num_ratings:    6 }

  beer = columns.each_with_object({}) do |(attr, index), details|
    cell = cells[index]
    text = cell && cell.text.gsub(nbsp, ' ').strip
    details[attr] = case attr
                    when :name
                      # Keep the cleaned-up string returned by fix_characters
                      text && fix_characters(text)
                    when :abv, :avg_rating
                      text.to_f
                    else
                      text.to_i
                    end
  end

  beer[:url] = cells.first.css('a').first['href']
  beer_id = beer[:url].split('/').last.to_i

  # Apply additional location and brewer information if scraped
  beer[:brewed_at]     = location unless location.nil?
  beer[:brewed_by_for] = brewer unless brewer.nil?

  # Transform ratings into correct format
  beer[:rating] = { overall:      beer[:overall_rating],
                    style:        beer[:style_rating],
                    ratings:      beer[:num_ratings],
                    weighted_avg: beer[:avg_rating] }

  # Create beer instance from scraped data; the hash is splatted so that it
  # is passed as keyword arguments (name: plus options)
  Beer.new(beer_id, **beer)
end
|
166
|
+
end
|
167
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require_relative "brewery"
|
2
|
+
require_relative "style"
|
3
|
+
require_relative "urls"
|
4
|
+
|
5
|
+
module RateBeer
|
6
|
+
class Location
|
7
|
+
# Keys for fields scraped on RateBeer
|
8
|
+
def self.data_keys
|
9
|
+
[:name,
|
10
|
+
:num_breweries,
|
11
|
+
:breweries]
|
12
|
+
end
|
13
|
+
|
14
|
+
include RateBeer::Scraping
|
15
|
+
include RateBeer::URLs
|
16
|
+
|
17
|
+
# Initialize a RateBeer::Location instance.
|
18
|
+
#
|
19
|
+
# Locations may be either Regions or Countries. This must be specified to
|
20
|
+
# the constructor.
|
21
|
+
#
|
22
|
+
# @param [Integer] id ID# for this location
|
23
|
+
# @param [Symbol] location_type Symbol representing either country or region
|
24
|
+
# @param [String] name Name of this location
|
25
|
+
#
|
26
|
+
def initialize(id, location_type: nil, name: nil, **options)
|
27
|
+
super
|
28
|
+
if location_type.nil? || !([:country, :region].include?(location_type))
|
29
|
+
raise ArgumentError.new("location_type must be supplied and must be "\
|
30
|
+
"either country or region")
|
31
|
+
end
|
32
|
+
@location_type = location_type
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
# Retrieve details about this location from the website.
#
# This method stores the retrieved details (@name, @num_breweries and
# @breweries) in instance variables of the location instance.
#
# The first table under "#tabs" is treated as the active breweries and any
# later table as breweries that are out of business. Rows without cells are
# skipped, so @breweries contains only Brewery instances.
#
# @raise [PageNotFoundError] if the page heading shows the location does
#   not exist
# @return [nil]
#
def retrieve_details
  doc = noko_doc(url)
  heading = doc.css('.col-lg-9').first
  brewery_tables = doc.css('#tabs table')

  @name = heading.at_css('h1').text.split('Breweries').first.strip
  if @name == "n/a" || @name == "RateBeer Robot Oops!"
    raise PageNotFoundError.new("#{self.class.name} not found - #{id}")
  end

  @num_breweries = heading.at_css('li.active')
                          .text
                          .scan(/Active \((\d*)\)/)
                          .first
                          .first
                          .to_i

  @breweries = brewery_tables.each_with_index.flat_map do |table, index|
    status = index.zero? ? 'Active' : 'Out of Business'

    # drop(1) skips the heading row; compact removes the rows skipped below
    table.css('tr').drop(1).map do |row|
      cells = row.css('td')
      next if cells.empty?

      label = cells[0].text
      brewery_id = cells[0].at_css('a')['href'].split('/').last.to_i
      location = label.split('-').last.sub('(Out of Business)', '').strip
      # The established year is only read for active breweries
      established = status == 'Active' ? cells[3].text.to_i : nil

      Brewery.new(brewery_id,
                  name: label.split('-').first.strip,
                  location: location,
                  type: cells[1].text.strip,
                  established: established,
                  status: status)
    end.compact
  end
  nil
end
|
88
|
+
|
89
|
+
# Return URL for page containing information on this location.
|
90
|
+
#
|
91
|
+
# Result depends on whether this is a country or a region.
|
92
|
+
#
|
93
|
+
def url
|
94
|
+
@url ||= case @location_type
|
95
|
+
when :country
|
96
|
+
URI.join(BASE_URL, country_url(id))
|
97
|
+
when :region
|
98
|
+
URI.join(BASE_URL, region_url(id))
|
99
|
+
else
|
100
|
+
raise "invalid location type: #{@location_type.to_s}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require_relative "beer"
|
2
|
+
require_relative "scraping"
|
3
|
+
require_relative "urls"
|
4
|
+
|
5
|
+
module RateBeer
|
6
|
+
# The Review class contains reviews of Beers posted to RateBeer.com. It
|
7
|
+
# also provides some scraping functionality for obtaining reviews.
|
8
|
+
#
|
9
|
+
class Review
|
10
|
+
extend RateBeer::URLs
|
11
|
+
|
12
|
+
class << self
|
13
|
+
attr_reader :review_limit
|
14
|
+
attr_reader :review_order
|
15
|
+
|
16
|
+
# Work out how many pages of reviews must be fetched.
#
# A page holds ten reviews, so the limit is divided by ten and rounded up.
#
# @param [Integer] limit The number of reviews to be retrieved
# @return [Integer] Number of pages to be retrieved for number of reviews
#
def num_pages(limit)
  pages = limit.fdiv(10)
  pages.ceil
end
|
27
|
+
|
28
|
+
# Look up the URL suffix that selects a particular sort order.
#
# @param [Symbol] order The desired sorting order (:most_recent,
#   :top_raters or :highest_score)
# @return [String] The URL suffix required to obtain reviews sorted in
#   the desired order
# @raise [ArgumentError] if the ordering is not one of the known symbols
#
def url_suffix(order)
  suffixes = { most_recent:   "1",
               top_raters:    "2",
               highest_score: "3" }
  suffixes.fetch(order) do
    raise ArgumentError.new("unknown ordering: #{order}")
  end
end
|
49
|
+
|
50
|
+
# Retrieve reviews for a given beer.
#
# @param [Integer, RateBeer::Beer] beer The beer for which to download
#   reviews
# @param [Symbol] order The order by which to list reviews
# @param [Integer] limit The number of reviews to retrieve
# @return [Array<RateBeer::Review>] A list of reviews for the passed
#   beer, at most limit entries long
# @raise [RuntimeError] if beer is neither a RateBeer::Beer nor an Integer
#
def retrieve(beer, order: :most_recent, limit: 10)
  if beer.is_a?(RateBeer::Beer)
    beer_id = beer.id
  elsif beer.is_a?(Integer)
    beer_id = beer
    beer = RateBeer::Beer.new(beer)
  else
    raise "unknown beer value: #{beer}"
  end

  rating_pattern = /^(?<total>\d+(\.\d+)?).+
                    AROMA\s(?<aroma>\d+\/10).+
                    APPEARANCE\s(?<appearance>\d+\/5).+
                    TASTE\s(?<taste>\d+\/10).+
                    PALATE\s(?<palate>\d+\/5).+
                    OVERALL\s(?<overall>\d+\/20)$/x
  # The rank group captures only the digits; the closing parenthesis is
  # matched outside the group.
  reviewer_pattern = /^(?<name>.+)\s\((?<rank>\d+)\)\s-\s?
                      (?<location>.+)?\s?-\s
                      (?<date>.+)$/x

  reviews = num_pages(limit).times.flat_map do |page_number|
    url = URI.join(BASE_URL, review_url(beer_id, url_suffix(order), page_number))
    doc = RateBeer::Scraping.noko_doc(url)
    root = doc.css("#container table table")[3]

    # All reviews are contained within the sole cell in the sole row of
    # the selected table. Each review consists of rating information,
    # details of the reviewer, and the text of the review itself.
    #
    # The components are contained within div, small, div tags
    # respectively. We need to scrape these specifically.
    texts = root.css('td')
                .children
                .select { |x| x.name == 'div' || x.name == 'small' }
                .map(&:text)
                .reject { |x| x.empty? || x.include?("googleFillSlot") }

    texts.each_slice(3).map do |(rating_data, reviewer_data, review)|
      rating_match = rating_data.match(rating_pattern)
      reviewer = reviewer_data.gsub(RateBeer::Scraping.nbsp, ' ')
                              .match(reviewer_pattern)
      rating_breakdown = {}
      [:overall, :aroma, :appearance, :taste, :palate].each do |k|
        rating_breakdown[k] = Rational(rating_match[k])
      end

      # The location group is optional in the pattern, so it may be nil;
      # to_s turns that into an empty string instead of raising.
      new(beer: beer,
          reviewer: reviewer[:name],
          reviewer_rank: reviewer[:rank],
          location: reviewer[:location].to_s.strip,
          date: Date.parse(reviewer[:date]),
          rating: rating_match[:total].to_f,
          rating_breakdown: rating_breakdown,
          comment: review)
    end
  end
  reviews.take(limit)
end
|
114
|
+
end
|
115
|
+
|
116
|
+
attr_reader :beer
|
117
|
+
attr_reader :reviewer
|
118
|
+
attr_reader :reviewer_rank
|
119
|
+
attr_reader :location
|
120
|
+
attr_reader :date
|
121
|
+
attr_reader :rating
|
122
|
+
attr_reader :rating_breakdown
|
123
|
+
attr_reader :comment
|
124
|
+
|
125
|
+
def initialize(**options)
|
126
|
+
@beer = if options[:beer].is_a?(RateBeer::Beer)
|
127
|
+
options[:beer]
|
128
|
+
elsif options[:beer].is_a?(Integer)
|
129
|
+
RateBeer::Beer.new(options[:beer])
|
130
|
+
else
|
131
|
+
raise ArgumentError.new("incorrect beer parameter: #{options[:beer]}")
|
132
|
+
end
|
133
|
+
[:reviewer, :reviewer_rank, :location, :date,
|
134
|
+
:rating, :rating_breakdown, :comment].each do |param|
|
135
|
+
if options[param].nil?
|
136
|
+
raise ArgumentError.new("#{param.to_s} parameter required")
|
137
|
+
end
|
138
|
+
instance_variable_set("@#{param.to_s}", options[param])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def inspect
|
143
|
+
var = "#<Review of #{self.beer} - #{@reviewer} on #{@date}>"
|
144
|
+
end
|
145
|
+
|
146
|
+
def to_s
|
147
|
+
inspect
|
148
|
+
end
|
149
|
+
|
150
|
+
def ==(other_review)
|
151
|
+
self.reviewer == other_review.reviewer &&
|
152
|
+
self.date == other_review.date &&
|
153
|
+
self.beer == other_review.beer &&
|
154
|
+
self.comment == other_review.comment
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module RateBeer
|
6
|
+
|
7
|
+
# The Scraping module contains a series of methods to assist with scraping
|
8
|
+
# pages from RateBeer.com, and dealing with the results.
|
9
|
+
module Scraping
|
10
|
+
|
11
|
+
class PageNotFoundError < StandardError; end
|
12
|
+
|
13
|
+
attr_reader :id
|
14
|
+
|
15
|
+
# Run method on inclusion in class.
#
# Defines, on the including class itself, one reader per entry in the
# class's data_keys. Each reader calls retrieve_details the first time it
# is used while its instance variable is still undefined, then returns the
# instance variable.
#
# The readers are defined on base rather than on this module so that one
# class's data keys do not become methods of every other including class.
#
# @param [Class] base the class including this module; must respond to
#   data_keys
#
def self.included(base)
  base.data_keys.each do |attr|
    # send is used because define_method is private on older Rubies
    base.send(:define_method, attr) do
      retrieve_details unless instance_variable_defined?("@#{attr}")
      instance_variable_get("@#{attr}")
    end
  end
end
|
26
|
+
|
27
|
+
# Create RateBeer::Scraper instance.
|
28
|
+
#
|
29
|
+
# Requires an ID#, and optionally accepts a name and options parameters.
|
30
|
+
#
|
31
|
+
# @param [Integer, String] id ID# of the entity which is to be retrieved
|
32
|
+
# @param [String] name Name of the entity to which ID# relates if known
|
33
|
+
# @param [hash] options Options hash for entity created
|
34
|
+
#
|
35
|
+
def initialize(id, name: nil, **options)
|
36
|
+
@id = id
|
37
|
+
@name = name unless name.nil?
|
38
|
+
options.each do |k, v|
|
39
|
+
instance_variable_set("@#{k.to_s}", v)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def inspect
|
44
|
+
val = "#<#{self.class} ##{@id}"
|
45
|
+
val << " - #{@name}" if instance_variable_defined?("@name")
|
46
|
+
val << ">"
|
47
|
+
end
|
48
|
+
|
49
|
+
def to_s
|
50
|
+
inspect
|
51
|
+
end
|
52
|
+
|
53
|
+
def ==(other_entity)
|
54
|
+
other_entity.is_a?(self.class) && id == other_entity.id
|
55
|
+
end
|
56
|
+
|
57
|
+
def url
|
58
|
+
@url ||= if respond_to?("#{demodularized_class_name.downcase}_url", id)
|
59
|
+
send("#{demodularized_class_name.downcase}_url", id)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# Return full details of the scraped entity in a Hash.
|
64
|
+
#
|
65
|
+
def full_details
|
66
|
+
data = self.class
|
67
|
+
.data_keys
|
68
|
+
.map { |k| [k, send("#{k}")] }
|
69
|
+
.to_h
|
70
|
+
{ id: id,
|
71
|
+
url: url }.merge(data)
|
72
|
+
end
|
73
|
+
|
74
|
+
# Determine if data is paginated, or not.
|
75
|
+
#
|
76
|
+
# @param [Nokogiri::Doc] doc Nokogiri document to test for pagination
|
77
|
+
# @return [Boolean] true, if paginated, else false
|
78
|
+
#
|
79
|
+
def pagination?(doc)
|
80
|
+
!page_count(doc).nil?
|
81
|
+
end
|
82
|
+
|
83
|
+
# Determine the number of pages in a document.
#
# Reads the bold entries inside the ".pagination" element, converts their
# text to integers and returns the largest.
#
# @param [Nokogiri::Doc] doc Nokogiri document to test for pagination
# @return [Integer, nil] Number of pages in the document, or nil when the
#   document has no ".pagination" element
#
def page_count(doc)
  pagination = doc.at_css('.pagination')
  return unless pagination

  pagination.css('b').map { |node| node.text.to_i }.max
end
|
95
|
+
|
96
|
+
# Create Nokogiri doc from url.
#
# The URL is read through open-uri's URI#read rather than Kernel#open:
# Kernel#open treats a string beginning with "|" as a command to run, and
# it no longer opens URLs from Ruby 3.0 onwards.
#
# @param [URI, String] url address of the page to fetch
# @return [Nokogiri::HTML::Document] the parsed page
# @raise [PageNotFoundError] if the request fails with an HTTP error
#
def noko_doc(url)
  Nokogiri::HTML(URI(url).read)
rescue OpenURI::HTTPError
  raise PageNotFoundError.new("Page not found - #{url}")
end
|
105
|
+
|
106
|
+
module_function :noko_doc
|
107
|
+
|
108
|
+
# Return the non-breaking space character (U+00A0, HTML "&nbsp;") for
# stripping, substitution, etc.
#
# The character is returned directly as an escaped literal, so the result
# does not depend on how an invisible character or HTML entity in this
# source file is encoded, and no HTML is parsed on each call.
#
# @return [String] a one-character UTF-8 string containing U+00A0
#
def nbsp
  "\u00A0"
end
|
113
|
+
|
114
|
+
module_function :nbsp
|
115
|
+
|
116
|
+
# Turn a text label into a symbol key.
#
# The text is lower-cased, spaces become underscores and full stops are
# removed, e.g. "Est. Calories" becomes :est_calories.
#
# @param [String] text label to convert
# @return [Symbol] the symbolized label
#
def symbolize_text(text)
  key = text.downcase.tr(' ', '_')
  key.delete('.').to_sym
end
|
121
|
+
|
122
|
+
# Clean up a string scraped from RateBeer.com.
#
# Replaces, in order: non-breaking spaces, three problematic control-range
# characters, and runs of two or more spaces. The substitutions are applied
# to the passed string in place (gsub!), so the argument must not be
# frozen; the returned value is a stripped copy.
#
# @param [String] string text to clean; modified in place
# @return [String] the cleaned text with surrounding whitespace removed
#
def fix_characters(string)
  substitutions = [[nbsp,     " "],
                   ["\u0093", "ž"],
                   ["\u0092", "'"],
                   ["\u0096", "–"],
                   [/ {2,}/,  " "]]
  substitutions.each do |pattern, replacement|
    string.gsub!(pattern, replacement)
  end
  string.strip
end
|
136
|
+
|
137
|
+
# Make POST request to RateBeer form. Return a Nokogiri doc.
|
138
|
+
#
|
139
|
+
def post_request(url, params)
|
140
|
+
res = Net::HTTP.post_form(url, params)
|
141
|
+
Nokogiri::HTML(res.body)
|
142
|
+
end
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
def demodularized_class_name
|
147
|
+
self.class.name.split("::").last
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
require "i18n"
|
2
|
+
require_relative "beer"
|
3
|
+
require_relative "brewery"
|
4
|
+
require_relative "scraping"
|
5
|
+
require_relative "urls"
|
6
|
+
|
7
|
+
module RateBeer
|
8
|
+
|
9
|
+
# Stop I18N from enforcing locale, to avoid error message
|
10
|
+
I18n.enforce_available_locales = false
|
11
|
+
|
12
|
+
# This class provides functionality for searching RateBeer.com for a
|
13
|
+
# specific beer or brewery.
|
14
|
+
#
|
15
|
+
class Search
|
16
|
+
# Keys for fields scraped on RateBeer
|
17
|
+
def self.data_keys
|
18
|
+
[:query,
|
19
|
+
:beers,
|
20
|
+
:breweries]
|
21
|
+
end
|
22
|
+
|
23
|
+
include RateBeer::Scraping
|
24
|
+
include RateBeer::URLs
|
25
|
+
|
26
|
+
class << self
|
27
|
+
# Create method which generates new search instance and immediately runs
|
28
|
+
# a search.
|
29
|
+
#
|
30
|
+
def search(query)
|
31
|
+
s = self.new(query)
|
32
|
+
{ beers: s.beers,
|
33
|
+
breweries: s.breweries }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
attr_reader :query
|
38
|
+
|
39
|
+
# Create a RateBeer::Search instance.
|
40
|
+
#
|
41
|
+
# @param [String] query Term to use to search RateBeer
|
42
|
+
#
|
43
|
+
def initialize(query)
|
44
|
+
self.query = query
|
45
|
+
end
|
46
|
+
|
47
|
+
# Setter for query instance variable.
|
48
|
+
#
|
49
|
+
def query=(qry)
|
50
|
+
clear_cached_data
|
51
|
+
@query = fix_query_param(qry)
|
52
|
+
end
|
53
|
+
|
54
|
+
def inspect
|
55
|
+
num_beers = @beers && @beers.count || 0
|
56
|
+
num_breweries = @breweries && @breweries.count || 0
|
57
|
+
val = "#<#{self.class} - #{@query}"
|
58
|
+
val << " - #{num_beers} beers / #{num_breweries} breweries" if @beers || @breweries
|
59
|
+
val << ">"
|
60
|
+
end
|
61
|
+
|
62
|
+
# Search RateBeer for beers, brewers, etc.
#
# The search results page contains a series of tables, each preceded by a
# heading, containing data of matching brewers, beers, and places. Headings
# are paired with tables in document order; only the "brewers" and "beers"
# tables are extracted, into @breweries and @beers.
#
# RateBeer is inconsistent with searching for IPAs. If " ipa" is in the
# query, a second search is run with "india pale ale" substituted and its
# beers are merged in. Duplicates are removed by ID, because entities only
# define == and would not be recognised as equal by a plain uniq.
#
# @return [RateBeer::Search] self, with the results stored on the instance
#
def run_search
  @beers = nil
  @breweries = nil

  doc = post_request(URI.join(BASE_URL, SEARCH_URL), post_params)
  headings = doc.css('h2').map(&:text)
  headings.zip(doc.css('table')).each do |heading, table|
    case heading
    when 'brewers'
      @breweries = process_breweries_table(table)
    when 'beers'
      @beers = process_beers_table(table)
    end
  end

  if query.downcase.include?(" ipa")
    alt_query = query.downcase.gsub(" ipa", " india pale ale")
    extra_beers = self.class.new(alt_query).run_search.beers
    @beers = ((@beers || []) + (extra_beers || [])).uniq(&:id)
  end
  self
end
|
95
|
+
|
96
|
+
alias retrieve_details run_search
|
97
|
+
|
98
|
+
private
|
99
|
+
|
100
|
+
# Generate parameters to use in POST request.
|
101
|
+
#
|
102
|
+
def post_params
|
103
|
+
{ "BeerName" => @query }
|
104
|
+
end
|
105
|
+
|
106
|
+
# Process breweries table returned in search.
|
107
|
+
#
|
108
|
+
# The breweries table (if returned) consists of a series of rows each
|
109
|
+
# containing two cells: the first is the name (and hyperlink) to the
|
110
|
+
# brewery; and the second is the full location of the brewery.
|
111
|
+
#
|
112
|
+
# @param [Nokogiri::XML::Element] table An HTML table containing breweries
|
113
|
+
# information
|
114
|
+
# @return [Hash{Symbol, String}] Brewery data, including name, location,
|
115
|
+
# url and ID
|
116
|
+
#
|
117
|
+
def process_breweries_table(table)
  breweries = table.css('tr').map do |row|
    link = row.at_css('a')
    # A row without a hyperlink has no brewery URL to take an ID from, so it
    # is skipped rather than raising NoMethodError on nil.
    next if link.nil?

    # Only the first cell (the name) is needed; the location cell is not
    # passed on to Brewery.new.
    name_cell = row.element_children.first
    name = name_cell && fix_characters(name_cell.text)

    # The brewery ID is the last path segment of the link.
    Brewery.new(link['href'].split('/').last.to_i, name: name)
  end
  breweries.compact
end
|
128
|
+
|
129
|
+
# Process beers table returned in search.
|
130
|
+
#
|
131
|
+
# The beers table (if returned) consists of a series of rows each of which
|
132
|
+
# contains five cells: the first is the name (and hyperlink) to the beer;
|
133
|
+
# the second and third relate to features of the RateBeer.com site, and are
|
134
|
+
# ignored; the fourth provides the rating of the beer (if any); and the
|
135
|
+
# fifth contains the number of ratings submitted for this beer.
|
136
|
+
#
|
137
|
+
# The first row in the table contains headings, and is disregarded.
|
138
|
+
#
|
139
|
+
# @param [Nokogiri::XML::Element] table An HTML table containing beers
|
140
|
+
# information
|
141
|
+
# @return [Array<Beer>] One Beer per table row, built from the ID in the
|
142
|
+
# row's link and the beer name
|
143
|
+
#
|
144
|
+
def process_beers_table(table)
  # The first row holds the column headings, so it is dropped.
  beers = table.css('tr').drop(1).map do |row|
    link = row.at_css('a')
    # A row without a hyperlink has no beer URL to take an ID from, so it is
    # skipped rather than raising NoMethodError on nil.
    next if link.nil?

    # Only the first cell (the name) is needed; the score and ratings cells
    # are not passed on to Beer.new.
    name_cell = row.element_children.first
    name = name_cell && fix_characters(name_cell.text)

    # The beer ID is the last path segment of the link.
    Beer.new(link['href'].split('/').last.to_i, name: name)
  end
  beers.compact
end
|
158
|
+
|
159
|
+
# Amend search query string for better results
|
160
|
+
#
|
161
|
+
# RateBeer is a little finicky about finding search results. It does not
|
162
|
+
# provide results on abbreviations, and a passed query including special
|
163
|
+
# characters will return no hits. Often searching using a generic term such
|
164
|
+
# as Co, Brewers, Brewery, etc. will not return any results. This method
|
165
|
+
# strips out such generic terms from a query.
|
166
|
+
#
|
167
|
+
# This method attempts to deal with these issues.
|
168
|
+
#
|
169
|
+
# @param [String] query Raw query parameter
|
170
|
+
# @return [String] Query parameter amended to improve results
|
171
|
+
#
|
172
|
+
def fix_query_param(query)
  # Order matters: generic terms are stripped first, then known problem
  # terms are substituted, and the result is transliterated last.
  cleaned = substitute_known_terms(strip_generic_terms(query))
  I18n.transliterate(cleaned)
end
|
177
|
+
|
178
|
+
# Strip defined generic terms from query.
|
179
|
+
#
|
180
|
+
# This method removes all generic terms which may refer to a brewery, but
|
181
|
+
# which may not appear in the brewery's proper name, e.g. brewers.
|
182
|
+
#
|
183
|
+
# @param [String] query Raw query parameter
|
184
|
+
# @return [String] Query parameter with generics stripped out
|
185
|
+
#
|
186
|
+
def strip_generic_terms(query)
  # Regexp source fragments. They are single-quoted on purpose: in a
  # double-quoted string "\." collapses to ".", which turns 'Co\.?' into
  # "Co" plus ANY optional character and strips words such as "Cox".
  generic_words = ['Brew',
                   'Brewers',
                   'Brewery',
                   'Brewing',
                   'Brewhouse',
                   'Company',
                   'Co\.?',
                   'Inc\.?',
                   'Ltd\.?',
                   'Limited']

  # Each term gets its own pass, so neighbouring generic words are all
  # removed. Non-destructive gsub leaves the caller's string untouched and
  # works on frozen strings.
  stripped = generic_words.reduce(query) do |text, word|
    text.gsub(/(^| )#{word}( |$)/i, ' ')
  end
  stripped.strip
end
|
201
|
+
|
202
|
+
# Substitute known problematic terms in query.
|
203
|
+
#
|
204
|
+
# This method will replace terms which are known to cause problems in the
|
205
|
+
# search with different terms which do not cause the same problem.
|
206
|
+
#
|
207
|
+
# @param [String] query Raw query parameter
|
208
|
+
# @return [String] Query parameter with terms substituted
|
209
|
+
#
|
210
|
+
def substitute_known_terms(query)
  # Problem terms and their replacements. The brewery name is matched
  # case-insensitively so that "Six°North" is handled like "six°north";
  # slashes and colons are replaced with spaces.
  problem_terms = { /six°north/i => 'Six Degrees North',
                    %r{[/:]} => ' ' }

  # Non-destructive gsub leaves the caller's string untouched and works on
  # frozen strings.
  substituted = problem_terms.reduce(query) do |text, (term, substitute)|
    text.gsub(term, substitute)
  end
  substituted.strip
end
|
217
|
+
|
218
|
+
# Clear cached search data.
|
219
|
+
#
|
220
|
+
def clear_cached_data
  # Remove each cached result variable, skipping any that was never set.
  %w[@beers @breweries].each do |ivar|
    next unless instance_variable_defined?(ivar)

    remove_instance_variable(ivar)
  end
end
|
223
|
+
end
|
224
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require_relative 'beer'
|
2
|
+
require_relative 'scraping'
|
3
|
+
require_relative 'urls'
|
4
|
+
|
5
|
+
module RateBeer
  # A beer style listed on RateBeer.com, identified by its numeric style ID.
  class Style
    # Each key represents an item of data accessible for each style, and
    # defines dynamically a series of methods for accessing this data.
    #
    # NOTE(review): kept ahead of `include RateBeer::Scraping`, presumably
    # because the mixin reads it when included — confirm before reordering.
    #
    def self.data_keys
      [:name,
       :description,
       :glassware,
       :beers]
    end

    include RateBeer::Scraping
    include RateBeer::URLs

    attr_accessor :category

    class << self
      include RateBeer::URLs

      # Scrape all styles.
      #
      # RateBeer provides a styles landing page, with links through to info on
      # each style listed thereon. This method scrapes style info with links
      # to the more detailed pages.
      #
      # @param [Boolean] include_hidden Flag for whether to include hidden
      #   styles.
      # @return [Array<RateBeer::Style>] List of styles, each carrying its ID,
      #   name and category; hidden styles (if requested) carry only an ID
      #
      def all_styles(include_hidden=false)
        doc = Scraping.noko_doc(URI.join(BASE_URL, '/beerstyles/'))
        root = doc.at_css('div.container-fluid table')

        categories = root.css('.groupname').map(&:text)
        style_node = root.css('.styleGroup')

        # The i-th style group is labelled with the i-th category heading.
        styles = style_node.flat_map.with_index do |list, i|
          category = categories[i]
          list.css('a').map do |x|
            # The style ID is the last path segment of the link.
            Style.new(x['href'].split('/').last.to_i, name: x.text).tap { |s|
              s.category = category
            }
          end
        end

        include_hidden ? styles + hidden_styles : styles
      end

      # Scrape hidden style information
      #
      # RateBeer has a number of styles not accessible from the "beerstyles"
      # landing page. This method builds a Style for each known hidden ID.
      #
      # @return [Array<RateBeer::Style>] List of hidden styles
      #
      def hidden_styles
        hidden_ids = [40, 41, 57, 59, 66, 67, 68, 69, 70,
                      75, 83, 99, 104, 106, 116, 119, 120]
        hidden_ids.map { |id| Style.new(id) }
      end
    end

    private

    # Retrieve details about this style from the website.
    #
    # This method stores the retrieved details in instance variables of
    # this style instance: @name, @description, @glassware and @beers (a
    # Hash keyed by the number in the first column of the beer list).
    #
    # @raise [PageNotFoundError] if the style page has no content container
    # @return [nil]
    #
    def retrieve_details
      doc = noko_doc(URI.join(BASE_URL, style_url(id)))
      root = doc.at_css('.container-fluid')

      # Fail before requesting the beer list: there is no point in a second
      # request for a style that does not exist.
      raise PageNotFoundError, "style not found - ##{id}" if root.nil?

      beer_list = noko_doc(URI.join(BASE_URL, style_beers_url(id)))

      @name = root.at_css('h1').text.strip
      @description = root.at_css('#styleDescription').text
      @glassware = root.css('.glassblurb').map { |x| x.text.strip }

      # The first row of the beer list holds headings and is dropped.
      @beers = beer_list.css('tr').drop(1).map do |row|
        cells = row.css('td')
        url = cells[1].at_css('a')['href']
        # IDs are converted to Integer, matching how IDs scraped from links
        # are handled elsewhere in the gem.
        [cells[0].text.to_i, Beer.new(url.split('/').last.to_i,
                                      name: fix_characters(cells[1].text))]
      end.to_h
      nil
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module RateBeer

  # This module contains URLs or URL patterns for use throughout the Gem.
  #
  # The *_url helpers return paths relative to BASE_URL. All of them except
  # review_url are also callable directly on the module (URLs.beer_url(1)).
  #
  module URLs
    # Frozen so that the shared constants cannot be modified in place.
    BASE_URL = 'http://www.ratebeer.com'.freeze
    SEARCH_URL = '/findbeer.asp'.freeze

    # Return URL to info page for beer with id
    #
    def beer_url(id)
      "/beer/a/#{id}/"
    end

    # Return URL to page containing reviews for a given beer
    #
    def review_url(beer_id, sort_suffix, page_number)
      "/beer/a/#{beer_id}/#{sort_suffix}/#{page_number}/"
    end

    # Return URL to info page for brewery with id
    #
    def brewery_url(id)
      "/brewers/a/#{id}/"
    end

    # Return URL to info page for country with id
    #
    def country_url(id)
      "/breweries/a/0/#{id}/"
    end

    # Return URL to info page for region with id
    #
    def region_url(id)
      "/breweries/a/#{id}/0/"
    end

    # Return URL to info page for style with id
    #
    def style_url(id)
      "/beerstyles/a/#{id}/"
    end

    # Return URL to beers list page for style with id
    #
    def style_beers_url(id)
      "/ajax/top-beer-by-style.asp?style=#{id}"
    end

    # Expose the helpers as module-level functions. module_function also
    # makes the instance-method copies private. review_url is not listed,
    # so it stays a public instance method available only through inclusion.
    module_function :beer_url,
                    :brewery_url,
                    :country_url,
                    :region_url,
                    :style_url,
                    :style_beers_url
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ratebeer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4b
|
4
|
+
version: 0.0.5a
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dan Meakin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: RateBeer provides a way to access beer & brewery data from RateBeer.com.
|
14
14
|
email: dan@danmeakin.com
|
@@ -23,6 +23,16 @@ files:
|
|
23
23
|
- Rakefile
|
24
24
|
- bin/ratebeer
|
25
25
|
- lib/ratebeer.rb
|
26
|
+
- lib/ratebeer/beer.rb
|
27
|
+
- lib/ratebeer/brewery.rb
|
28
|
+
- lib/ratebeer/country.rb
|
29
|
+
- lib/ratebeer/location.rb
|
30
|
+
- lib/ratebeer/region.rb
|
31
|
+
- lib/ratebeer/review.rb
|
32
|
+
- lib/ratebeer/scraping.rb
|
33
|
+
- lib/ratebeer/search.rb
|
34
|
+
- lib/ratebeer/style.rb
|
35
|
+
- lib/ratebeer/urls.rb
|
26
36
|
- spec/lib/ratebeer/beer_spec.rb
|
27
37
|
- spec/lib/ratebeer/brewery_spec.rb
|
28
38
|
- spec/lib/ratebeer/country_spec.rb
|