vegas_insider_scraper 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/sports/mlb.rb +19 -0
- data/lib/sports/nba.rb +10 -0
- data/lib/sports/ncaabb.rb +22 -0
- data/lib/sports/ncaafb.rb +80 -0
- data/lib/sports/nfl.rb +10 -0
- data/lib/sports/nhl.rb +18 -0
- data/lib/sports/scraper_league.rb +484 -0
- data/lib/vegas_insider_scraper.rb +21 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3621386a1dc6841318420023e337cb2421f44564
|
4
|
+
data.tar.gz: 0216f2ab7ad22d840a79b7c609d57cf45185c8ca
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 18f4459bd95457d3019c310f51563fe33e08f1e8e22ff7a1b06df6084c0e527ed9ee9d31a81ebe176301d8aff35689aa2bcf6adc9cdcd7b9a4bd4b6bfb5a8036
|
7
|
+
data.tar.gz: 58e3991c01f85e8e470b64c2fef476455204f3606fb5b429c249088ab818f15233f044eae11144acd5129c12b4a3c4bff316a4d22ce4b413ce303bb0b0b60afb
|
data/lib/sports/mlb.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
# League scraper configured for the vegasinsider.com MLB pages.
class MLB < ScraperLeague

  # Configures the MLB identifiers.
  #
  # ScraperLeague#initialize assigns @moneyline_sport = false, so `super`
  # must run first; assigning the flag before `super` silently loses it.
  def initialize
    super
    @sport_id = 4
    @sport_name = :mlb
    @moneyline_sport = true
  end

  # Scrapes both MLB Las Vegas odds pages (base page and the "run" page).
  #
  # The scraped lines are not returned; the method only signals completion.
  #
  # @return [true]
  def get_games
    urls = %w[http://www.vegasinsider.com/mlb/odds/las-vegas/ http://www.vegasinsider.com/mlb/odds/las-vegas/run]
    # get_lines accepts the URL only and reads sport_id on its own; passing a
    # second argument raised ArgumentError.
    urls.each { |url| get_lines(url) }
    true
  end

end
|
data/lib/sports/nba.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
# League scraper configured for the vegasinsider.com college-basketball pages.
class NCAABB < ScraperLeague

  # Stores the league identifiers and then runs the ScraperLeague setup.
  def initialize
    @sport_name = 'college-basketball'
    @sport_id = 1
    super()
  end

  # Disabled helper, kept for reference only.
  #
  # def get_nicknames
  #   start_time = Time.now
  #   num_successes = 0
  #   Team.ncaabb_teams.each_with_index do |team, i|
  #     url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
  #     nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
  #     team.nickname = nickname
  #     team.save
  #   end
  #   Time.now - start_time
  # end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
|
2
|
+
# League scraper configured for the vegasinsider.com college-football pages.
class NCAAFB < ScraperLeague

  # Stores the league identifiers and then runs the ScraperLeague setup.
  def initialize
    @sport_name = 'college-football'
    @sport_id = 0
    super()
  end

  # Everything below is disabled code, kept for reference only.
  #
  # def get_nicknames
  #   start_time = Time.now
  #   Team.where(sport_id: 0).each_with_index do |team, i|
  #     next if team.nickname
  #     url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
  #     nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
  #     team.nickname = nickname
  #     team.save
  #   end
  #   Time.now - start_time
  # end

  # def get_locations
  #   start_time = Time.now
  #   Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
  #     team.location = nil
  #     team.save
  #   end
  #   Time.now - start_time
  # end

  # def scrape_custom_team_page_for_location(vegas_identifier, url)
  #   doc = Nokogiri::HTML(open(url))
  #   title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
  #   return title
  # end

  # def remove_nickname_from_location
  #   start_time = Time.now
  #   Team.where(sport_id: 0).each_with_index do |team, i|
  #     puts team.location
  #     puts team.location.gsub(" #{team.nickname}", '')
  #   end
  #   Time.now - start_time
  # end

  # def scrape_fcs_teams
  #   url = 'http://www.vegasinsider.com/college-football/teams/'
  #   doc = Nokogiri::HTML(open(url))
  #
  #   current_conference = nil
  #   fcs = []
  #
  #   doc.css('.main-content-cell table table table').each_with_index do |col,i|
  #     col.css('tr').each do |row|
  #       new_conference = row.at_css('td.viSubHeader1')
  #
  #       if new_conference
  #         current_conference = new_conference.content
  #       else
  #         team = row.at_css('a')
  #         if team
  #           team_formatted = {
  #             team_name: team.content,
  #             team_url_id: team_url_parser(team.attribute('href')),
  #             conference: current_conference,
  #             league: sport_id
  #           }
  #           puts team_formatted
  #           fcs.push team_formatted
  #         end
  #       end
  #     end
  #   end
  #
  #   Team.save_teams(fcs)
  #   return true
  #
  # end

end
|
data/lib/sports/nfl.rb
ADDED
data/lib/sports/nhl.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
# League scraper configured for the vegasinsider.com NHL pages.
class NHL < ScraperLeague

  # Configures the NHL identifiers after the ScraperLeague setup has run, so
  # nothing assigned here can be overwritten by the parent initializer.
  def initialize
    super
    @sport_id = 5
    @sport_name = :nhl
    @moneyline_sport = false
  end

  # Scrapes both NHL Las Vegas odds pages (base page and the "puck" page).
  #
  # The previous body could never finish: it passed two arguments to the
  # one-argument get_lines and then called Game.save_games(games), where
  # neither `games` nor Game.save_games is defined in this file.
  # The return value now mirrors MLB#get_games.
  #
  # @return [true]
  def get_games
    urls = %w[http://www.vegasinsider.com/nhl/odds/las-vegas/ http://www.vegasinsider.com/nhl/odds/las-vegas/puck]
    urls.each { |url| get_lines(url) }
    true
  end
end
|
@@ -0,0 +1,484 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
class ScraperLeague
|
5
|
+
|
6
|
+
attr_reader :sport_id
|
7
|
+
attr_reader :sport_name
|
8
|
+
attr_reader :moneyline_sport
|
9
|
+
attr_reader :teams
|
10
|
+
|
11
|
+
# Supplies the default for the money-line flag.
#
# Subclasses in this file assign @moneyline_sport *before* calling `super`
# (MLB sets it to true), so only fill in the default when nothing truthy
# has been stored yet instead of overwriting the subclass value.
def initialize
  @moneyline_sport ||= false
end
|
14
|
+
|
15
|
+
# Returns the scraped standings, running scrape_standings only while no
# truthy result has been cached in @teams.
def teams
  return @teams if @teams

  @teams = scrape_standings
end
|
18
|
+
|
19
|
+
# Gets the upcoming/current games for the sport
|
20
|
+
# Returns the lines scraped from the sport's Las Vegas odds page, caching
# the first truthy result in @current_games.
def current_games
  return @current_games if @current_games

  odds_url = "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"
  @current_games = get_lines(odds_url)
end
|
23
|
+
|
24
|
+
# Gets all of the schedule and results for each team
|
25
|
+
# Scrapes the team page of every entry in `teams` and caches the list of
# results in @team_schedules. Prints a progress line per team.
def team_schedules
  @team_schedules ||= teams.map do |team|
    info = team[:info]
    puts " ### GETTING GAMES FOR: #{info[:full_name]}"
    page_url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{info[:identifier]}"
    scrape_team_page(page_url, info[:identifier])
  end
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
######################################################
|
36
|
+
# Gets the teams and scrapes the records for the teams
|
37
|
+
# Downloads the standings page and the teams page for the sport and builds
# one team hash per standings row.
#
# Pages are fetched with URI.open (from open-uri): Kernel#open no longer
# opens URLs on Ruby 3.0+.
#
# @return [Array<Hash>] results of scrape_standings_row, in page order
def scrape_standings
  standings_teams = []

  url = "http://www.vegasinsider.com/#{sport_name}/standings/"
  doc = Nokogiri::HTML(URI.open(url)).at_css('.main-content-cell')
  teams_doc = Nokogiri::HTML(URI.open(url.gsub('standings', 'teams'))).at_css('.main-content-cell')

  doc.css(standings_table_class).each do |conference|
    conference_title = conference.at_css('.viHeaderNorm')
    next if conference_title.nil?

    table = conference.css('.viBodyBorderNorm table')[standings_table_index]
    next unless table

    table.css('tr').each do |row|
      # Rows carrying a sub-header cell are skipped.
      next if row.at_css('.viSubHeader1') || row.at_css('.viSubHeader2')

      grouping = conference_division_parser(conference_title.content)
      standings_teams.push(scrape_standings_row(row, grouping, teams_doc))
    end
  end

  standings_teams
end
|
59
|
+
|
60
|
+
# Utility method for scraping standings
|
61
|
+
# * gets the standings table class
|
62
|
+
# CSS selector used to pick the standings containers: '.SLTables1' for
# college sports, plain 'table' otherwise.
def standings_table_class
  if college_sport?
    '.SLTables1'
  else
    'table'
  end
end
|
65
|
+
|
66
|
+
# Utility method for scraping standings
|
67
|
+
# * gets the index of the table
|
68
|
+
# Position of the standings table inside a container: 1 for college sports,
# 0 otherwise.
def standings_table_index
  return 1 if college_sport?

  0
end
|
71
|
+
|
72
|
+
# Utility method for scraping standings
|
73
|
+
# * splits a standings header title into its conference and division
|
74
|
+
# Turns a standings header title into a grouping hash.
#
# College sports use the whole title as the conference. Other sports expect
# "<conference> - <division>"; a title without that separator is treated as
# a conference with no division instead of raising on a nil match.
#
# @param title [String] text of the standings header
# @return [Hash] { conference: String, division: String or nil }
def conference_division_parser(title)
  return { conference: title, division: nil } if college_sport?

  result = /(?<conference>.+) - (?<division>.+)/.match(title)
  return { conference: title, division: nil } unless result

  { conference: result[:conference], division: result[:division] }
end
|
82
|
+
|
83
|
+
|
84
|
+
# Utility method for scraping standings
|
85
|
+
# * is a college sport?
|
86
|
+
# True when sport_name is one of the two college sport names.
def college_sport?
  current = sport_name
  'college-football' == current || 'college-basketball' == current
end
|
89
|
+
|
90
|
+
# Utility method for scraping standings
|
91
|
+
# * scrapes a row of the standings, chooses a helper method based on the league
|
92
|
+
# Parses one standings row with the parser that matches sport_id and tags
# the result with its grouping.
#
# @param row [Object] the standings table row
# @param grouping [Hash] conference/division hash stored under :grouping
# @param teams_doc [Object] teams page document, used by the college parser
# @return [Hash] { info: Hash, record: Hash, grouping: Hash }
# @raise [ArgumentError] when sport_id has no parser (this previously
#   surfaced as a NoMethodError on nil)
def scrape_standings_row(row, grouping, teams_doc)
  team_shell = { info: {}, record: {} }
  team = case sport_id
         when 0, 1 then college_standings_row_parser(row, team_shell, teams_doc)
         when 2    then nfl_standings_row_parser(row, team_shell)
         when 3, 4 then pro_standings_row_parser(row, team_shell)
         when 5    then hockey_standings_row_parser(row, team_shell)
         else raise ArgumentError, "no standings parser for sport_id #{sport_id.inspect}"
         end
  team[:grouping] = grouping
  team
end
|
103
|
+
|
104
|
+
# Utility method for scraping standings
|
105
|
+
# * scrapes a row of the standings, for COLLEGE sports
|
106
|
+
# Fills `team` from a college standings row: cell 0 supplies the team info,
# a fixed set of later cells supplies the integer record fields.
def college_standings_row_parser(row, team, teams_doc)
  record_columns = {
    5 => :overall_wins,
    6 => :overall_losses,
    9 => :home_wins,
    10 => :home_losses,
    13 => :away_wins,
    14 => :away_losses
  }

  row.css('td').each_with_index do |cell, position|
    text = remove_element_whitespace(cell)
    if position == 0
      team[:info] = format_college_team(cell.at_css('a'), teams_doc)
    elsif record_columns.key?(position)
      team[:record][record_columns[position]] = text.to_i
    end
  end

  team
end
|
121
|
+
|
122
|
+
# Utility method for scraping standings
|
123
|
+
# * scrapes a row of the standings, for NFL
|
124
|
+
# Fills `team` from an NFL standings row.
#
# Cell 0 supplies the team info, cells 1-3 the overall wins/losses/ties as
# integers, and cells 7 and 8 the home and away "W-L-T" splits. The split
# values are stored as the captured strings, as before.
#
# A split cell that is empty or does not match the record pattern is now
# skipped; previously it raised NoMethodError on the nil match.
#
# @param row [Object] the standings table row
# @param team [Hash] { info: Hash, record: Hash } shell, mutated in place
# @return [Hash] the same `team` hash
def nfl_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 3 then team[:record][:overall_ties] = content.to_i
    when 7, 8
      record = RegularExpressions::NFL_RECORD_REGEX.match(content.to_s)
      next unless record

      venue = cell_index == 7 ? 'home' : 'away'
      team[:record][:"#{venue}_wins"] = record[:wins]
      team[:record][:"#{venue}_losses"] = record[:losses]
      team[:record][:"#{venue}_ties"] = record[:ties]
    end
  end
  team
end
|
147
|
+
|
148
|
+
# Utility method for scraping standings
|
149
|
+
# * scrapes a row of the standings, for PRO (MLB)
|
150
|
+
# Fills `team` from a standings row of the sports dispatched with sport_id
# 3 or 4.
#
# Cell 0 supplies the team info, cells 1-2 the overall wins/losses as
# integers, and cells 5 and 6 the home and away "W-L" splits. The split
# values are stored as the captured strings, as before.
#
# A split cell that is empty or does not match the record pattern is now
# skipped; previously it raised NoMethodError on the nil match.
#
# @param row [Object] the standings table row
# @param team [Hash] { info: Hash, record: Hash } shell, mutated in place
# @return [Hash] the same `team` hash
def pro_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 5, 6
      record = RegularExpressions::RECORD_REGEX.match(content.to_s)
      next unless record

      venue = cell_index == 5 ? 'home' : 'away'
      team[:record][:"#{venue}_wins"] = record[:wins]
      team[:record][:"#{venue}_losses"] = record[:losses]
    end
  end
  team
end
|
170
|
+
|
171
|
+
# Utility method for scraping standings
|
172
|
+
# * scrapes a row of the standings, for NHL
|
173
|
+
# Fills `team` from an NHL standings row.
#
# Cell 0 supplies the team info, cells 1-5 the overall wins, losses,
# overtime losses, shootout losses and points as integers, and cells 8 and
# 9 the home and away four-part splits. The split values are stored as the
# captured strings, as before.
#
# A split cell that is empty or does not match the record pattern is now
# skipped; previously it raised NoMethodError on the nil match.
#
# @param row [Object] the standings table row
# @param team [Hash] { info: Hash, record: Hash } shell, mutated in place
# @return [Hash] the same `team` hash
def hockey_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 3 then team[:record][:over_time_losses] = content.to_i
    when 4 then team[:record][:shootout_losses] = content.to_i
    when 5 then team[:record][:points] = content.to_i
    when 8, 9
      record = RegularExpressions::NHL_RECORD_REGEX.match(content.to_s)
      next unless record

      venue = cell_index == 8 ? 'home' : 'away'
      team[:record][:"#{venue}_wins"] = record[:wins]
      team[:record][:"#{venue}_losses"] = record[:losses]
      team[:record][:"#{venue}_over_time_losses"] = record[:ot_losses]
      team[:record][:"#{venue}_shootout_losses"] = record[:shootout_losses]
    end
  end
  team
end
|
200
|
+
|
201
|
+
# Utility method for scraping standings
|
202
|
+
# * formats the team using the URL
|
203
|
+
# Builds the info hash for a team from its standings link.
#
# The nickname is derived from the URL identifier. Each dash-separated word
# is capitalized ("red-sox" becomes "Red Sox"); the previous
# `identifier.capitalize` produced "Red-sox", which never matched the full
# name, so the location kept the nickname for multi-word nicknames.
# The nickname is removed only from the end of the full name.
#
# @param url [Object] link element responding to #content and #attribute
# @return [Hash] :identifier, :nickname, :location, :full_name, :url
def format_team(url)
  href = url.attribute('href')
  full_name = url.content
  identifier = team_url_parser(href)
  nickname = identifier.split('-').map(&:capitalize).join(' ')

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.sub(/ #{Regexp.escape(nickname)}\z/i, ''),
    full_name: full_name,
    url: href.value
  }
end
|
216
|
+
|
217
|
+
# Utility method for scraping standings
|
218
|
+
# * formats the team using the URL and the Nokogiri document for the teams page
|
219
|
+
# Builds the info hash for a college team. The link text is used as the
# location, the full name is looked up in the teams page, and the nickname
# is the full name with "<location> " removed.
def format_college_team(url, teams_doc)
  complete_name = team_page_full_name(teams_doc, url)
  school = url.content
  slug = team_url_parser(url.attribute('href'))

  {
    identifier: slug,
    nickname: complete_name.gsub("#{school} ", ''),
    location: school,
    full_name: complete_name,
    url: url.attribute('href').value
  }
end
|
233
|
+
|
234
|
+
# Utility method for scraping standings
|
235
|
+
# * gets the full team name using the teams page
|
236
|
+
# Returns the text of the link in `doc` whose href equals the href of `url`.
def team_page_full_name(doc, url)
  href = url.attribute('href')
  matching_link = doc.at_css("a[href='#{href}']")
  matching_link.content
end
|
239
|
+
|
240
|
+
##########################################
|
241
|
+
# Gets the current lines for a given sport
|
242
|
+
# Scrapes an odds page and returns one hash per game.
#
# A row whose first cell links to two teams starts a new game. Any other
# row is treated as a note for the most recent game (its text is stored in
# :notes and checked for a doubleheader id).
#
# The page is fetched with URI.open (from open-uri): Kernel#open no longer
# opens URLs on Ruby 3.0+. Rows without a first `td` are skipped instead of
# raising.
#
# @param url [String] address of the odds page
# @return [Array<Hash>] Game#as_json of every game found, in page order
def get_lines(url)
  games = []
  doc = Nokogiri::HTML(URI.open(url))

  doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
    game_cell = game_row.at_css('td:first-child')
    next unless game_cell

    # The first link is used as the away team and the second as the home team.
    away_team, home_team = game_cell_parser(game_cell)
    game = Game.new(home_team: home_team, away_team: away_team, sport_id: sport_id)

    if game.teams_found?
      game.update(time: get_game_time(game_cell))
      game.update(regular_lines(get_odds(game_row)))
      games.push(game)
    elsif (last_game = games.last)
      last_game.update(notes: game_cell.content)
      last_game.update(doubleheader: doubleheader_id(game_cell.content))
    end
  end

  games.map(&:as_json)
end
|
266
|
+
|
267
|
+
# Utility method for scraping current lines
|
268
|
+
# * find the identifier for each team
|
269
|
+
# Returns the team identifier of every 'b a' link in the cell, in order.
def game_cell_parser(cell)
  team_links = cell.css('b a')
  team_links.map do |link|
    team_url_parser(link.attribute('href'))
  end
end
|
272
|
+
|
273
|
+
# Utility method for scraping current lines
|
274
|
+
# * getting the time of the game
|
275
|
+
# Parses the game time shown in the cell's first <span>.
#
# The page shows month/day without a year: the current year is used unless
# the game month is earlier than the current month and is not the previous
# month, in which case the following year is used.
#
# The timestamp is parsed with TZ set to US/Eastern. The caller's TZ is
# restored afterwards, even when parsing raises; the old code reset TZ to
# nil and therefore discarded any value the process had configured.
#
# @param cell [Object] element whose first span holds "MM/DD H:MM AM|PM"
# @return [Time]
def get_game_time(cell)
  parts = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
  today = Date.today
  game_month = parts[:mo].to_i
  year = (today.month > game_month && today.month - 1 != game_month) ? today.year + 1 : today.year

  previous_tz = ENV['TZ']
  begin
    ENV['TZ'] = 'US/Eastern'
    Time.strptime("#{year} #{parts[:mo]} #{parts[:d]} #{parts[:h]}:#{parts[:mi]}:00 #{parts[:mer]}", "%Y %m %d %r")
  ensure
    ENV['TZ'] = previous_tz
  end
end
|
284
|
+
|
285
|
+
# Utility method for scraping current lines
|
286
|
+
# * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
|
287
|
+
# Returns the text of the odds link in the row's third cell with all
# whitespace removed and "½" rewritten as ".5".
#
# Returns '' when the row has no such link. The old `|| ''` guard sat after
# `.content`, so a missing link still raised NoMethodError.
# [[:space:]] also removes non-breaking spaces.
#
# @param odds_element [Object] the game row
# @return [String]
def get_odds(odds_element)
  link = odds_element.at_css('td:nth-child(3) a')
  return '' if link.nil?

  link.content.to_s.gsub(/[[:space:]]+/, '').gsub("½", ".5")
end
|
290
|
+
|
291
|
+
# Utility method for scraping current lines
|
292
|
+
# * parsing the lines for non-moneyline sports
|
293
|
+
# Turns a cleaned odds string into spread and total values. The string is
# tried against both odds layouts; a line matched by one layout is negated
# for one team and used as-is for the other.
def regular_lines(odds_string)
  away_favored = RegularExpressions::ODDS.match(odds_string) || {}
  home_favored = RegularExpressions::ALT_ODDS.match(odds_string) || {}
  away_line = away_favored[:line]
  home_line = home_favored[:line]

  home_odds = home_line ? -odds_reader(home_line) : odds_reader(away_line)
  away_odds = away_line ? -odds_reader(away_line) : odds_reader(home_line)

  {
    home_team_odds: home_odds,
    away_team_odds: away_odds,
    over_under: (home_favored[:ou] || away_favored[:ou])
  }
end
|
303
|
+
|
304
|
+
# Utility method for scraping current lines
|
305
|
+
# * parsing the odds to get a number
|
306
|
+
# Converts an odds token into a number.
#
# nil is now treated like the empty string. It used to fall through to
# `nil.to_f` and come back as 0.0, which is indistinguishable from a
# pick'em line.
#
# @param odds [String, nil] '' / nil (no line), 'PK' (pick'em) or a number
# @return [nil, Integer, Float] nil, 0 for 'PK', otherwise odds.to_f
def odds_reader(odds)
  case odds
  when nil, '' then nil
  when 'PK' then 0
  else odds.to_f
  end
end
|
309
|
+
|
310
|
+
# Utility method for scraping current lines
|
311
|
+
# * is the game a doubleheader
|
312
|
+
# Returns the captured doubleheader game id when the text matches the
# DOUBLEHEADER pattern, nil otherwise.
def doubleheader_id(content)
  found = RegularExpressions::DOUBLEHEADER.match(content)
  return nil unless found

  found[:id]
end
|
316
|
+
|
317
|
+
################################################
|
318
|
+
# Gets the schedule and results for a team page
|
319
|
+
# Scrapes the schedule/results table of a team page.
#
# For each row after the first, cell 0 gives the date and cell 1 the
# opponent. When the row holds a finished game, cells 2-5 give the line,
# over/under, result and ATS result. The line is skipped for money-line
# sports.
#
# The page is fetched with URI.open (from open-uri): Kernel#open no longer
# opens URLs on Ruby 3.0+.
#
# @param url [String] address of the team page
# @param team [String] identifier of the team the page belongs to
# @return [Hash] { team: team, games: Array<Hash> }
def scrape_team_page(url, team)
  rows = Nokogiri::HTML(URI.open(url)).css('.main-content-cell table:nth-child(5) table').css('tr')

  games = rows.each_with_index.map do |row, index|
    # The first row is skipped (presumably the table header); the resulting
    # nil is removed by `compact` below.
    next if index == 0

    game = Game.new
    opponent = nil

    row.css('td').each_with_index do |cell, m|
      case m
      when 0 then game.update(time: get_game_date(cell, row))
      when 1
        info = get_game_info(cell, team)
        opponent = info[:opponent]
        game.update(info[:game_info])
      end

      next unless game_finished?(row)

      case m
      when 2
        unless moneyline_sport
          formatted = odds_reader(remove_element_whitespace(cell))
          game.update(home_team_odds: formatted, away_team_odds: (formatted ? -formatted : formatted))
        end
      when 3 then game.update(over_under: remove_element_whitespace(cell))
      when 4 then game.update(game_results(cell, team, opponent))
      when 5 then game.update(ats_results(cell, team, opponent))
      end
    end

    game
  end

  { team: team, games: games.compact.map(&:as_json) }
end
|
355
|
+
|
356
|
+
# Utility method for scraping team page results
|
357
|
+
# * gets the date of the game, accounting for different years
|
358
|
+
# Parses the "Mon DD" date of a schedule row and picks a year for it.
#
# The page shows no year, so the current year is assumed, then adjusted:
# a finished game in a later month than today belongs to last year, and an
# unfinished game in an earlier month than today belongs to next year.
#
# Uses `gsub` rather than `gsub!`: the bang form returns nil when the text
# contains no whitespace, which made Date.strptime raise.
#
# @param date_string [Object] cell whose #content holds the date text
# @param row [Object] the schedule row, passed to game_finished?
# @return [Time] midnight of the game date
def get_game_date(date_string, row)
  today = Date.today
  date = Date.strptime(date_string.content.gsub(/\s+/, ""), "%b%e")
  finished = game_finished?(row)

  if finished && date.month > today.month
    date = Date.new(today.year - 1, date.month, date.day)
  elsif !finished && date.month < today.month
    date = Date.new(today.year + 1, date.month, date.day)
  end

  date.to_time
end
|
367
|
+
|
368
|
+
# Utility method for scraping team page results
|
369
|
+
# * determines if the game has concluded
|
370
|
+
# True when the whitespace-stripped text of the row's fifth cell matches the
# GAME_RESULTS pattern with a non-empty match.
def game_finished?(row)
  outcome_cell = row.at_css('td:nth-child(5)')
  matched = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(outcome_cell))
  !matched.to_s.empty?
end
|
373
|
+
|
374
|
+
# Utility method for scraping team page results
|
375
|
+
# * gets the home_team, away_team, and doubleheader info
|
376
|
+
# Works out the opponent, home/away sides and doubleheader number from the
# opponent cell of a schedule row.
#
# A cell whose stripped text starts with "@" means the primary team is
# away. The opponent identifier comes from the cell's link when present,
# otherwise from custom_opponent_identifier.
#
# The doubleheader number is read from the match itself. The old code looked
# up the symbol key :doubleheader in the string-keyed hash returned by
# matchdata_to_hash, so the value was always nil.
#
# @param cell [Object] the opponent cell
# @param primary_team [String] identifier of the team owning the page
# @return [Hash] { opponent:, game_info: { doubleheader:, home_team:, away_team: } }
def get_game_info(cell, primary_team)
  link = cell.at_css('a')
  away = remove_element_whitespace(cell).to_s.start_with?('@')
  opponent = link ? team_url_parser(link.attribute('href')) : custom_opponent_identifier(cell)
  doubleheader = RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content)

  {
    opponent: opponent,
    game_info: {
      doubleheader: doubleheader && doubleheader[:doubleheader],
      home_team: away ? opponent : primary_team,
      away_team: away ? primary_team : opponent
    }
  }
end
|
389
|
+
|
390
|
+
# Utility method for scraping team page results
|
391
|
+
# * gets the result of the game
|
392
|
+
# Reads the result cell of a schedule row and reports how the game ended,
# who won, and both scores from the winner's point of view.
def game_results(cell, primary_team, opponent)
  match = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  outcome = matchdata_to_hash(match)['result']

  winner, winner_points, loser_points =
    case outcome
    when :won then [primary_team, match['team_score'], match['oppo_score']]
    when :lost then [opponent, match['oppo_score'], match['team_score']]
    else [nil, nil, nil]
    end

  {
    ending: (outcome ? :ended : match.to_s),
    winning_team: winner,
    winning_score: winner_points,
    losing_score: loser_points,
  }
end
|
402
|
+
|
403
|
+
# Utility method for scraping team page results
|
404
|
+
# * gets the spread results
|
405
|
+
# Reads the spread-result cell of a schedule row: which team won against
# the spread (if stated) and the over/under outcome.
def ats_results(cell, primary_team, opponent)
  parsed = matchdata_to_hash(RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell)))
  spread_outcome = parsed['ats_result']

  covering_team =
    if spread_outcome == :win
      primary_team
    elsif spread_outcome == :loss
      opponent
    end

  {
    ats_winner: covering_team,
    over_under_result: parsed['ou_result']
  }
end
|
413
|
+
|
414
|
+
# Utility method for scraping team page results
|
415
|
+
# * gets the identifier for an opponent without links
|
416
|
+
# Builds an identifier for an opponent that has no team link, from the text
# of its cell.
#
# The text is stripped, whitespace runs become dashes, a leading "@-"
# marker is removed, and the result is lower-cased with its last two
# characters dropped, as before.
# NOTE(review): the reason for dropping two trailing characters is not
# visible here — confirm against real page content.
#
# `gsub('@-')` with a single argument returns an Enumerator, so the old
# chain raised NoMethodError on `downcase`; the replacement string is now
# passed explicitly. [[:space:]] also covers non-breaking spaces.
#
# @param cell [Object] element responding to #content
# @return [String]
def custom_opponent_identifier(cell)
  cell.content.strip.gsub(/[[:space:]]+/, '-').gsub('@-', '').downcase[0..-3]
end
|
419
|
+
|
420
|
+
# General Utility Method
|
421
|
+
# used to get the team identifier from the URL
|
422
|
+
# Extracts the team identifier: the run of word characters and dashes that
# follows "/team/" in the URL.
def team_url_parser(url)
  found = /.+\/team\/(?<team_name>(\w|-)+)/.match(url)
  found[:team_name]
end
|
425
|
+
|
426
|
+
# General Utility Method
|
427
|
+
# used to remove all whitespace from the content of the element
|
428
|
+
# Returns the element's text with every whitespace character removed, or
# nil when nothing is left.
#
# A nil element (for example a cell lookup that found nothing) now yields
# nil instead of raising. [[:space:]] is used so non-breaking spaces are
# removed along with ordinary whitespace.
#
# @param element [Object, nil] element responding to #content
# @return [String, nil]
def remove_element_whitespace(element)
  return nil if element.nil?

  string = element.content.to_s.gsub(/[[:space:]]+/, '')
  string.empty? ? nil : string
end
|
432
|
+
|
433
|
+
# Converts the named captures of a match into a hash keyed by group name
# (String). Each captured value is lower-cased and turned into a Symbol;
# groups that did not participate map to nil. A nil match gives {}.
def matchdata_to_hash(matchdata)
  return {} unless matchdata

  matchdata.names.each_with_object({}) do |name, captures|
    value = matchdata[name]
    captures[name] = value ? value.downcase.to_sym : nil
  end
end
|
436
|
+
|
437
|
+
# Regular Expressions Module
|
438
|
+
# Patterns used by the scraper. Inputs are mostly cell texts with the
# whitespace already removed.
module RegularExpressions
  # Win/loss records: "W-L", "W-L-T" and the four-part hockey form.
  RECORD_REGEX     = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # Game time as "MM/DD H:MM XM".
  TIME_REGEX           = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
  # Odds strings: a total followed by "o" or "u", and the two orderings of
  # line and total.
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x
  ODDS                 = /-?(?<line>\w+(\.5)?)-\d\d(?<ou>\d+(\.5)?)[ou]-\d\d/x
  ALT_ODDS             = /(?<ou>\d+(\.5)?)[ou]-\d\d-?(?<line>\w+(\.5)?)-\d\d/x

  # Doubleheader markers on the odds page and on team pages.
  DOUBLEHEADER         = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  # Result text followed by "score-score", or a postponed/cancelled notice.
  GAME_RESULTS   = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  # Optional ATS result followed by "/" and the over/under result.
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
|
454
|
+
|
455
|
+
# Plain value holder for one scraped game. Only the attributes listed in
# PERMITTED_KEYS can be stored; any other key passed in is ignored.
class Game
  # Single source of truth for the readable and assignable attributes
  # (the list used to be duplicated between attr_reader and sanitize).
  PERMITTED_KEYS = %i[
    time home_team away_team home_team_odds away_team_odds over_under sport_id
    ending winning_team winning_score losing_score ats_winner over_under_result doubleheader notes
  ].freeze

  attr_reader(*PERMITTED_KEYS)

  # @param args [Hash] initial attribute values keyed by Symbol
  def initialize(args = {})
    assign(args)
  end

  # Stores the permitted attributes from `args`, overwriting earlier values.
  #
  # @param args [Hash] attribute values keyed by Symbol
  # @return [Game] self, so calls can be chained
  def update(args = {})
    assign(args)
    self
  end

  # @return [Object] truthy only when both teams are set
  def teams_found?
    home_team && away_team
  end

  # @return [Hash] every attribute assigned so far, keyed by Symbol, in
  #   assignment order
  def as_json
    instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
  end

  # Returns the entries of `args` whose key is a permitted attribute.
  #
  # This stays public: the bare `private` that used to precede it has no
  # effect on `def self.` methods.
  #
  # @param args [Hash]
  # @return [Hash]
  def self.sanitize(args)
    args.select { |key, _| PERMITTED_KEYS.include?(key) }
  end

  private

  # Copies the permitted entries of `args` into instance variables.
  def assign(args)
    Game.sanitize(args).each { |attribute, value| instance_variable_set("@#{attribute}", value) }
  end
end
|
483
|
+
|
484
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
require 'sports/scraper_league'
|
5
|
+
require 'sports/ncaafb'
|
6
|
+
require 'sports/ncaabb'
|
7
|
+
require 'sports/nba'
|
8
|
+
require 'sports/nfl'
|
9
|
+
require 'sports/mlb'
|
10
|
+
require 'sports/nhl'
|
11
|
+
|
12
|
+
# Entry point of the gem: holds one scraper instance per supported league.
class VegasInsiderScraper

  # League scraper classes, in the order their instances appear in #sports.
  SPORTS = [NCAAFB, NCAABB, NFL, NBA, MLB, NHL]

  attr_reader :sports

  # Instantiates every class listed in SPORTS.
  def initialize
    @sports = SPORTS.map(&:new)
  end

end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vegas_insider_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Reitz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-06-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A gem to scrape vegasinsider.com for stats, teams, lines, and more!
|
14
|
+
email: reitz1994@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/sports/mlb.rb
|
20
|
+
- lib/sports/nba.rb
|
21
|
+
- lib/sports/ncaabb.rb
|
22
|
+
- lib/sports/ncaafb.rb
|
23
|
+
- lib/sports/nfl.rb
|
24
|
+
- lib/sports/nhl.rb
|
25
|
+
- lib/sports/scraper_league.rb
|
26
|
+
- lib/vegas_insider_scraper.rb
|
27
|
+
homepage: http://rubygems.org/gems/vegas_insider_scraper
|
28
|
+
licenses:
|
29
|
+
- MIT
|
30
|
+
metadata: {}
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options: []
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 2.5.1
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Vegas Insider Website Scraper API
|
51
|
+
test_files: []
|