vegas_insider_scraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/sports/mlb.rb +19 -0
- data/lib/sports/nba.rb +10 -0
- data/lib/sports/ncaabb.rb +22 -0
- data/lib/sports/ncaafb.rb +80 -0
- data/lib/sports/nfl.rb +10 -0
- data/lib/sports/nhl.rb +18 -0
- data/lib/sports/scraper_league.rb +484 -0
- data/lib/vegas_insider_scraper.rb +21 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3621386a1dc6841318420023e337cb2421f44564
|
4
|
+
data.tar.gz: 0216f2ab7ad22d840a79b7c609d57cf45185c8ca
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 18f4459bd95457d3019c310f51563fe33e08f1e8e22ff7a1b06df6084c0e527ed9ee9d31a81ebe176301d8aff35689aa2bcf6adc9cdcd7b9a4bd4b6bfb5a8036
|
7
|
+
data.tar.gz: 58e3991c01f85e8e470b64c2fef476455204f3606fb5b429c249088ab818f15233f044eae11144acd5129c12b4a3c4bff316a4d22ce4b413ce303bb0b0b60afb
|
data/lib/sports/mlb.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
# Scraper for MLB lines on vegasinsider.com.
class MLB < ScraperLeague

  def initialize
    @sport_id = 4
    @sport_name = :mlb
    # MLB is quoted as moneylines rather than point spreads.
    @moneyline_sport = true
    super
  end

  # Fetches current games from both the money-line and run-line odds pages.
  # Returns true when both pages have been scraped.
  def get_games
    urls = %w[http://www.vegasinsider.com/mlb/odds/las-vegas/ http://www.vegasinsider.com/mlb/odds/las-vegas/run]
    urls.each do |url|
      # BUG FIX: ScraperLeague#get_lines accepts a single URL argument;
      # passing sport_id as well raised ArgumentError at runtime.
      get_lines(url)
    end
    true
  end
end
|
data/lib/sports/nba.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
# Scraper configuration for NCAA men's basketball.
class NCAABB < ScraperLeague
  def initialize
    @sport_id   = 1
    @sport_name = 'college-basketball'
    super
  end

  # NOTE(review): a one-off helper for backfilling team nicknames was left
  # here commented out in the released gem; it depended on Team/Scraper
  # classes that are not part of this gem, so it has been dropped.
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
|
2
|
+
# Scraper configuration for NCAA football.
class NCAAFB < ScraperLeague
  def initialize
    @sport_id   = 0
    @sport_name = 'college-football'
    super
  end

  # NOTE(review): several one-off maintenance helpers (nickname/location
  # backfills and an FCS teams scrape) were left here commented out in the
  # released gem; they depended on Team/Scraper classes that are not part
  # of this gem, so they have been dropped.
end
|
data/lib/sports/nfl.rb
ADDED
data/lib/sports/nhl.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
# Scraper for NHL lines on vegasinsider.com.
class NHL < ScraperLeague

  def initialize
    @sport_id = 5
    @sport_name = :nhl
    # NHL lines are spread-style (puck line), not pure moneylines.
    @moneyline_sport = false
    super
  end

  # Fetches current games from both the money-line and puck-line odds pages.
  # Returns true when both pages have been scraped (mirrors MLB#get_games).
  def get_games
    urls = %w[http://www.vegasinsider.com/nhl/odds/las-vegas/ http://www.vegasinsider.com/nhl/odds/las-vegas/puck]
    urls.each do |url|
      # BUG FIX: ScraperLeague#get_lines takes only the URL; additionally
      # the original called Game.save_games(games), referencing a method
      # and a local variable that do not exist anywhere in this gem, so
      # the method raised as soon as the first page was scraped.
      get_lines(url)
    end
    true
  end
end
|
@@ -0,0 +1,484 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
class ScraperLeague
|
5
|
+
|
6
|
+
attr_reader :sport_id
|
7
|
+
attr_reader :sport_name
|
8
|
+
attr_reader :moneyline_sport
|
9
|
+
attr_reader :teams
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@moneyline_sport = false
|
13
|
+
end
|
14
|
+
|
15
|
+
def teams
|
16
|
+
@teams ||= scrape_standings
|
17
|
+
end
|
18
|
+
|
19
|
+
# Gets the upcoming/current games for the sport
|
20
|
+
def current_games
|
21
|
+
@current_games ||= get_lines("http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/")
|
22
|
+
end
|
23
|
+
|
24
|
+
# Gets all of the schedule and results for each team
|
25
|
+
def team_schedules
|
26
|
+
@team_schedules ||= teams.map { |team|
|
27
|
+
puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
|
28
|
+
url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
|
29
|
+
scrape_team_page(url, team[:info][:identifier])
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
######################################################
|
36
|
+
# Gets the teams and scrapes the records for the teams
|
37
|
+
# Scrapes the standings pages and returns an array of team hashes
# ({ info:, record:, grouping: }), one entry per standings row.
def scrape_standings
  standings_url = "http://www.vegasinsider.com/#{sport_name}/standings/"
  standings_doc = Nokogiri::HTML(open(standings_url)).at_css('.main-content-cell')
  # The teams page is also fetched: college rows need it to resolve
  # the full team name (see format_college_team).
  teams_doc = Nokogiri::HTML(open(standings_url.gsub('standings', 'teams'))).at_css('.main-content-cell')

  rows = []
  standings_doc.css(standings_table_class).each do |conference|
    header = conference.at_css(".viHeaderNorm")
    next unless header

    table = conference.css('.viBodyBorderNorm table')[standings_table_index]
    next unless table

    grouping = conference_division_parser(header.content)
    table.css('tr').each do |row|
      # Skip the column-label sub-header rows inside the table.
      next if row.at_css('.viSubHeader1') || row.at_css('.viSubHeader2')
      rows << scrape_standings_row(row, grouping, teams_doc)
    end
  end
  rows
end
|
59
|
+
|
60
|
+
# Utility method for scraping standings
|
61
|
+
# * gets the standings table class
|
62
|
+
# CSS selector wrapping each conference's standings table; the college
# pages use a different layout than the pro pages.
def standings_table_class
  college_sport? ? '.SLTables1' : 'table'
end

# Index of the actual standings table inside the matched container.
def standings_table_index
  college_sport? ? 1 : 0
end

# Splits a standings header like "AFC - East" into conference/division.
# College headers are the conference name alone (no division).
# ROBUSTNESS FIX: a pro-style header without " - " previously raised
# NoMethodError on the nil MatchData; it now falls back to a nil division.
def conference_division_parser(title)
  return { conference: title, division: nil } if college_sport?

  match = /(?<conference>.+) - (?<division>.+)/.match(title)
  return { conference: title, division: nil } unless match

  { conference: match[:conference], division: match[:division] }
end

# True for the NCAA leagues, which share the college page layout.
def college_sport?
  ['college-football','college-basketball'].include?(sport_name)
end
|
89
|
+
|
90
|
+
# Utility method for scraping standings
|
91
|
+
# * scrapes a row of the standings, chooses a helper method based on the league
|
92
|
+
# Parses one standings row into { info:, record:, grouping: },
# dispatching to the league-specific row parser by sport_id.
def scrape_standings_row(row, grouping, teams_doc)
  blank = { info: {}, record: {} }
  parsed =
    case sport_id
    when 0, 1 then college_standings_row_parser(row, blank, teams_doc)
    when 2    then nfl_standings_row_parser(row, blank)
    when 3, 4 then pro_standings_row_parser(row, blank)
    when 5    then hockey_standings_row_parser(row, blank)
    end
  parsed[:grouping] = grouping
  parsed
end
|
103
|
+
|
104
|
+
# Utility method for scraping standings
|
105
|
+
# * scrapes a row of the standings, for COLLEGE sports
|
106
|
+
# Parses a college standings row: cell 0 holds the team link, and fixed
# cell positions hold the overall/home/away win-loss counts.
def college_standings_row_parser(row, team, teams_doc)
  record_columns = {
    5  => :overall_wins,  6  => :overall_losses,
    9  => :home_wins,     10 => :home_losses,
    13 => :away_wins,     14 => :away_losses
  }
  row.css('td').each_with_index do |cell, position|
    if position.zero?
      team[:info] = format_college_team(cell.at_css('a'), teams_doc)
    elsif (key = record_columns[position])
      team[:record][key] = remove_element_whitespace(cell).to_i
    end
  end
  team
end
|
121
|
+
|
122
|
+
# Utility method for scraping standings
|
123
|
+
# * scrapes a row of the standings, for NFL
|
124
|
+
# Parses an NFL standings row. Overall W/L/T live in their own columns;
# home and away splits are combined "W-L-T" strings.
# NOTE(review): the split records are stored as the raw captured strings
# (not integers), unlike the overall columns — kept as-is for compatibility.
def nfl_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, position|
    text = remove_element_whitespace(cell)
    case position
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]   = text.to_i
    when 2 then team[:record][:overall_losses] = text.to_i
    when 3 then team[:record][:overall_ties]   = text.to_i
    when 7 then copy_split_record(team, :home, RegularExpressions::NFL_RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses, ties: :ties)
    when 8 then copy_split_record(team, :away, RegularExpressions::NFL_RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses, ties: :ties)
    end
  end
  team
end

# Parses an NBA/MLB standings row: overall W/L columns plus "W-L" splits.
# NOTE(review): split values stay strings, matching the original behavior.
def pro_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, position|
    text = remove_element_whitespace(cell)
    case position
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]   = text.to_i
    when 2 then team[:record][:overall_losses] = text.to_i
    when 5 then copy_split_record(team, :home, RegularExpressions::RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses)
    when 6 then copy_split_record(team, :away, RegularExpressions::RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses)
    end
  end
  team
end

# Parses an NHL standings row: W/L/OTL/SOL/points columns plus
# "W-L-OTL-SOL" home/away splits.
def hockey_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, position|
    text = remove_element_whitespace(cell)
    case position
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]     = text.to_i
    when 2 then team[:record][:overall_losses]   = text.to_i
    when 3 then team[:record][:over_time_losses] = text.to_i
    when 4 then team[:record][:shootout_losses]  = text.to_i
    when 5 then team[:record][:points]           = text.to_i
    when 8 then copy_split_record(team, :home, RegularExpressions::NHL_RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses,
                                  ot_losses: :over_time_losses, shootout_losses: :shootout_losses)
    when 9 then copy_split_record(team, :away, RegularExpressions::NHL_RECORD_REGEX.match(text),
                                  wins: :wins, losses: :losses,
                                  ot_losses: :over_time_losses, shootout_losses: :shootout_losses)
    end
  end
  team
end

# Copies named captures from a split-record MatchData onto team[:record],
# prefixing each record key with the side (:home/:away). A nil MatchData
# raises NoMethodError, exactly as the original inline code did.
def copy_split_record(team, side, match, captures)
  captures.each do |capture_name, record_key|
    team[:record][:"#{side}_#{record_key}"] = match[capture_name]
  end
end
|
200
|
+
|
201
|
+
# Utility method for scraping standings
|
202
|
+
# * formats the team using the URL
|
203
|
+
# Builds the team info hash from a standings-page anchor element.
# The nickname is inferred by capitalizing the URL identifier, and the
# location is the full name with that nickname stripped off.
def format_team(url)
  full_name  = url.content
  identifier = team_url_parser(url.attribute('href'))
  nickname   = identifier.capitalize

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: url.attribute('href').value
  }
end

# Builds the team info hash for a college team. College standings anchors
# only carry the location, so the full name is looked up on the teams page
# and the nickname is derived by removing the location from it.
def format_college_team(url, teams_doc)
  location   = url.content
  identifier = team_url_parser(url.attribute('href'))
  full_name  = team_page_full_name(teams_doc, url)

  {
    identifier: identifier,
    nickname: full_name.gsub("#{location} ", ''),
    location: location,
    full_name: full_name,
    url: url.attribute('href').value
  }
end

# Looks up a team's full display name on the teams page via its href.
def team_page_full_name(doc, url)
  doc.at_css("a[href='#{url.attribute('href')}']").content
end
|
239
|
+
|
240
|
+
##########################################
|
241
|
+
# Gets the current lines for a given sport
|
242
|
+
# Scrapes an odds page and returns an array of game hashes.
# Rows that don't resolve to two teams are treated as annotation rows
# (doubleheader notes etc.) and attached to the previous game.
def get_lines(url)
  games = []
  doc = Nokogiri::HTML(open(url))

  doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
    game_cell = game_row.at_css('td:first-child')
    away, home = game_cell_parser(game_cell)
    game = Game.new(home_team: home, away_team: away, sport_id: sport_id)

    if game.teams_found?
      game.update(time: get_game_time(game_cell))
      game.update(regular_lines(get_odds(game_row)))
      games << game
    elsif (previous = games.last)
      previous.update(notes: game_cell.content)
      previous.update(doubleheader: doubleheader_id(game_cell.content))
    end
  end

  games.map(&:as_json)
end
|
266
|
+
|
267
|
+
# Utility method for scraping current lines
|
268
|
+
# * find the identifier for each team
|
269
|
+
# Extracts the linked team identifiers (away first, then home) from a
# game cell's bold anchors.
def game_cell_parser(cell)
  cell.css('b a').map do |team_link|
    team_url_parser(team_link.attribute('href'))
  end
end
|
272
|
+
|
273
|
+
# Utility method for scraping current lines
|
274
|
+
# * getting the time of the game
|
275
|
+
# Parses the game time shown in a cell. Times on the site are US/Eastern;
# the year is inferred (a month well in the past is assumed to belong to
# next year's schedule).
# BUG FIX: the original permanently cleared ENV['TZ'] on exit (and left it
# set to US/Eastern if parsing raised); the caller's value is now saved
# and restored in an ensure block.
def get_game_time(cell)
  raw = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
  rollover = (Date.today.month > raw[:mo].to_i) && (Date.today.month - 1 != raw[:mo].to_i)
  year = rollover ? Date.today.year + 1 : Date.today.year

  previous_tz = ENV['TZ']
  begin
    ENV['TZ'] = 'US/Eastern'
    Time.strptime("#{year} #{raw[:mo]} #{raw[:d]} #{raw[:h]}:#{raw[:mi]}:00 #{raw[:mer]}", "%Y %m %d %r")
  ensure
    ENV['TZ'] = previous_tz
  end
end
|
284
|
+
|
285
|
+
# Utility method for scraping current lines
|
286
|
+
# * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
|
287
|
+
def get_odds(odds_element)
|
288
|
+
(odds_element.at_css('td:nth-child(3) a').content || '').gsub(" ","").gsub("½",".5").strip
|
289
|
+
end
|
290
|
+
|
291
|
+
# Utility method for scraping current lines
|
292
|
+
# * parsing the lines for non-moneyline sports
|
293
|
+
def regular_lines(odds_string)
|
294
|
+
away_fav_odds = RegularExpressions::ODDS.match(odds_string) || {}
|
295
|
+
home_fav_odds = RegularExpressions::ALT_ODDS.match(odds_string) || {}
|
296
|
+
|
297
|
+
result = {
|
298
|
+
home_team_odds: (home_fav_odds[:line] ? -odds_reader(home_fav_odds[:line]) : odds_reader(away_fav_odds[:line])),
|
299
|
+
away_team_odds: (away_fav_odds[:line] ? -odds_reader(away_fav_odds[:line]) : odds_reader(home_fav_odds[:line])),
|
300
|
+
over_under: (home_fav_odds[:ou] || away_fav_odds[:ou])
|
301
|
+
}
|
302
|
+
end
|
303
|
+
|
304
|
+
# Utility method for scraping current lines
|
305
|
+
# * parsing the odds to get a number
|
306
|
+
def odds_reader(odds)
|
307
|
+
case odds when '' then nil when 'PK' then 0 else odds.to_f end
|
308
|
+
end
|
309
|
+
|
310
|
+
# Utility method for scraping current lines
|
311
|
+
# * is the game a doubleheader
|
312
|
+
def doubleheader_id(content)
|
313
|
+
dh = RegularExpressions::DOUBLEHEADER.match(content)
|
314
|
+
dh ? dh[:id] : nil
|
315
|
+
end
|
316
|
+
|
317
|
+
################################################
|
318
|
+
# Gets the schedule and results for a team page
|
319
|
+
# Scrapes a team's schedule page into { team:, games: [...] }.
# The first table row is a header; finished games additionally carry
# odds, over/under, final score, and against-the-spread results.
def scrape_team_page(url, team)
  schedule_rows = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr')

  games = schedule_rows.each_with_index.map do |row, row_index|
    next if row_index == 0

    game = Game.new
    opponent = nil
    finished = game_finished?(row)

    row.css('td').each_with_index do |cell, column|
      case column
      when 0 then game.update(time: get_game_date(cell, row))
      when 1
        info = get_game_info(cell, team)
        opponent = info[:opponent]
        game.update(info[:game_info])
      end

      next unless finished

      case column
      when 2
        # Moneyline sports show the line elsewhere; spread sports carry
        # the home line here, mirrored (negated) for the away side.
        unless moneyline_sport
          line = odds_reader(remove_element_whitespace(cell))
          game.update(home_team_odds: line, away_team_odds: (line ? -line : line))
        end
      when 3 then game.update(over_under: remove_element_whitespace(cell))
      when 4 then game.update(game_results(cell, team, opponent))
      when 5 then game.update(ats_results(cell, team, opponent))
      end
    end

    game
  end

  { team: team, games: games.compact.map(&:as_json) }
end
|
355
|
+
|
356
|
+
# Utility method for scraping team page results
|
357
|
+
# * gets the date of the game, accounting for different years
|
358
|
+
# Parses a schedule-cell date like "Dec 25" (the site shows no year) and
# infers the year from whether the game has finished.
def get_game_date(date_string, row)
  # BUG FIX: the original used gsub!, which returns nil when the cell
  # content contains no whitespace, crashing Date.strptime.
  compact = date_string.content.gsub(/\s+/, "")
  date = Date.strptime(compact, "%b%e")
  if game_finished?(row) && date.month > Date.today.month
    # A completed game "later" in the calendar belongs to last year.
    date = Date.new(Date.today.year - 1, date.month, date.day)
  elsif !game_finished?(row) && date.month < Date.today.month
    # An upcoming game "earlier" in the calendar belongs to next year.
    date = Date.new(Date.today.year + 1, date.month, date.day)
  end
  date.to_time
end
|
367
|
+
|
368
|
+
# Utility method for scraping team page results
|
369
|
+
# * determines if the game has concluded
|
370
|
+
# A game row counts as finished when the results column (5th td) matches
# GAME_RESULTS (a score, "Postponed", or "Cancelled").
def game_finished?(row)
  result_text = remove_element_whitespace(row.at_css('td:nth-child(5)'))
  !"#{RegularExpressions::GAME_RESULTS.match(result_text)}".empty?
end

# Reads the matchup cell: a leading "@" means the primary team was away.
# Opponents without a link (non-league teams) get a synthesized identifier.
def get_game_info(cell, primary_team)
  link = cell.at_css('a')
  played_away = remove_element_whitespace(cell)[0] == "@"
  opponent = link ? team_url_parser(link.attribute('href')) : custom_opponent_identifier(cell)

  {
    opponent: opponent,
    game_info: {
      # BUG FIX: matchdata_to_hash keys its result by capture NAME
      # (a String), so the original symbol lookup [:doubleheader] was
      # always nil and the doubleheader flag was never populated.
      doubleheader: matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader'],
      home_team: played_away ? opponent : primary_team,
      away_team: played_away ? primary_team : opponent,
    }
  }
end
|
389
|
+
|
390
|
+
# Utility method for scraping team page results
|
391
|
+
# * gets the result of the game
|
392
|
+
# Parses the results cell ("Won112-104", "Postponed", ...) into a result
# hash. NOTE(review): the :won/:lost comparisons assume the site spells
# out the result word; a bare "W"/"L" cell would leave the winner nil —
# confirm against live markup.
def game_results(cell, primary_team, opponent)
  results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  outcome = matchdata_to_hash(results)['result']

  winner, winner_score, loser_score =
    case outcome
    when :won  then [primary_team, results['team_score'], results['oppo_score']]
    when :lost then [opponent, results['oppo_score'], results['team_score']]
    else [nil, nil, nil]
    end

  {
    ending: (outcome ? :ended : results.to_s),
    winning_team: winner,
    winning_score: winner_score,
    losing_score: loser_score,
  }
end

# Parses the against-the-spread cell (e.g. "Win/Over") into the ATS
# winner and the over/under result.
def ats_results(cell, primary_team, opponent)
  results = matchdata_to_hash(RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell)))

  ats_winner =
    case results['ats_result']
    when :win  then primary_team
    when :loss then opponent
    end

  { ats_winner: ats_winner, over_under_result: results['ou_result'] }
end
|
413
|
+
|
414
|
+
# Utility method for scraping team page results
|
415
|
+
# * gets the identifier for an opponent without links
|
416
|
+
# Builds an identifier for an opponent that has no team link on the site:
# kebab-case the cell text, drop the leading "@-" marker, and trim the
# trailing two characters.
# BUG FIX: the original called gsub('@-') with no replacement, which
# returns an Enumerator and crashed on the subsequent #downcase.
# NOTE(review): the [0..-3] slice (dropping the final two characters) is
# preserved from the original — presumably trimming a suffix; confirm.
def custom_opponent_identifier(cell)
  cell.content.strip.gsub(/(\s| )+/, '-').gsub('@-', '').downcase[0..-3]
end
|
419
|
+
|
420
|
+
# General Utility Method
|
421
|
+
# used the get the team identifier from the URL
|
422
|
+
# Extracts the team identifier from a vegasinsider team URL
# (".../team/<identifier>").
def team_url_parser(url)
  match = /.+\/team\/(?<team_name>(\w|-)+)/.match(url)
  match[:team_name]
end

# Strips all whitespace (including " " entities) from an element's
# text, returning nil when nothing remains.
def remove_element_whitespace(element)
  stripped = element.content.gsub(/(\s| )+/, '')
  return nil if stripped.empty?
  stripped
end

# Converts a MatchData into { capture_name => downcased symbol or nil };
# a failed (nil) match yields an empty hash.
def matchdata_to_hash(matchdata)
  return {} unless matchdata

  matchdata.names.each_with_object({}) do |name, memo|
    value = matchdata[name]
    memo[name] = value && value.downcase.to_sym
  end
end
|
436
|
+
|
437
|
+
# Regular Expressions Module
|
438
|
+
# Regular expressions shared by the scraping helpers.
module RegularExpressions
  # Win-loss records: "W-L", NFL "W-L-T", NHL "W-L-OTL-SOL".
  RECORD_REGEX     = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # Game times as shown on the odds pages, e.g. "12/25 7:30 PM".
  TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/

  # Odds strings: the favorite's line with its juice, plus the
  # over/under total ("o"/"u"); ALT_ODDS covers total-first ordering.
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x
  ODDS     = /-?(?<line>\w+(\.5)?)-\d\d(?<ou>\d+(\.5)?)[ou]-\d\d/x
  ALT_ODDS = /(?<ou>\d+(\.5)?)[ou]-\d\d-?(?<line>\w+(\.5)?)-\d\d/x

  # Doubleheader annotations on odds rows and results pages.
  DOUBLEHEADER         = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  # Results cells: "<result><team_score>-<oppo_score>" or a status word;
  # ATS cells: optional "ats/" prefix before the over/under result.
  GAME_RESULTS   = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
|
454
|
+
|
455
|
+
# Lightweight value object for a scraped game. Attributes are set via
# mass assignment, filtered against the permitted key list in .sanitize.
class Game
  attr_reader :time, :home_team, :away_team, :home_team_odds, :away_team_odds, :over_under, :sport_id,
              :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes

  def initialize(args = {})
    update(args)
  end

  # Merges the permitted subset of args into the instance; returns self
  # so calls can be chained.
  def update(args = {})
    Game.sanitize(args).each do |attribute, value|
      instance_variable_set("@#{attribute}", value)
    end
    self
  end

  # True when both sides of the matchup were resolved.
  def teams_found?
    home_team && away_team
  end

  # Hash of every attribute that has been set so far.
  def as_json
    instance_variables.each_with_object({}) do |ivar, hash|
      hash[ivar.to_s.delete("@").to_sym] = instance_variable_get(ivar)
    end
  end

  private

  # Drops any keys that are not known Game attributes.
  # (Defined as a singleton method, so the `private` marker above does
  # not apply to it — callers use Game.sanitize, as in the original.)
  def self.sanitize(args)
    permitted_keys = [:time, :home_team, :away_team, :home_team_odds, :away_team_odds, :over_under, :sport_id,
                      :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes]
    args.select { |key, _| permitted_keys.include? key }
  end
end
|
483
|
+
|
484
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
require 'sports/scraper_league'
|
5
|
+
require 'sports/ncaafb'
|
6
|
+
require 'sports/ncaabb'
|
7
|
+
require 'sports/nba'
|
8
|
+
require 'sports/nfl'
|
9
|
+
require 'sports/mlb'
|
10
|
+
require 'sports/nhl'
|
11
|
+
|
12
|
+
# Entry point: instantiates one scraper per supported league.
# NOTE(review): in this published release, nfl.rb is empty and nba.rb
# defines NCAABB, so the NFL and NBA constants referenced below are
# never defined and this class raises NameError at load — confirm the
# gem's packaging.
class VegasInsiderScraper

  attr_reader :sports

  SPORTS = [NCAAFB, NCAABB, NFL, NBA, MLB, NHL]

  def initialize
    @sports = SPORTS.map(&:new)
  end
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vegas_insider_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Reitz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-06-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A gem to scrape vegasinsider.com for stats, teams, lines, and more!
|
14
|
+
email: reitz1994@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/sports/mlb.rb
|
20
|
+
- lib/sports/nba.rb
|
21
|
+
- lib/sports/ncaabb.rb
|
22
|
+
- lib/sports/ncaafb.rb
|
23
|
+
- lib/sports/nfl.rb
|
24
|
+
- lib/sports/nhl.rb
|
25
|
+
- lib/sports/scraper_league.rb
|
26
|
+
- lib/vegas_insider_scraper.rb
|
27
|
+
homepage: http://rubygems.org/gems/vegas_insider_scraper
|
28
|
+
licenses:
|
29
|
+
- MIT
|
30
|
+
metadata: {}
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options: []
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 2.5.1
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Vegas Insider Website Scraper API
|
51
|
+
test_files: []
|