vegas_insider_scraper 0.0.15 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4406cf01f39908cc6e374486a063ad22f52374bf
4
- data.tar.gz: 445c1ddcdbae187568c046f21b09f187a6fbe7f7
2
+ SHA256:
3
+ metadata.gz: 538bf0d9d798e8602915f700679ad9541dcad4b6f279cf4fd8a734030d556821
4
+ data.tar.gz: 4d0d701086e782e36ecc3524145a30189627f1940d4b5c5b6473bc4a1cb167e9
5
5
  SHA512:
6
- metadata.gz: 61eb9deb49c1fec787015199b3eabd24a503773c4a15df7dcea3b2670db5d4fcc9a8d2a8ef7929245b5633c782b4b5706b79d6d928a5be11c7b8e55b42d97787
7
- data.tar.gz: 5e62b335b39d4788356b335757bc2454d06dec2b490e0ea8043cebc9b7b6c72ae25486b14a504fcf45d7739639a308b762124697b82f82c7a25036dcf843ce98
6
+ metadata.gz: e4b0a4db84a2ce082e21de78de71c02a51f12c8c1bc4325f60151c9547cdb06ffa1ebe93b42bbae7d5206b433f76284a792d6c17f36585cb8eecde12422f95b4
7
+ data.tar.gz: b6a1ff071b8d3675cf318468c45fbc4a8806eb750cbd09be8a0656dbb9415b6b4229a978a8cf90febfec68a0c051c20e0d072d754b4db3af840833b09936a597
data/lib/sports/mlb.rb CHANGED
@@ -1,16 +1,16 @@
1
1
 
2
2
  class MLB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 4
6
- @sport_name = :mlb
7
- super
8
- @moneyline_sport = true
9
- end
4
+ def initialize
5
+ @sport_id = 4
6
+ @sport_name = :mlb
7
+ super
8
+ @moneyline_sport = true
9
+ end
10
10
 
11
- def current_games
12
- @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
13
- "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"])
14
- end
11
+ def current_games
12
+ @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
13
+ "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"])
14
+ end
15
15
 
16
16
  end
data/lib/sports/nba.rb CHANGED
@@ -1,10 +1,10 @@
1
1
 
2
2
  class NBA < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 3
6
- @sport_name = :nba
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 3
6
+ @sport_name = :nba
7
+ super
8
+ end
9
9
 
10
10
  end
data/lib/sports/ncaabb.rb CHANGED
@@ -1,22 +1,22 @@
1
1
 
2
2
  class NCAABB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 1
6
- @sport_name = 'college-basketball'
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 1
6
+ @sport_name = 'college-basketball'
7
+ super
8
+ end
9
9
 
10
- # def get_nicknames
11
- # start_time = Time.now
12
- # num_successes = 0
13
- # Team.ncaabb_teams.each_with_index do |team, i|
14
- # url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
- # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
- # team.nickname = nickname
17
- # team.save
18
- # end
19
- # Time.now - start_time
20
- # end
10
+ # def get_nicknames
11
+ # start_time = Time.now
12
+ # num_successes = 0
13
+ # Team.ncaabb_teams.each_with_index do |team, i|
14
+ # url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
+ # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
+ # team.nickname = nickname
17
+ # team.save
18
+ # end
19
+ # Time.now - start_time
20
+ # end
21
21
 
22
22
  end
data/lib/sports/ncaafb.rb CHANGED
@@ -1,80 +1,84 @@
1
1
 
2
2
  class NCAAFB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 0
6
- @sport_name = 'college-football'
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 0
6
+ @sport_name = 'college-football'
7
+ super
8
+ end
9
9
 
10
- # def get_nicknames
11
- # start_time = Time.now
12
- # Team.where(sport_id: 0).each_with_index do |team, i|
13
- # next if team.nickname
14
- # url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
- # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
- # team.nickname = nickname
17
- # team.save
18
- # end
19
- # Time.now - start_time
20
- # end
10
+ def teams
11
+ @teams ||= scrape_teams
12
+ end
21
13
 
22
- # def get_locations
23
- # start_time = Time.now
24
- # Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
25
- # team.location = nil
26
- # team.save
27
- # end
28
- # Time.now - start_time
29
- # end
14
+ # def get_nicknames
15
+ # start_time = Time.now
16
+ # Team.where(sport_id: 0).each_with_index do |team, i|
17
+ # next if team.nickname
18
+ # url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
19
+ # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
20
+ # team.nickname = nickname
21
+ # team.save
22
+ # end
23
+ # Time.now - start_time
24
+ # end
30
25
 
31
- # def scrape_custom_team_page_for_location(vegas_identifier, url)
32
- # doc = Nokogiri::HTML(open(url))
33
- # title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
34
- # return title
35
- # end
26
+ # def get_locations
27
+ # start_time = Time.now
28
+ # Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
29
+ # team.location = nil
30
+ # team.save
31
+ # end
32
+ # Time.now - start_time
33
+ # end
36
34
 
37
- # def remove_nickname_from_location
38
- # start_time = Time.now
39
- # Team.where(sport_id: 0).each_with_index do |team, i|
40
- # puts team.location
41
- # puts team.location.gsub(" #{team.nickname}", '')
42
- # end
43
- # Time.now - start_time
44
- # end
35
+ # def scrape_custom_team_page_for_location(vegas_identifier, url)
36
+ # doc = Nokogiri::HTML(open(url))
37
+ # title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
38
+ # return title
39
+ # end
45
40
 
46
- # def scrape_fcs_teams
47
- # url = 'http://www.vegasinsider.com/college-football/teams/'
48
- # doc = Nokogiri::HTML(open(url))
41
+ # def remove_nickname_from_location
42
+ # start_time = Time.now
43
+ # Team.where(sport_id: 0).each_with_index do |team, i|
44
+ # puts team.location
45
+ # puts team.location.gsub(" #{team.nickname}", '')
46
+ # end
47
+ # Time.now - start_time
48
+ # end
49
49
 
50
- # current_conference = nil
51
- # fcs = []
50
+ # def scrape_fcs_teams
51
+ # url = 'http://www.vegasinsider.com/college-football/teams/'
52
+ # doc = Nokogiri::HTML(open(url))
52
53
 
53
- # doc.css('.main-content-cell table table table').each_with_index do |col,i|
54
- # col.css('tr').each do |row|
55
- # new_conference = row.at_css('td.viSubHeader1')
54
+ # current_conference = nil
55
+ # fcs = []
56
56
 
57
- # if new_conference
58
- # current_conference = new_conference.content
59
- # else
60
- # team = row.at_css('a')
61
- # if team
62
- # team_formatted = {
63
- # team_name: team.content,
64
- # team_url_id: team_url_parser(team.attribute('href')),
65
- # conference: current_conference,
66
- # league: sport_id
67
- # }
68
- # puts team_formatted
69
- # fcs.push team_formatted
70
- # end
71
- # end
72
- # end
73
- # end
57
+ # doc.css('.main-content-cell table table table').each_with_index do |col,i|
58
+ # col.css('tr').each do |row|
59
+ # new_conference = row.at_css('td.viSubHeader1')
74
60
 
75
- # Team.save_teams(fcs)
76
- # return true
61
+ # if new_conference
62
+ # current_conference = new_conference.content
63
+ # else
64
+ # team = row.at_css('a')
65
+ # if team
66
+ # team_formatted = {
67
+ # team_name: team.content,
68
+ # team_url_id: team_url_parser(team.attribute('href')),
69
+ # conference: current_conference,
70
+ # league: sport_id
71
+ # }
72
+ # puts team_formatted
73
+ # fcs.push team_formatted
74
+ # end
75
+ # end
76
+ # end
77
+ # end
77
78
 
78
- # end
79
+ # Team.save_teams(fcs)
80
+ # return true
81
+
82
+ # end
79
83
 
80
84
  end
data/lib/sports/nfl.rb CHANGED
@@ -1,10 +1,10 @@
1
1
 
2
2
  class NFL < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 2
6
- @sport_name = :nfl
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 2
6
+ @sport_name = :nfl
7
+ super
8
+ end
9
9
 
10
10
  end
@@ -3,554 +3,661 @@ require 'open-uri'
3
3
 
4
4
  class ScraperLeague
5
5
 
6
- attr_reader :sport_id
7
- attr_reader :sport_name
8
- attr_reader :moneyline_sport
9
- attr_reader :teams
10
-
11
- def initialize
12
- @moneyline_sport = false
13
- end
14
-
15
- def teams
16
- @teams ||= scrape_standings
17
- end
18
-
19
- # Gets the upcoming/current games for the sport
20
- def current_games
21
- @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/","http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"])
22
- end
23
-
24
- # Gets all of the schedule and results for each team
25
- def team_schedules
26
- @team_schedules ||= teams.map { |team|
27
- puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
28
- url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
29
- scrape_team_page(url, team[:info][:identifier])
30
- }
31
- end
32
-
33
- private
34
-
35
- ######################################################
36
- # Gets the teams and scrapes the records for the teams
37
- def scrape_standings
38
- standings_teams = []
39
-
40
- url = "http://www.vegasinsider.com/#{sport_name}/standings/"
41
- doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
42
- teams_doc = Nokogiri::HTML(open(url.gsub('standings','teams'))).at_css('.main-content-cell')
43
-
44
- doc.css(standings_table_class).each do |conference|
45
-
46
- conference_title = conference.at_css(".viHeaderNorm")
47
-
48
- next if conference_title.nil?
49
-
50
- table = conference.css('.viBodyBorderNorm table')[standings_table_index]
51
- table = conference.css('.viBodyBorderNorm table')[2] if (conference_title.content == 'Conference USA' && sport_name == 'college-football')
52
-
53
- if table
54
- table.css('tr').each_with_index do |row, index|
55
- next if (row.at_css('.viSubHeader1') != nil || row.at_css('.viSubHeader2') != nil)
56
- standings_teams.push(scrape_standings_row(row, conference_division_parser(conference_title.content), teams_doc))
57
- end
58
- end
59
- end
60
- standings_teams
61
- end
62
-
63
- # Utility method for scraping standings
64
- # * gets the standings table class
65
- def standings_table_class
66
- college_sport? ? '.SLTables1' : 'table'
67
- end
68
-
69
- # Utility method for scraping standings
70
- # * gets the index of the table
71
- def standings_table_index
72
- college_sport? ? 1 : 0
73
- end
74
-
75
- # Utility method for scraping standings
76
- # * gets the standings table class
77
- def conference_division_parser(title)
78
- if college_sport?
79
- return { conference: title, division: nil }
80
- else
81
- result = /(?<conference>.+) - (?<division>.+)/.match(title)
82
- return { conference: result[:conference], division: result[:division] }
83
- end
84
- end
85
-
86
-
87
- # Utility method for scraping standings
88
- # * is a college sport?
89
- def college_sport?
90
- ['college-football','college-basketball'].include?(sport_name)
91
- end
92
-
93
- # Utility method for scraping standings
94
- # * scrapes a row of the standings, chooses a helper method based on the league
95
- def scrape_standings_row(row, grouping, teams_doc)
96
- team_shell = { info: {}, record: {} }
97
- team = case sport_id
98
- when 0,1 then college_standings_row_parser(row, team_shell, teams_doc)
99
- when 2 then nfl_standings_row_parser(row, team_shell)
100
- when 3,4 then pro_standings_row_parser(row, team_shell)
101
- when 5 then hockey_standings_row_parser(row, team_shell)
102
- end
103
- team[:grouping] = grouping
104
- team
105
- end
106
-
107
- # Utility method for scraping standings
108
- # * scrapes a row of the standings, for COLLEGE sports
109
- def college_standings_row_parser(row, team, teams_doc)
110
- row.css('td').each_with_index do |cell, cell_index|
111
- value = remove_element_whitespace(cell)
112
- case cell_index
113
- when 0
114
- team[:info] = format_college_team(cell.at_css('a'), teams_doc)
115
- when 5 then team[:record][:overall_wins] = value.to_i
116
- when 6 then team[:record][:overall_losses] = value.to_i
117
- when 9 then team[:record][:home_wins] = value.to_i
118
- when 10 then team[:record][:home_losses] = value.to_i
119
- when 13 then team[:record][:away_wins] = value.to_i
120
- when 14 then team[:record][:away_losses] = value.to_i
121
- end
122
- end
123
- return team
124
- end
125
-
126
- # Utility method for scraping standings
127
- # * scrapes a row of the standings, for NFL
128
- def nfl_standings_row_parser(row, team)
129
- row.css('td').each_with_index do |cell, cell_index|
130
- content = remove_element_whitespace(cell)
131
-
132
- case cell_index
133
- when 0 then team[:info] = format_team(cell.at_css('a'))
134
- when 1 then team[:record][:overall_wins] = content.to_i
135
- when 2 then team[:record][:overall_losses] = content.to_i
136
- when 3 then team[:record][:overall_ties] = content.to_i
137
- when 7
138
- record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
139
- team[:record][:home_wins] = record[:wins]
140
- team[:record][:home_losses] = record[:losses]
141
- team[:record][:home_ties] = record[:ties]
142
- when 8
143
- record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
144
- team[:record][:away_wins] = record[:wins]
145
- team[:record][:away_losses] = record[:losses]
146
- team[:record][:away_ties] = record[:ties]
147
- end
148
- end
149
- return team
150
- end
151
-
152
- # Utility method for scraping standings
153
- # * scrapes a row of the standings, for PRO (MLB)
154
- def pro_standings_row_parser(row, team)
155
- row.css('td').each_with_index do |cell, cell_index|
156
- content = remove_element_whitespace(cell)
157
-
158
- case cell_index
159
- when 0 then team[:info] = format_team(cell.at_css('a'))
160
- when 1 then team[:record][:overall_wins] = content.to_i
161
- when 2 then team[:record][:overall_losses] = content.to_i
162
- when 5
163
- record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
164
- team[:record][:home_wins] = record[:wins]
165
- team[:record][:home_losses] = record[:losses]
166
- when 6
167
- record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
168
- team[:record][:away_wins] = record[:wins]
169
- team[:record][:away_losses] = record[:losses]
170
- end
171
- end
172
- return team
173
- end
174
-
175
- # Utility method for scraping standings
176
- # * scrapes a row of the standings, for NHL
177
- def hockey_standings_row_parser(row, team)
178
- row.css('td').each_with_index do |cell, cell_index|
179
- content = remove_element_whitespace(cell)
180
-
181
- case cell_index
182
- when 0 then team[:info] = format_team(cell.at_css('a'))
183
- when 1 then team[:record][:overall_wins] = content.to_i
184
- when 2 then team[:record][:overall_losses] = content.to_i
185
- when 3 then team[:record][:over_time_losses] = content.to_i
186
- when 4 then team[:record][:shootout_losses] = content.to_i
187
- when 5 then team[:record][:points] = content.to_i
188
- when 8
189
- record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
190
- team[:record][:home_wins] = record[:wins]
191
- team[:record][:home_losses] = record[:losses]
192
- team[:record][:home_over_time_losses] = record[:ot_losses]
193
- team[:record][:home_shootout_losses] = record[:shootout_losses]
194
- when 9
195
- record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
196
- team[:record][:away_wins] = record[:wins]
197
- team[:record][:away_losses] = record[:losses]
198
- team[:record][:away_over_time_losses] = record[:ot_losses]
199
- team[:record][:away_shootout_losses] = record[:shootout_losses]
200
- end
201
- end
202
- return team
203
- end
204
-
205
- # Utility method for scraping standings
206
- # * formats the team using the URL
207
- def format_team(url)
208
- full_name = url.content
209
- identifier = team_url_parser(url.attribute('href'))
210
- nickname = humanize_identifier(identifier)
211
-
212
- return {
213
- identifier: identifier,
214
- nickname: nickname,
215
- location: full_name.gsub(" #{nickname}", ''),
216
- full_name: full_name,
217
- url: url.attribute('href').value
218
- }
219
- end
220
-
221
- # Utility method for scraping standings
222
- # * formats the team using the URL and the Nokogiri document for the teams page
223
- def format_college_team(url, teams_doc)
224
-
225
- full_name = team_page_full_name(teams_doc, url)
226
- location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
227
- identifier = team_url_parser(url.attribute('href'))
228
- nickname = full_name.gsub("#{location} ",'')
229
-
230
- if nickname == full_name
231
- nickname = full_name.gsub('&','').gsub("#{humanize_identifier(identifier)}", '').strip
232
- end
233
-
234
- if nickname == full_name.gsub('&','').strip
235
- nickname_array = nickname.split(' ')
236
- nickname = nickname_array.each_slice( (nickname_array.size/2.0).round ).to_a[1].join(' ')
237
- nickname = nickname_exceptions(identifier,nickname)
238
- end
239
-
240
- return {
241
- identifier: identifier,
242
- nickname: nickname,
243
- location: location,
244
- full_name: full_name,
245
- url: url.attribute('href').value
246
- }
247
- end
248
-
249
- def humanize_identifier(identifier)
250
- identifier.split('-').map { |x| x.capitalize }.join(' ')
251
- end
252
-
253
- def nickname_exceptions(identifier,nickname)
254
- case identifier
255
- when 'california-state-long-beach' then '49ers'
256
- when 'texas-am-corpus-christi' then 'Islanders'
257
- when 'southern-am' then 'Jaguars'
258
- when 'saint-marys-college-california' then 'Gaels'
259
- else nickname end
260
- end
261
-
262
- # Utility method for scraping standings
263
- # * gets the full team name using the teams page
264
- def team_page_full_name(doc,url)
265
- doc.at_css("a[href='#{url.attribute('href')}']").content
266
- end
267
-
268
- ##########################################
269
- # Gets the current lines for a given sport
270
- def get_lines(urls)
271
- games = []
272
-
273
- urls.each { |url|
274
- is_first_url = games.empty?
275
- doc = Nokogiri::HTML(open(url))
276
- doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
277
-
278
- game_cell = game_row.at_css('td:first-child')
279
- teams = game_cell_parser(game_cell)
280
- game = Game.new(home_team: teams[1], away_team: teams[0])
281
-
282
- if game.teams_found?
283
- game.update(time: get_game_time(game_cell))
284
- game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))
285
- is_first_url ? (games.push game) : (game = game.find_equal(games))
286
- game.update(vegas_info: get_line(get_odds(game_row)))
287
- game.update(vegas_info: get_line(get_odds_inner_html(game_row)))
288
-
289
- elsif is_first_url
290
- last_game = games.last
291
- if last_game then last_game.update(notes: (last_game.notes ? "#{last_game.notes} / " : '') + game_cell.content) end
292
- end
293
- end
294
- }
295
- games
296
- end
297
-
298
- # Utility method for scraping current lines
299
- # * find the identifier for each team
300
- def game_cell_parser(cell)
301
- cell.css('b a').map { |team| team_url_parser(team.attribute('href')) }
302
- end
303
-
304
- # Utility method for scraping current lines
305
- # * getting the time of the game
306
- def get_game_time(cell)
307
- time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
308
- year = ((Date.today.month > time[:mo].to_i) && (Date.today.month - 1 != time[:mo].to_i)) ? Date.today.year + 1 : Date.today.year
309
-
310
- ENV['TZ'] = 'US/Eastern'
311
- time = Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
312
- ENV['TZ'] = nil
313
- time
314
- end
315
-
316
- # Utility method for scraping current lines
317
- # * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
318
- def get_odds(odds_element)
319
- (odds_element.at_css('td:nth-child(3) a')&.content || '').gsub(" ","").gsub("½",".5").strip
320
- end
321
- def get_odds_inner_html(odds_element)
322
- ((odds_element.at_css('td:nth-child(3) a'))&.inner_html || '').encode('utf-8').gsub(" ","").gsub("½",".5").strip
323
- end
324
-
325
- # Utility method for scraping current lines
326
- # * parsing the lines for non-moneyline sports
327
- def get_line(odds_string)
328
- odds_string = odds_string.gsub('PK', '-0')
329
- odds = matchdata_to_hash(RegularExpressions::ODDS.match(odds_string)) || {}
330
- runlines_odds = matchdata_to_hash(RegularExpressions::RUNLINE_ODDS.match(odds_string)) || {}
331
- moneyline_odds = matchdata_to_hash(RegularExpressions::MONEYLINE_ODDS.match(odds_string)) || {}
332
-
333
- result = odds.merge(runlines_odds).merge(moneyline_odds)
334
-
335
- result.each { |k,v| result[k] = result[k].to_s.to_f if result[k] }
336
- get_home_and_away(result)
337
-
338
- end
339
-
340
- # Utility method for scraping current lines
341
- # * filling the home/away lines
342
- def get_home_and_away(result)
343
- result['away_line'] = -result['home_line'] if result['home_line']
344
- result['home_line'] = -result['away_line'] if result['away_line']
345
- result
346
- end
347
-
348
- # Utility method for scraping current lines
349
- # * parsing the odds to get a number
350
- def odds_reader(odds)
351
- case odds&.strip when '',nil then nil when 'PK' then 0 else odds.to_f end
352
- end
353
-
354
- # Utility method for scraping current lines
355
- # * is the game a doubleheader
356
- def doubleheader_id(content)
357
- dh = RegularExpressions::DOUBLEHEADER.match(content)
358
- dh ? dh[:id] : nil
359
- end
360
-
361
- ################################################
362
- # Gets the schedule and results for a team page
363
- def scrape_team_page(url, team)
364
-
365
- games = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr').each_with_index.map do |row,index|
366
-
367
- next if index == 0
368
- game = Game.new(vegas_info: {})
369
- opponent = nil
370
-
371
- row.css('td').each_with_index do |cell,m|
372
-
373
- case m
374
- when 0 then game.update(time: get_game_date(cell,row))
375
- when 1
376
- info = get_game_info(cell, team)
377
- opponent = info[:opponent]
378
- game.update(info[:game_info])
379
- end
380
-
381
- if game_finished?(row)
382
- case m
383
- when 2
384
- formatted = odds_reader(remove_element_whitespace(cell))
385
- home_team = (game.home_or_away_team(team) == :home)
386
- if moneyline_sport
387
- home_team ? game.update(vegas_info: {home_moneyline: formatted}) : game.update(vegas_info: {away_moneyline: formatted})
388
- else
389
- home_line = (formatted && !home_team) ? -formatted : formatted
390
- game.update(vegas_info: {home_line: home_line, away_line: (home_line ? -home_line : nil)})
391
- end
392
-
393
- when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell)})
394
- when 4 then game.update(game_results(cell, team, opponent))
395
- when 5 then game.update(ats_results(cell, team, opponent))
396
- end
397
- end
398
- end
399
- game
400
- end
401
- { team: team, games: games.compact.map{ |game| game } }
402
- end
403
-
404
- # Utility method for scraping team page results
405
- # * gets the date of the game, accounting for different years
406
- def get_game_date(date_string, row)
407
- date = Date.strptime(date_string.content.gsub!(/\s+/, ""), "%b%e")
408
- if game_finished?(row) && date.month > Date.today.month
409
- date = Date.new(Date.today.year - 1, date.month, date.day)
410
- elsif !game_finished?(row) && date.month < Date.today.month
411
- date = Date.new(Date.today.year + 1, date.month, date.day)
412
- end
413
- date.to_time
414
- end
415
-
416
- # Utility method for scraping team page results
417
- # * determines if the game has concluded
418
- def game_finished?(row)
419
- !"#{RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(row.at_css('td:nth-child(5)')))}".empty?
420
- end
421
-
422
- # Utility method for scraping team page results
423
- # * gets the home_team, away_team, and doubleheader info
424
- def get_game_info(cell, primary_team)
425
- url = cell.at_css('a')
426
- home_or_away = remove_element_whitespace(cell)[0] == "@" ? :away : :home
427
- opponent = url ? team_url_parser(url.attribute('href')) : custom_opponent_identifier(cell)
428
-
429
- {
430
- opponent: opponent,
431
- game_info: {
432
- doubleheader: matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader'],
433
- home_team: home_or_away == :home ? primary_team : opponent,
434
- away_team: home_or_away == :away ? primary_team : opponent,
435
- }
436
- }
437
- end
438
-
439
- # Utility method for scraping team page results
440
- # * gets the result of the game
441
- def game_results(cell, primary_team, opponent)
442
- results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
443
- results_hash = matchdata_to_hash(results)
444
- {
445
- ending: (results_hash['result'] ? :ended : results.to_s),
446
- winning_team: case results_hash['result'] when :won then primary_team when :lost then opponent else nil end,
447
- winning_score: case results_hash['result'] when :won then results['team_score'] when :lost then results['oppo_score'] else nil end,
448
- losing_score: case results_hash['result'] when :won then results['oppo_score'] when :lost then results['team_score'] else nil end,
449
- }
450
- end
451
-
452
- # Utility method for scraping team page results
453
- # * gets the spread results
454
- def ats_results(cell, primary_team, opponent)
455
- results = RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell))
456
- results_hash = matchdata_to_hash(results)
457
- {
458
- ats_winner: case results_hash['ats_result'] when :win then primary_team when :loss then opponent else nil end,
459
- over_under_result: results_hash['ou_result']
460
- }
461
- end
462
-
463
- # Utility method for scraping team page results
464
- # * gets the identifier for an opponent without links
465
- def custom_opponent_identifier(cell)
466
- cell.content.strip.gsub(/(\s| )+/, '-').gsub('@-','').downcase[0..-3]
467
- end
468
-
469
- # General Utility Method
470
- # used the get the team identifier from the URL
471
- def team_url_parser(url)
472
- /.+\/team\/(?<team_name>(\w|-)+)/.match(url)[:team_name]
473
- end
474
-
475
- # General Utility Method
476
- # used the remove all whitespace from the content of the element
477
- def remove_element_whitespace(element)
478
- string = element.content.gsub(/(\s| )+/, '')
479
- string.empty? ? '' : string
480
- end
481
-
482
- def matchdata_to_hash(matchdata)
483
- matchdata ? Hash[*matchdata.names.map{ |name| [name,(matchdata[name] ? matchdata[name].downcase.to_sym : nil)] }.flatten].compact : {}
484
- end
485
-
486
- # Regular Expressions Module
487
- module RegularExpressions
488
- RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)/
489
- NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
490
- NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/
491
-
492
- TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
493
- MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x
494
-
495
- ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
496
- ((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(.5)?)-\d\d\z)|
497
- ((?<away_line>-\d+(.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
498
- RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
499
- MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/
500
-
501
- DOUBLEHEADER = /DH Gm (?<id>\d)/
502
- RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/
503
-
504
- GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
505
- SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
506
- end
507
-
508
- class Game
509
- attr_reader :time, :away_team, :home_team, :vegas_info,
510
- :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes
511
-
512
- def initialize(args = {})
513
- Game.sanitize(args).map { |attribute, value| instance_variable_set("@#{attribute}", value) }
514
- end
515
-
516
- def update(args = {})
517
- Game.sanitize(args).map { |attribute, value|
518
- new_val = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
519
- instance_variable_set("@#{attribute}", new_val)
520
- }
521
- return self
522
- end
523
-
524
- def teams_found?
525
- home_team && away_team
526
- end
527
-
528
- def find_equal(games)
529
- games.detect { |g| g == self }
530
- end
531
-
532
- def ==(other_game)
533
- home_team == other_game.home_team && away_team == other_game.away_team && time.to_date == other_game.time.to_date && doubleheader == other_game.doubleheader
534
- end
535
-
536
- def home_or_away_team(team)
537
- case team
538
- when home_team then :home
539
- when away_team then :away
540
- else nil end
541
- end
542
-
543
- def as_json
544
- instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
545
- end
546
-
547
- private
548
- def self.sanitize(args)
549
- permitted_keys = [:time, :away_team, :home_team, :vegas_info,
550
- :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes]
551
- args.select { |key,_| permitted_keys.include? key }
552
- end
553
- end
6
+ attr_reader :sport_id
7
+ attr_reader :sport_name
8
+ attr_reader :moneyline_sport
9
+ attr_reader :teams
10
+
11
+ def initialize
12
+ @moneyline_sport = false
13
+ end
14
+
15
+ def teams
16
+ @teams ||= standings
17
+ end
18
+
19
+ def standings
20
+ @standings ||= scrape_standings
21
+ end
22
+
23
+ # Gets the upcoming/current games for the sport
24
+ def current_games
25
+ @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/","http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"])
26
+ end
27
+
28
+ # Gets all of the schedule and results for each team
29
+ def team_schedules
30
+ @team_schedules ||= teams.map { |team|
31
+ puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
32
+ url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
33
+ scrape_team_page(url, team[:info][:identifier])
34
+ }
35
+ end
36
+
37
+ def live_scores
38
+ @live_scores = get_live_scores("https://web.archive.org/web/20170704205945/http://www.vegasinsider.com/mlb/scoreboard/")
39
+ nil
40
+ end
41
+
42
+ private
43
+
44
+ def scrape_teams
45
+ url = "http://www.vegasinsider.com/#{sport_name}/teams/"
46
+ doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
47
+
48
+ doc.css('a').map do |team_link|
49
+ team = {}
50
+ team[:info] = format_college_team(team_link, doc)
51
+
52
+ row = team_link.parent.parent.previous
53
+ while !(row.at_css('td') && row.at_css('td').attributes['class'].value.include?('viSubHeader1'))
54
+ row = row.previous
55
+ end
56
+ team[:grouping] = { conference: row.at_css('td').content }
57
+ team
58
+ end
59
+ end
60
+
61
######################################################
# Gets the teams and scrapes the records for the teams
# Returns a flat array of team hashes (info/record/grouping). The teams
# page is fetched as well because college names are resolved from it.
def scrape_standings
  standings_teams = []
  url = "http://www.vegasinsider.com/#{sport_name}/standings/"
  doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
  teams_doc = Nokogiri::HTML(open(url.gsub('standings','teams'))).at_css('.main-content-cell')

  doc.css(standings_table_class).each do |conference|
    conference_title = conference.at_css(".viHeaderNorm")
    next if conference_title.nil?

    table = conference.css('.viBodyBorderNorm table')[standings_table_index]
    # Conference USA's college-football markup nests its standings table one
    # level deeper than every other conference, hence the special case.
    table = conference.css('.viBodyBorderNorm table')[2] if (conference_title.content == 'Conference USA' && sport_name == 'college-football')

    if table
      table.css('tr').each_with_index do |row, index|
        # Skip in-table header rows.
        next if (row.at_css('.viSubHeader1') != nil || row.at_css('.viSubHeader2') != nil)
        standings_teams.push(scrape_standings_row(row, conference_division_parser(conference_title.content), teams_doc))
      end
    end
  end
  standings_teams
end
85
+
86
# Utility method for scraping standings
# * gets the standings table class (college pages use a different wrapper)
def standings_table_class
  if college_sport?
    '.SLTables1'
  else
    'table'
  end
end
91
+
92
# Utility method for scraping standings
# * gets the index of the standings table within its wrapper
def standings_table_index
  return 1 if college_sport?
  0
end
97
+
98
# Utility method for scraping standings
# * splits a "Conference - Division" title; college titles have no division
def conference_division_parser(title)
  return { conference: title, division: nil } if college_sport?

  match = /(?<conference>.+) - (?<division>.+)/.match(title)
  { conference: match[:conference], division: match[:division] }
end
108
+
109
+
110
# Utility method for scraping standings
# * is a college sport?
def college_sport?
  case sport_name
  when 'college-football', 'college-basketball' then true
  else false
  end
end
115
+
116
# Utility method for scraping standings
# * scrapes a row of the standings, chooses a helper method based on the league
# sport_id mapping: 0/1 college, 2 NFL, 3/4 NBA & MLB ("pro"), 5 NHL.
def scrape_standings_row(row, grouping, teams_doc)
  team_shell = { info: {}, record: {} }
  team = case sport_id
  when 0,1 then college_standings_row_parser(row, team_shell, teams_doc)
  when 2 then nfl_standings_row_parser(row, team_shell)
  when 3,4 then pro_standings_row_parser(row, team_shell)
  when 5 then hockey_standings_row_parser(row, team_shell)
  end
  # NOTE(review): an unrecognized sport_id leaves `team` nil and the next
  # line raises NoMethodError — confirm sport_id is always 0..5.
  team[:grouping] = grouping
  team
end
129
+
130
# Utility method for scraping standings
# * scrapes a row of the standings, for COLLEGE sports
# Column layout (0-indexed): 0 team link, 5/6 overall W-L, 9/10 home W-L,
# 13/14 away W-L; all other cells are ignored.
def college_standings_row_parser(row, team, teams_doc)
  row.css('td').each_with_index do |cell, cell_index|
    value = remove_element_whitespace(cell)
    case cell_index
    when 0
      team[:info] = format_college_team(cell.at_css('a'), teams_doc)
    when 5 then team[:record][:overall_wins] = value.to_i
    when 6 then team[:record][:overall_losses] = value.to_i
    when 9 then team[:record][:home_wins] = value.to_i
    when 10 then team[:record][:home_losses] = value.to_i
    when 13 then team[:record][:away_wins] = value.to_i
    when 14 then team[:record][:away_losses] = value.to_i
    end
  end
  return team
end
148
+
149
# Utility method for scraping standings
# * scrapes a row of the standings, for NFL
# Columns: 0 team link, 1-3 overall W/L/T, 7 home "W-L-T", 8 away "W-L-T".
def nfl_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 3 then team[:record][:overall_ties] = content.to_i
    when 7
      record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
      # .to_i so matched values are Integers like the fallback hash —
      # MatchData captures are Strings, which made record types inconsistent.
      team[:record][:home_wins] = record[:wins].to_i
      team[:record][:home_losses] = record[:losses].to_i
      team[:record][:home_ties] = record[:ties].to_i
    when 8
      record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
      team[:record][:away_wins] = record[:wins].to_i
      team[:record][:away_losses] = record[:losses].to_i
      team[:record][:away_ties] = record[:ties].to_i
    end
  end
  return team
end
174
+
175
# Utility method for scraping standings
# * scrapes a row of the standings, for PRO (MLB/NBA)
# Columns: 0 team link, 1/2 overall W/L, 5 home "W-L", 6 away "W-L".
def pro_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 5
      record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
      # .to_i so matched values are Integers like the fallback hash —
      # MatchData captures are Strings, which made record types inconsistent.
      team[:record][:home_wins] = record[:wins].to_i
      team[:record][:home_losses] = record[:losses].to_i
    when 6
      record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
      team[:record][:away_wins] = record[:wins].to_i
      team[:record][:away_losses] = record[:losses].to_i
    end
  end
  return team
end
197
+
198
# Utility method for scraping standings
# * scrapes a row of the standings, for NHL
# Columns: 0 team link, 1-5 overall W/L/OTL/SOL/points,
# 8 home "W-L-OTL-SOL", 9 away "W-L-OTL-SOL".
def hockey_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins] = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 3 then team[:record][:over_time_losses] = content.to_i
    when 4 then team[:record][:shootout_losses] = content.to_i
    when 5 then team[:record][:points] = content.to_i
    when 8
      record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
      # .to_i so matched values are Integers like the fallback hash —
      # MatchData captures are Strings, which made record types inconsistent.
      team[:record][:home_wins] = record[:wins].to_i
      team[:record][:home_losses] = record[:losses].to_i
      team[:record][:home_over_time_losses] = record[:ot_losses].to_i
      team[:record][:home_shootout_losses] = record[:shootout_losses].to_i
    when 9
      record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
      team[:record][:away_wins] = record[:wins].to_i
      team[:record][:away_losses] = record[:losses].to_i
      team[:record][:away_over_time_losses] = record[:ot_losses].to_i
      team[:record][:away_shootout_losses] = record[:shootout_losses].to_i
    end
  end
  return team
end
227
+
228
# Utility method for scraping standings
# * formats the team using the URL
# Builds { identifier:, nickname:, location:, full_name:, url: } from the
# team's anchor element. The nickname is the title-cased URL identifier and
# the location is the full name with the nickname removed.
def format_team(url)
  full_name = url.content
  identifier = team_url_parser(url.attribute('href'))
  nickname = humanize_identifier(identifier)

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: url.attribute('href').value
  }
end
243
+
244
# Utility method for scraping standings
# * formats the team using the URL and the Nokogiri document for the teams page
def format_college_team(url, teams_doc)
  full_name = team_page_full_name(teams_doc, url)
  # Link text drops ampersands ("Texas AM"), so restore A&M / A&T.
  # NOTE(review): the bare gsub rewrites ANY "AM"/"AT" substring in the
  # link text — verify this never corrupts other names.
  location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
  identifier = team_url_parser(url.attribute('href'))
  nickname = full_name.gsub("#{location} ",'')

  # Fallback 1: location wasn't a prefix of the full name — strip the
  # title-cased identifier instead.
  if nickname == full_name
    nickname = full_name.gsub('&','').gsub("#{humanize_identifier(identifier)}", '').strip
  end

  # Fallback 2: still nothing stripped — guess the nickname as the back
  # half of the name, then patch known exceptions.
  if nickname == full_name.gsub('&','').strip
    nickname_array = nickname.split(' ')
    nickname = nickname_array.each_slice( (nickname_array.size/2.0).round ).to_a[1].join(' ')
    nickname = nickname_exceptions(identifier,nickname)
  end

  return {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: url.attribute('href').value
  }
end
270
+
271
# Title-cases a hyphenated identifier, e.g. "new-york" => "New York".
def humanize_identifier(identifier)
  identifier.split('-').map(&:capitalize).join(' ')
end
274
+
275
# Hard-coded nicknames for schools whose names defeat the generic
# nickname-guessing in format_college_team.
def nickname_exceptions(identifier, nickname)
  overrides = {
    'california-state-long-beach' => '49ers',
    'texas-am-corpus-christi' => 'Islanders',
    'southern-am' => 'Jaguars',
    'saint-marys-college-california' => 'Gaels'
  }
  overrides.fetch(identifier, nickname)
end
283
+
284
# Utility method for scraping standings
# * gets the full team name using the teams page
# Finds the anchor on the teams page whose href matches `url`'s href and
# returns its text (raises NoMethodError when no such anchor exists).
def team_page_full_name(doc,url)
  doc.at_css("a[href='#{url.attribute('href')}']").content
end
289
+
290
##########################################
# Gets the current lines for a given sport
# Scrapes the scoreboard page and returns an array of result hashes:
# { away_team:, home_team:, scoring: [...], status:, date: }.
def get_live_scores(url)
  doc = Nokogiri::HTML(open(url))

  # Fallback date from the page header; overridden per date-row below.
  date = doc.at_css('.ff_txt2 tr:nth-child(2) font')
  date = Date.strptime(date.content, '%a, %b %d') if date

  games = []

  doc.css('.SLTables4 table > tr').each do |row|

    # Rows carrying a valign attribute are date separators, not games.
    date_row = row.attribute('valign')

    if date_row
      date = parse_score_date(row)
    else

      row.css('.yeallowBg .sportPicksBorder').each do |game|

        result = {}
        # First link is the away team, second the home team.
        game.css('.tanBg a').each_with_index do |team, i|
          if i == 0
            result[:away_team] = team_url_parser(team.attribute('href'))
          else
            result[:home_team] = team_url_parser(team.attribute('href'))
          end
        end

        game_status = remove_element_whitespace(game.at_css('.sub_title_red'), true)
        # NOTE(review): :ended is lower-case but :Postponed/:Cancelled are
        # capitalized — confirm downstream consumers expect this mix.
        game_status = case
        when game_status == 'Final Score' then :ended
        when game_status == 'PPD' then :Postponed
        when game_status == '' then :Cancelled
        when game_status.include?('Game Time') then nil
        else game_status end

        if game_status
          # Column headers for each scoring segment (innings/quarters/...).
          segment_titles = []
          game.css('.sportPicksBg td').each_with_index do |col,i|
            puts remove_element_whitespace(col)
            next if ['Teams', 'Odds', 'ATS', ''].include?(remove_element_whitespace col)
            segment_titles.push remove_element_whitespace col
          end

          # Per-segment scores; the first 3 cells and the trailing cell are
          # not scores, hence the skip and the pop.
          away_values = []
          game.css('.tanBg')[0].css('td').each_with_index do |col,i|
            next if i < 3
            away_values.push remove_element_whitespace(col).to_i
          end
          away_values.pop

          home_values = []
          game.css('.tanBg')[1].css('td').each_with_index do |col,i|
            next if i < 3
            home_values.push remove_element_whitespace(col).to_i
          end
          home_values.pop

        end

        # segment_titles is nil when game_status was nil, so :scoring is
        # only attached to games with a reportable status.
        if segment_titles
          result[:scoring] = segment_titles.each_with_index.map { |s,i|
            { period: s, away: away_values[i], home: home_values[i] }
          }
        end

        # NOTE(review): games with a nil status (upcoming "Game Time" rows)
        # are still pushed, just without :scoring — confirm intended.
        result[:status] = game_status
        result[:date] = date

        games.push(result)
        puts result
        puts "********************"
      end
    end

  end
  return games
end
369
+
370
# Parses a date-separator row like "Week 3 - Thursday September 21, 2017"
# into a Date (the NFL "Week N - " prefix is stripped first; internal
# spacing is preserved because only the ends are trimmed).
def parse_score_date(element)
  str = remove_element_whitespace(element, true).gsub(/Week\s+\d+\s+-\s/,'')
  Date.strptime(str, '%A %B %d, %Y')
end
374
+
375
##########################################
# Gets the current lines for a given sport
# `urls` is an ordered list of odds pages: games are created from the first
# URL and merged (matched by teams/date/doubleheader) from later ones.
def get_lines(urls)
  games = []

  urls.each { |url|
    is_first_url = games.empty?
    doc = Nokogiri::HTML(open(url))
    doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|

      game_cell = game_row.at_css('td:first-child')
      teams = game_cell_parser(game_cell)
      game = Game.new(home_team: teams[1], away_team: teams[0])

      if game.teams_found?
        game.update(time: get_game_time(game_cell))
        # The doubleheader tag lives two rows below the game row.
        game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))
        # NOTE(review): find_equal may return nil when a later URL lists a
        # game absent from the first — the next line would then raise.
        is_first_url ? (games.push game) : (game = game.find_equal(games))
        # Parse odds from both the text and the raw inner HTML (the HTML
        # variant keeps <br> separators the ODDS pattern relies on).
        game.update(vegas_info: get_line(get_odds(game_row)))
        game.update(vegas_info: get_line(get_odds_inner_html(game_row)))

      elsif is_first_url
        # Rows without team links are notes attached to the previous game.
        last_game = games.last
        if last_game then last_game.update(notes: (last_game.notes ? "#{last_game.notes} / " : '') + game_cell.content) end
      end
    end
  }
  games
end
404
+
405
# Utility method for scraping current lines
# * find the identifier for each team linked in the matchup cell
def game_cell_parser(cell)
  cell.css('b a').map do |team|
    team_url_parser(team.attribute('href'))
  end
end
410
+
411
# Utility method for scraping current lines
# * getting the time of the game
# Parses "MM/DD H:MM AM" from the cell and builds a Time in US/Eastern.
# The year is inferred: a month earlier than today (other than last month)
# is assumed to belong to the next year.
def get_game_time(cell)
  time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
  year = ((Date.today.month > time[:mo].to_i) && (Date.today.month - 1 != time[:mo].to_i)) ? Date.today.year + 1 : Date.today.year

  # Temporarily force TZ so strptime interprets the time as Eastern.
  # Fix: restore the previous value (the original clobbered it to nil) and
  # do so in an ensure block so a parse failure can't leak the override.
  previous_tz = ENV['TZ']
  ENV['TZ'] = 'US/Eastern'
  begin
    Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
  ensure
    ENV['TZ'] = previous_tz
  end
end
422
+
423
# Utility method for scraping current lines
# * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
# Reads the text of the odds link in the 3rd column; '' when absent.
# NOTE(review): the first gsub target may be a non-breaking space (U+00A0)
# rather than an ASCII space — preserved as-is; confirm in the repo.
def get_odds(odds_element)
  (odds_element.at_css('td:nth-child(3) a')&.content || '').gsub(" ","").gsub("½",".5").strip
end
428
# Same as get_odds but reads the raw inner HTML, so <br> separators survive
# for the ODDS pattern applied in get_line.
def get_odds_inner_html(odds_element)
  ((odds_element.at_css('td:nth-child(3) a'))&.inner_html || '').encode('utf-8').gsub(" ","").gsub("½",".5").strip
end
431
+
432
# Utility method for scraping current lines
# * parsing the lines for non-moneyline sports
# Tries the spread, runline, and moneyline patterns and merges whatever
# matched into one hash, then mirrors home/away lines.
def get_line(odds_string)
  # 'PK' (pick'em) is a zero-point spread.
  odds_string = odds_string.gsub('PK', '-0')
  odds = matchdata_to_hash(RegularExpressions::ODDS.match(odds_string)) || {}
  runlines_odds = matchdata_to_hash(RegularExpressions::RUNLINE_ODDS.match(odds_string)) || {}
  moneyline_odds = matchdata_to_hash(RegularExpressions::MONEYLINE_ODDS.match(odds_string)) || {}

  result = odds.merge(runlines_odds).merge(moneyline_odds)

  # Captures come back from matchdata_to_hash as downcased symbols;
  # convert every present value to a Float.
  result.each { |k,v| result[k] = result[k].to_s.to_f if result[k] }
  get_home_and_away(result)

end
446
+
447
# Utility method for scraping current lines
# * filling the home/away lines: whichever side's line is known determines
#   the other side as its negation; an empty hash passes through untouched.
def get_home_and_away(result)
  home = result['home_line']
  if home
    result['away_line'] = -home
    result['home_line'] = home
  else
    away = result['away_line']
    result['home_line'] = -away if away
  end
  result
end
454
+
455
# Utility method for scraping current lines
# * parsing the odds to get a number: nil for blank/nil input, 0 for a
#   pick'em ('PK'), otherwise the float value.
def odds_reader(odds)
  stripped = odds&.strip
  return nil if stripped.nil? || stripped.empty?
  return 0 if stripped == 'PK'
  odds.to_f
end
460
+
461
# Utility method for scraping current lines
# * is the game a doubleheader — returns the game number string or nil
def doubleheader_id(content)
  if (match = RegularExpressions::DOUBLEHEADER.match(content))
    match[:id]
  end
end
467
+
468
################################################
# Gets the schedule and results for a team page
# Returns { team: identifier, games: [Game, ...] } scraped from the team's
# schedule table; finished games also carry odds and result fields.
def scrape_team_page(url, team)

  games = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr').each_with_index.map do |row,index|

    # Header row — the `next` yields nil, compacted away below.
    next if index == 0
    game = Game.new(vegas_info: {})
    opponent = nil

    row.css('td').each_with_index do |cell,m|

      # Columns 0/1 (date, matchup) exist for every game.
      case m
      when 0 then game.update(time: get_game_date(cell,row))
      when 1
        info = get_game_info(cell, team)
        opponent = info[:opponent]
        game.update(info[:game_info])
      end

      # Columns 2-5 (line, O/U, score, ATS) only carry data for games that
      # have been played.
      if game_finished?(row)
        case m
        when 2
          formatted = odds_reader(remove_element_whitespace(cell))
          home_team = (game.home_or_away_team(team) == :home)
          if moneyline_sport
            # Moneyline sports: store the price for whichever side this team is.
            home_team ? game.update(vegas_info: {home_moneyline: formatted}) : game.update(vegas_info: {away_moneyline: formatted})
          else
            # Spread sports: the listed line belongs to this team; flip the
            # sign when this team is the visitor.
            home_line = (formatted && !home_team) ? -formatted : formatted
            game.update(vegas_info: {home_line: home_line, away_line: (home_line ? -home_line : nil)})
          end

        when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell)})
        when 4 then game.update(game_results(cell, team, opponent))
        when 5 then game.update(ats_results(cell, team, opponent))
        end
      end
    end
    game
  end
  # NOTE(review): the trailing .map { |game| game } is an identity map and
  # could be dropped.
  { team: team, games: games.compact.map{ |game| game } }
end
510
+
511
# Utility method for scraping team page results
# * gets the date of the game, accounting for different years
# The cell only shows month/day ("Apr 3"), so the year is inferred: a
# finished game with a later month than today happened last year, and an
# upcoming game with an earlier month is next year.
def get_game_date(date_string, row)
  # NOTE(review): gsub! returns nil when nothing was removed — this relies
  # on the cell always containing whitespace; confirm against live markup.
  date = Date.strptime(date_string.content.gsub!(/\s+/, ""), "%b%e")
  if game_finished?(row) && date.month > Date.today.month
    date = Date.new(Date.today.year - 1, date.month, date.day)
  elsif !game_finished?(row) && date.month < Date.today.month
    date = Date.new(Date.today.year + 1, date.month, date.day)
  end
  date.to_time
end
522
+
523
# Utility method for scraping team page results
# * determines if the game has concluded
# True when the results column (5th cell) matches the GAME_RESULTS pattern
# (a score, "Postponed", or "Cancelled") — the match is interpolated to a
# string so a nil MatchData becomes "" and reads as not finished.
def game_finished?(row)
  !"#{RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(row.at_css('td:nth-child(5)')))}".empty?
end
528
+
529
# Utility method for scraping team page results
# * gets the home_team, away_team, and doubleheader info
# A leading "@" in the cell means the primary team is on the road; opponents
# without a link (non-league teams) get a synthesized identifier.
def get_game_info(cell, primary_team)
  url = cell.at_css('a')
  home_or_away = remove_element_whitespace(cell)[0] == "@" ? :away : :home
  opponent = url ? team_url_parser(url.attribute('href')) : custom_opponent_identifier(cell)

  {
    opponent: opponent,
    game_info: {
      doubleheader: matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader'],
      home_team: home_or_away == :home ? primary_team : opponent,
      away_team: home_or_away == :away ? primary_team : opponent,
    }
  }
end
545
+
546
# Utility method for scraping team page results
# * gets the result of the game
# NOTE(review): matchdata_to_hash downcases captures into symbols, so the
# :won/:lost comparisons assume the cell literally reads "Won"/"Lost" —
# verify against the live markup.
def game_results(cell, primary_team, opponent)
  results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  results_hash = matchdata_to_hash(results)
  {
    ending: (results_hash['result'] ? :ended : results.to_s),
    winning_team: case results_hash['result'] when :won then primary_team when :lost then opponent else nil end,
    winning_score: case results_hash['result'] when :won then results['team_score'] when :lost then results['oppo_score'] else nil end,
    losing_score: case results_hash['result'] when :won then results['oppo_score'] when :lost then results['team_score'] else nil end,
  }
end
558
+
559
# Utility method for scraping team page results
# * gets the spread results
# The cell reads like "Win/Over": ats_result is the against-the-spread side,
# ou_result the over/under side; a bare value is treated as ou_result only.
def ats_results(cell, primary_team, opponent)
  results = RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell))
  results_hash = matchdata_to_hash(results)
  {
    ats_winner: case results_hash['ats_result'] when :win then primary_team when :loss then opponent else nil end,
    over_under_result: results_hash['ou_result']
  }
end
569
+
570
# Utility method for scraping team page results
# * gets the identifier for an opponent without links: hyphenate the cell
#   text, drop the leading "@-", downcase, and trim the final two characters.
def custom_opponent_identifier(cell)
  slug = cell.content.strip.gsub(/(\s| )+/, '-')
  slug = slug.gsub('@-', '').downcase
  slug[0..-3]
end
575
+
576
# General Utility Method
# used the get the team identifier (slug after "/team/") from the URL
def team_url_parser(url)
  match = /.+\/team\/(?<team_name>(\w|-)+)/.match(url)
  match[:team_name]
end
581
+
582
# General Utility Method
# used the remove all whitespace from the content of the element
# With only_end: true only leading/trailing runs are stripped; otherwise
# every whitespace run (the second alternative looks like a non-breaking
# space) is removed.
def remove_element_whitespace(element, only_end = false)
  # Fix: the original's trailing `string.empty? ? '' : string` was a no-op
  # (an empty string is already ''), so return the gsub result directly.
  element.content.gsub(only_end ? /^(\s| )+|(\s| )+\z/ : /(\s| )+/, '')
end
588
+
589
# Converts a MatchData into { capture_name => downcased symbol }, dropping
# captures that did not participate in the match; {} for nil input.
def matchdata_to_hash(matchdata)
  return {} unless matchdata
  pairs = matchdata.names.map do |name|
    value = matchdata[name]
    [name, value ? value.downcase.to_sym : nil]
  end
  Hash[pairs].compact
end
592
+
593
# Regular Expressions Module
# Patterns used to pick records, times, and betting lines out of scraped
# text; captures are named so matchdata_to_hash can key on them.
module RegularExpressions
  # "W-L", "W-L-T" (NFL), and "W-L-OTL-SOL" (NHL) record strings.
  RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # "MM/DD H:MM AM" game times.
  TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
  # Over/under like "8.5o" / "7u".
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x

  # Spread + total combinations as they appear in the odds cell's inner
  # HTML (<br>-separated); "EV" is even juice.
  ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
  ((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(.5)?)-\d\d\z)|
  ((?<away_line>-\d+(.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
  # Runline like "-1.5/+140+1.5/-160".
  RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
  MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/

  # "DH Gm 1" (odds pages) and "(DH 1)" (team schedule pages).
  DOUBLEHEADER = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
614
+
615
# Plain value object for a scraped game. Attributes are only ever set
# through the sanitized constructor/update, so unknown keys are ignored.
class Game
  attr_reader :time, :away_team, :home_team, :vegas_info,
  :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes

  def initialize(args = {})
    Game.sanitize(args).map { |attribute, value| instance_variable_set("@#{attribute}", value) }
  end

  # Merges new attributes into the game and returns self (chainable).
  # NOTE(review): for :vegas_info the EXISTING hash wins over the incoming
  # one (`value.merge(vegas_info)`) — new keys are added but overlapping
  # keys keep their first-seen value; confirm that is intended.
  def update(args = {})
    Game.sanitize(args).map { |attribute, value|
      new_val = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
      instance_variable_set("@#{attribute}", new_val)
    }
    return self
  end

  # Both team identifiers were parsed from the page.
  def teams_found?
    home_team && away_team
  end

  # The already-recorded game equal to this one (see ==), or nil.
  def find_equal(games)
    games.detect { |g| g == self }
  end

  # Games match on both teams, the calendar date, and the doubleheader id.
  def ==(other_game)
    home_team == other_game.home_team && away_team == other_game.away_team && time.to_date == other_game.time.to_date && doubleheader == other_game.doubleheader
  end

  # :home / :away for the given identifier; nil if the team isn't playing.
  def home_or_away_team(team)
    case team
    when home_team then :home
    when away_team then :away
    else nil end
  end

  def as_json
    instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
  end

  private
  # Whitelists the attribute keys a caller may set.
  # NOTE(review): `private` has no effect on `def self.` singleton methods,
  # so Game.sanitize stays publicly callable — and it is invoked with an
  # explicit receiver above, so private_class_method would break it.
  def self.sanitize(args)
    permitted_keys = [:time, :away_team, :home_team, :vegas_info,
    :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes]
    args.select { |key,_| permitted_keys.include? key }
  end
end
554
661
 
555
662
  end
556
663
 
@@ -561,7 +668,7 @@ class Array
561
668
  end
562
669
 
563
670
  class Hash
564
- def compact
671
+ def compact
565
672
  self.select { |_, value| !value.nil? }
566
673
  end
567
674
  end