vegas_insider_scraper 0.0.15 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4406cf01f39908cc6e374486a063ad22f52374bf
4
- data.tar.gz: 445c1ddcdbae187568c046f21b09f187a6fbe7f7
2
+ SHA256:
3
+ metadata.gz: 538bf0d9d798e8602915f700679ad9541dcad4b6f279cf4fd8a734030d556821
4
+ data.tar.gz: 4d0d701086e782e36ecc3524145a30189627f1940d4b5c5b6473bc4a1cb167e9
5
5
  SHA512:
6
- metadata.gz: 61eb9deb49c1fec787015199b3eabd24a503773c4a15df7dcea3b2670db5d4fcc9a8d2a8ef7929245b5633c782b4b5706b79d6d928a5be11c7b8e55b42d97787
7
- data.tar.gz: 5e62b335b39d4788356b335757bc2454d06dec2b490e0ea8043cebc9b7b6c72ae25486b14a504fcf45d7739639a308b762124697b82f82c7a25036dcf843ce98
6
+ metadata.gz: e4b0a4db84a2ce082e21de78de71c02a51f12c8c1bc4325f60151c9547cdb06ffa1ebe93b42bbae7d5206b433f76284a792d6c17f36585cb8eecde12422f95b4
7
+ data.tar.gz: b6a1ff071b8d3675cf318468c45fbc4a8806eb750cbd09be8a0656dbb9415b6b4229a978a8cf90febfec68a0c051c20e0d072d754b4db3af840833b09936a597
data/lib/sports/mlb.rb CHANGED
@@ -1,16 +1,16 @@
1
1
 
2
2
  class MLB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 4
6
- @sport_name = :mlb
7
- super
8
- @moneyline_sport = true
9
- end
4
+ def initialize
5
+ @sport_id = 4
6
+ @sport_name = :mlb
7
+ super
8
+ @moneyline_sport = true
9
+ end
10
10
 
11
- def current_games
12
- @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
13
- "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"])
14
- end
11
+ def current_games
12
+ @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
13
+ "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"])
14
+ end
15
15
 
16
16
  end
data/lib/sports/nba.rb CHANGED
@@ -1,10 +1,10 @@
1
1
 
2
2
  class NBA < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 3
6
- @sport_name = :nba
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 3
6
+ @sport_name = :nba
7
+ super
8
+ end
9
9
 
10
10
  end
data/lib/sports/ncaabb.rb CHANGED
@@ -1,22 +1,22 @@
1
1
 
2
2
  class NCAABB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 1
6
- @sport_name = 'college-basketball'
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 1
6
+ @sport_name = 'college-basketball'
7
+ super
8
+ end
9
9
 
10
- # def get_nicknames
11
- # start_time = Time.now
12
- # num_successes = 0
13
- # Team.ncaabb_teams.each_with_index do |team, i|
14
- # url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
- # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
- # team.nickname = nickname
17
- # team.save
18
- # end
19
- # Time.now - start_time
20
- # end
10
+ # def get_nicknames
11
+ # start_time = Time.now
12
+ # num_successes = 0
13
+ # Team.ncaabb_teams.each_with_index do |team, i|
14
+ # url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
+ # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
+ # team.nickname = nickname
17
+ # team.save
18
+ # end
19
+ # Time.now - start_time
20
+ # end
21
21
 
22
22
  end
data/lib/sports/ncaafb.rb CHANGED
@@ -1,80 +1,84 @@
1
1
 
2
2
  class NCAAFB < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 0
6
- @sport_name = 'college-football'
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 0
6
+ @sport_name = 'college-football'
7
+ super
8
+ end
9
9
 
10
- # def get_nicknames
11
- # start_time = Time.now
12
- # Team.where(sport_id: 0).each_with_index do |team, i|
13
- # next if team.nickname
14
- # url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
15
- # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
16
- # team.nickname = nickname
17
- # team.save
18
- # end
19
- # Time.now - start_time
20
- # end
10
+ def teams
11
+ @teams ||= scrape_teams
12
+ end
21
13
 
22
- # def get_locations
23
- # start_time = Time.now
24
- # Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
25
- # team.location = nil
26
- # team.save
27
- # end
28
- # Time.now - start_time
29
- # end
14
+ # def get_nicknames
15
+ # start_time = Time.now
16
+ # Team.where(sport_id: 0).each_with_index do |team, i|
17
+ # next if team.nickname
18
+ # url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
19
+ # nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
20
+ # team.nickname = nickname
21
+ # team.save
22
+ # end
23
+ # Time.now - start_time
24
+ # end
30
25
 
31
- # def scrape_custom_team_page_for_location(vegas_identifier, url)
32
- # doc = Nokogiri::HTML(open(url))
33
- # title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
34
- # return title
35
- # end
26
+ # def get_locations
27
+ # start_time = Time.now
28
+ # Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
29
+ # team.location = nil
30
+ # team.save
31
+ # end
32
+ # Time.now - start_time
33
+ # end
36
34
 
37
- # def remove_nickname_from_location
38
- # start_time = Time.now
39
- # Team.where(sport_id: 0).each_with_index do |team, i|
40
- # puts team.location
41
- # puts team.location.gsub(" #{team.nickname}", '')
42
- # end
43
- # Time.now - start_time
44
- # end
35
+ # def scrape_custom_team_page_for_location(vegas_identifier, url)
36
+ # doc = Nokogiri::HTML(open(url))
37
+ # title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
38
+ # return title
39
+ # end
45
40
 
46
- # def scrape_fcs_teams
47
- # url = 'http://www.vegasinsider.com/college-football/teams/'
48
- # doc = Nokogiri::HTML(open(url))
41
+ # def remove_nickname_from_location
42
+ # start_time = Time.now
43
+ # Team.where(sport_id: 0).each_with_index do |team, i|
44
+ # puts team.location
45
+ # puts team.location.gsub(" #{team.nickname}", '')
46
+ # end
47
+ # Time.now - start_time
48
+ # end
49
49
 
50
- # current_conference = nil
51
- # fcs = []
50
+ # def scrape_fcs_teams
51
+ # url = 'http://www.vegasinsider.com/college-football/teams/'
52
+ # doc = Nokogiri::HTML(open(url))
52
53
 
53
- # doc.css('.main-content-cell table table table').each_with_index do |col,i|
54
- # col.css('tr').each do |row|
55
- # new_conference = row.at_css('td.viSubHeader1')
54
+ # current_conference = nil
55
+ # fcs = []
56
56
 
57
- # if new_conference
58
- # current_conference = new_conference.content
59
- # else
60
- # team = row.at_css('a')
61
- # if team
62
- # team_formatted = {
63
- # team_name: team.content,
64
- # team_url_id: team_url_parser(team.attribute('href')),
65
- # conference: current_conference,
66
- # league: sport_id
67
- # }
68
- # puts team_formatted
69
- # fcs.push team_formatted
70
- # end
71
- # end
72
- # end
73
- # end
57
+ # doc.css('.main-content-cell table table table').each_with_index do |col,i|
58
+ # col.css('tr').each do |row|
59
+ # new_conference = row.at_css('td.viSubHeader1')
74
60
 
75
- # Team.save_teams(fcs)
76
- # return true
61
+ # if new_conference
62
+ # current_conference = new_conference.content
63
+ # else
64
+ # team = row.at_css('a')
65
+ # if team
66
+ # team_formatted = {
67
+ # team_name: team.content,
68
+ # team_url_id: team_url_parser(team.attribute('href')),
69
+ # conference: current_conference,
70
+ # league: sport_id
71
+ # }
72
+ # puts team_formatted
73
+ # fcs.push team_formatted
74
+ # end
75
+ # end
76
+ # end
77
+ # end
77
78
 
78
- # end
79
+ # Team.save_teams(fcs)
80
+ # return true
81
+
82
+ # end
79
83
 
80
84
  end
data/lib/sports/nfl.rb CHANGED
@@ -1,10 +1,10 @@
1
1
 
2
2
  class NFL < ScraperLeague
3
3
 
4
- def initialize
5
- @sport_id = 2
6
- @sport_name = :nfl
7
- super
8
- end
4
+ def initialize
5
+ @sport_id = 2
6
+ @sport_name = :nfl
7
+ super
8
+ end
9
9
 
10
10
  end
@@ -3,554 +3,661 @@ require 'open-uri'
3
3
 
4
4
  class ScraperLeague
5
5
 
6
- attr_reader :sport_id
7
- attr_reader :sport_name
8
- attr_reader :moneyline_sport
9
- attr_reader :teams
10
-
11
- def initialize
12
- @moneyline_sport = false
13
- end
14
-
15
- def teams
16
- @teams ||= scrape_standings
17
- end
18
-
19
- # Gets the upcoming/current games for the sport
20
- def current_games
21
- @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/","http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"])
22
- end
23
-
24
- # Gets all of the schedule and results for each team
25
- def team_schedules
26
- @team_schedules ||= teams.map { |team|
27
- puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
28
- url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
29
- scrape_team_page(url, team[:info][:identifier])
30
- }
31
- end
32
-
33
- private
34
-
35
- ######################################################
36
- # Gets the teams and scrapes the records for the teams
37
- def scrape_standings
38
- standings_teams = []
39
-
40
- url = "http://www.vegasinsider.com/#{sport_name}/standings/"
41
- doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
42
- teams_doc = Nokogiri::HTML(open(url.gsub('standings','teams'))).at_css('.main-content-cell')
43
-
44
- doc.css(standings_table_class).each do |conference|
45
-
46
- conference_title = conference.at_css(".viHeaderNorm")
47
-
48
- next if conference_title.nil?
49
-
50
- table = conference.css('.viBodyBorderNorm table')[standings_table_index]
51
- table = conference.css('.viBodyBorderNorm table')[2] if (conference_title.content == 'Conference USA' && sport_name == 'college-football')
52
-
53
- if table
54
- table.css('tr').each_with_index do |row, index|
55
- next if (row.at_css('.viSubHeader1') != nil || row.at_css('.viSubHeader2') != nil)
56
- standings_teams.push(scrape_standings_row(row, conference_division_parser(conference_title.content), teams_doc))
57
- end
58
- end
59
- end
60
- standings_teams
61
- end
62
-
63
- # Utility method for scraping standings
64
- # * gets the standings table class
65
- def standings_table_class
66
- college_sport? ? '.SLTables1' : 'table'
67
- end
68
-
69
- # Utility method for scraping standings
70
- # * gets the index of the table
71
- def standings_table_index
72
- college_sport? ? 1 : 0
73
- end
74
-
75
- # Utility method for scraping standings
76
- # * gets the standings table class
77
- def conference_division_parser(title)
78
- if college_sport?
79
- return { conference: title, division: nil }
80
- else
81
- result = /(?<conference>.+) - (?<division>.+)/.match(title)
82
- return { conference: result[:conference], division: result[:division] }
83
- end
84
- end
85
-
86
-
87
- # Utility method for scraping standings
88
- # * is a college sport?
89
- def college_sport?
90
- ['college-football','college-basketball'].include?(sport_name)
91
- end
92
-
93
- # Utility method for scraping standings
94
- # * scrapes a row of the standings, chooses a helper method based on the league
95
- def scrape_standings_row(row, grouping, teams_doc)
96
- team_shell = { info: {}, record: {} }
97
- team = case sport_id
98
- when 0,1 then college_standings_row_parser(row, team_shell, teams_doc)
99
- when 2 then nfl_standings_row_parser(row, team_shell)
100
- when 3,4 then pro_standings_row_parser(row, team_shell)
101
- when 5 then hockey_standings_row_parser(row, team_shell)
102
- end
103
- team[:grouping] = grouping
104
- team
105
- end
106
-
107
- # Utility method for scraping standings
108
- # * scrapes a row of the standings, for COLLEGE sports
109
- def college_standings_row_parser(row, team, teams_doc)
110
- row.css('td').each_with_index do |cell, cell_index|
111
- value = remove_element_whitespace(cell)
112
- case cell_index
113
- when 0
114
- team[:info] = format_college_team(cell.at_css('a'), teams_doc)
115
- when 5 then team[:record][:overall_wins] = value.to_i
116
- when 6 then team[:record][:overall_losses] = value.to_i
117
- when 9 then team[:record][:home_wins] = value.to_i
118
- when 10 then team[:record][:home_losses] = value.to_i
119
- when 13 then team[:record][:away_wins] = value.to_i
120
- when 14 then team[:record][:away_losses] = value.to_i
121
- end
122
- end
123
- return team
124
- end
125
-
126
- # Utility method for scraping standings
127
- # * scrapes a row of the standings, for NFL
128
- def nfl_standings_row_parser(row, team)
129
- row.css('td').each_with_index do |cell, cell_index|
130
- content = remove_element_whitespace(cell)
131
-
132
- case cell_index
133
- when 0 then team[:info] = format_team(cell.at_css('a'))
134
- when 1 then team[:record][:overall_wins] = content.to_i
135
- when 2 then team[:record][:overall_losses] = content.to_i
136
- when 3 then team[:record][:overall_ties] = content.to_i
137
- when 7
138
- record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
139
- team[:record][:home_wins] = record[:wins]
140
- team[:record][:home_losses] = record[:losses]
141
- team[:record][:home_ties] = record[:ties]
142
- when 8
143
- record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
144
- team[:record][:away_wins] = record[:wins]
145
- team[:record][:away_losses] = record[:losses]
146
- team[:record][:away_ties] = record[:ties]
147
- end
148
- end
149
- return team
150
- end
151
-
152
- # Utility method for scraping standings
153
- # * scrapes a row of the standings, for PRO (MLB)
154
- def pro_standings_row_parser(row, team)
155
- row.css('td').each_with_index do |cell, cell_index|
156
- content = remove_element_whitespace(cell)
157
-
158
- case cell_index
159
- when 0 then team[:info] = format_team(cell.at_css('a'))
160
- when 1 then team[:record][:overall_wins] = content.to_i
161
- when 2 then team[:record][:overall_losses] = content.to_i
162
- when 5
163
- record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
164
- team[:record][:home_wins] = record[:wins]
165
- team[:record][:home_losses] = record[:losses]
166
- when 6
167
- record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
168
- team[:record][:away_wins] = record[:wins]
169
- team[:record][:away_losses] = record[:losses]
170
- end
171
- end
172
- return team
173
- end
174
-
175
- # Utility method for scraping standings
176
- # * scrapes a row of the standings, for NHL
177
- def hockey_standings_row_parser(row, team)
178
- row.css('td').each_with_index do |cell, cell_index|
179
- content = remove_element_whitespace(cell)
180
-
181
- case cell_index
182
- when 0 then team[:info] = format_team(cell.at_css('a'))
183
- when 1 then team[:record][:overall_wins] = content.to_i
184
- when 2 then team[:record][:overall_losses] = content.to_i
185
- when 3 then team[:record][:over_time_losses] = content.to_i
186
- when 4 then team[:record][:shootout_losses] = content.to_i
187
- when 5 then team[:record][:points] = content.to_i
188
- when 8
189
- record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
190
- team[:record][:home_wins] = record[:wins]
191
- team[:record][:home_losses] = record[:losses]
192
- team[:record][:home_over_time_losses] = record[:ot_losses]
193
- team[:record][:home_shootout_losses] = record[:shootout_losses]
194
- when 9
195
- record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
196
- team[:record][:away_wins] = record[:wins]
197
- team[:record][:away_losses] = record[:losses]
198
- team[:record][:away_over_time_losses] = record[:ot_losses]
199
- team[:record][:away_shootout_losses] = record[:shootout_losses]
200
- end
201
- end
202
- return team
203
- end
204
-
205
- # Utility method for scraping standings
206
- # * formats the team using the URL
207
- def format_team(url)
208
- full_name = url.content
209
- identifier = team_url_parser(url.attribute('href'))
210
- nickname = humanize_identifier(identifier)
211
-
212
- return {
213
- identifier: identifier,
214
- nickname: nickname,
215
- location: full_name.gsub(" #{nickname}", ''),
216
- full_name: full_name,
217
- url: url.attribute('href').value
218
- }
219
- end
220
-
221
- # Utility method for scraping standings
222
- # * formats the team using the URL and the Nokogiri document for the teams page
223
- def format_college_team(url, teams_doc)
224
-
225
- full_name = team_page_full_name(teams_doc, url)
226
- location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
227
- identifier = team_url_parser(url.attribute('href'))
228
- nickname = full_name.gsub("#{location} ",'')
229
-
230
- if nickname == full_name
231
- nickname = full_name.gsub('&','').gsub("#{humanize_identifier(identifier)}", '').strip
232
- end
233
-
234
- if nickname == full_name.gsub('&','').strip
235
- nickname_array = nickname.split(' ')
236
- nickname = nickname_array.each_slice( (nickname_array.size/2.0).round ).to_a[1].join(' ')
237
- nickname = nickname_exceptions(identifier,nickname)
238
- end
239
-
240
- return {
241
- identifier: identifier,
242
- nickname: nickname,
243
- location: location,
244
- full_name: full_name,
245
- url: url.attribute('href').value
246
- }
247
- end
248
-
249
- def humanize_identifier(identifier)
250
- identifier.split('-').map { |x| x.capitalize }.join(' ')
251
- end
252
-
253
- def nickname_exceptions(identifier,nickname)
254
- case identifier
255
- when 'california-state-long-beach' then '49ers'
256
- when 'texas-am-corpus-christi' then 'Islanders'
257
- when 'southern-am' then 'Jaguars'
258
- when 'saint-marys-college-california' then 'Gaels'
259
- else nickname end
260
- end
261
-
262
- # Utility method for scraping standings
263
- # * gets the full team name using the teams page
264
- def team_page_full_name(doc,url)
265
- doc.at_css("a[href='#{url.attribute('href')}']").content
266
- end
267
-
268
- ##########################################
269
- # Gets the current lines for a given sport
270
- def get_lines(urls)
271
- games = []
272
-
273
- urls.each { |url|
274
- is_first_url = games.empty?
275
- doc = Nokogiri::HTML(open(url))
276
- doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
277
-
278
- game_cell = game_row.at_css('td:first-child')
279
- teams = game_cell_parser(game_cell)
280
- game = Game.new(home_team: teams[1], away_team: teams[0])
281
-
282
- if game.teams_found?
283
- game.update(time: get_game_time(game_cell))
284
- game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))
285
- is_first_url ? (games.push game) : (game = game.find_equal(games))
286
- game.update(vegas_info: get_line(get_odds(game_row)))
287
- game.update(vegas_info: get_line(get_odds_inner_html(game_row)))
288
-
289
- elsif is_first_url
290
- last_game = games.last
291
- if last_game then last_game.update(notes: (last_game.notes ? "#{last_game.notes} / " : '') + game_cell.content) end
292
- end
293
- end
294
- }
295
- games
296
- end
297
-
298
- # Utility method for scraping current lines
299
- # * find the identifier for each team
300
- def game_cell_parser(cell)
301
- cell.css('b a').map { |team| team_url_parser(team.attribute('href')) }
302
- end
303
-
304
- # Utility method for scraping current lines
305
- # * getting the time of the game
306
- def get_game_time(cell)
307
- time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
308
- year = ((Date.today.month > time[:mo].to_i) && (Date.today.month - 1 != time[:mo].to_i)) ? Date.today.year + 1 : Date.today.year
309
-
310
- ENV['TZ'] = 'US/Eastern'
311
- time = Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
312
- ENV['TZ'] = nil
313
- time
314
- end
315
-
316
- # Utility method for scraping current lines
317
- # * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
318
- def get_odds(odds_element)
319
- (odds_element.at_css('td:nth-child(3) a')&.content || '').gsub(" ","").gsub("½",".5").strip
320
- end
321
- def get_odds_inner_html(odds_element)
322
- ((odds_element.at_css('td:nth-child(3) a'))&.inner_html || '').encode('utf-8').gsub(" ","").gsub("½",".5").strip
323
- end
324
-
325
- # Utility method for scraping current lines
326
- # * parsing the lines for non-moneyline sports
327
- def get_line(odds_string)
328
- odds_string = odds_string.gsub('PK', '-0')
329
- odds = matchdata_to_hash(RegularExpressions::ODDS.match(odds_string)) || {}
330
- runlines_odds = matchdata_to_hash(RegularExpressions::RUNLINE_ODDS.match(odds_string)) || {}
331
- moneyline_odds = matchdata_to_hash(RegularExpressions::MONEYLINE_ODDS.match(odds_string)) || {}
332
-
333
- result = odds.merge(runlines_odds).merge(moneyline_odds)
334
-
335
- result.each { |k,v| result[k] = result[k].to_s.to_f if result[k] }
336
- get_home_and_away(result)
337
-
338
- end
339
-
340
- # Utility method for scraping current lines
341
- # * filling the home/away lines
342
- def get_home_and_away(result)
343
- result['away_line'] = -result['home_line'] if result['home_line']
344
- result['home_line'] = -result['away_line'] if result['away_line']
345
- result
346
- end
347
-
348
- # Utility method for scraping current lines
349
- # * parsing the odds to get a number
350
- def odds_reader(odds)
351
- case odds&.strip when '',nil then nil when 'PK' then 0 else odds.to_f end
352
- end
353
-
354
- # Utility method for scraping current lines
355
- # * is the game a doubleheader
356
- def doubleheader_id(content)
357
- dh = RegularExpressions::DOUBLEHEADER.match(content)
358
- dh ? dh[:id] : nil
359
- end
360
-
361
- ################################################
362
- # Gets the schedule and results for a team page
363
- def scrape_team_page(url, team)
364
-
365
- games = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr').each_with_index.map do |row,index|
366
-
367
- next if index == 0
368
- game = Game.new(vegas_info: {})
369
- opponent = nil
370
-
371
- row.css('td').each_with_index do |cell,m|
372
-
373
- case m
374
- when 0 then game.update(time: get_game_date(cell,row))
375
- when 1
376
- info = get_game_info(cell, team)
377
- opponent = info[:opponent]
378
- game.update(info[:game_info])
379
- end
380
-
381
- if game_finished?(row)
382
- case m
383
- when 2
384
- formatted = odds_reader(remove_element_whitespace(cell))
385
- home_team = (game.home_or_away_team(team) == :home)
386
- if moneyline_sport
387
- home_team ? game.update(vegas_info: {home_moneyline: formatted}) : game.update(vegas_info: {away_moneyline: formatted})
388
- else
389
- home_line = (formatted && !home_team) ? -formatted : formatted
390
- game.update(vegas_info: {home_line: home_line, away_line: (home_line ? -home_line : nil)})
391
- end
392
-
393
- when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell)})
394
- when 4 then game.update(game_results(cell, team, opponent))
395
- when 5 then game.update(ats_results(cell, team, opponent))
396
- end
397
- end
398
- end
399
- game
400
- end
401
- { team: team, games: games.compact.map{ |game| game } }
402
- end
403
-
404
- # Utility method for scraping team page results
405
- # * gets the date of the game, accounting for different years
406
- def get_game_date(date_string, row)
407
- date = Date.strptime(date_string.content.gsub!(/\s+/, ""), "%b%e")
408
- if game_finished?(row) && date.month > Date.today.month
409
- date = Date.new(Date.today.year - 1, date.month, date.day)
410
- elsif !game_finished?(row) && date.month < Date.today.month
411
- date = Date.new(Date.today.year + 1, date.month, date.day)
412
- end
413
- date.to_time
414
- end
415
-
416
- # Utility method for scraping team page results
417
- # * determines if the game has concluded
418
- def game_finished?(row)
419
- !"#{RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(row.at_css('td:nth-child(5)')))}".empty?
420
- end
421
-
422
- # Utility method for scraping team page results
423
- # * gets the home_team, away_team, and doubleheader info
424
- def get_game_info(cell, primary_team)
425
- url = cell.at_css('a')
426
- home_or_away = remove_element_whitespace(cell)[0] == "@" ? :away : :home
427
- opponent = url ? team_url_parser(url.attribute('href')) : custom_opponent_identifier(cell)
428
-
429
- {
430
- opponent: opponent,
431
- game_info: {
432
- doubleheader: matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader'],
433
- home_team: home_or_away == :home ? primary_team : opponent,
434
- away_team: home_or_away == :away ? primary_team : opponent,
435
- }
436
- }
437
- end
438
-
439
- # Utility method for scraping team page results
440
- # * gets the result of the game
441
- def game_results(cell, primary_team, opponent)
442
- results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
443
- results_hash = matchdata_to_hash(results)
444
- {
445
- ending: (results_hash['result'] ? :ended : results.to_s),
446
- winning_team: case results_hash['result'] when :won then primary_team when :lost then opponent else nil end,
447
- winning_score: case results_hash['result'] when :won then results['team_score'] when :lost then results['oppo_score'] else nil end,
448
- losing_score: case results_hash['result'] when :won then results['oppo_score'] when :lost then results['team_score'] else nil end,
449
- }
450
- end
451
-
452
- # Utility method for scraping team page results
453
- # * gets the spread results
454
- def ats_results(cell, primary_team, opponent)
455
- results = RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell))
456
- results_hash = matchdata_to_hash(results)
457
- {
458
- ats_winner: case results_hash['ats_result'] when :win then primary_team when :loss then opponent else nil end,
459
- over_under_result: results_hash['ou_result']
460
- }
461
- end
462
-
463
- # Utility method for scraping team page results
464
- # * gets the identifier for an opponent without links
465
- def custom_opponent_identifier(cell)
466
- cell.content.strip.gsub(/(\s| )+/, '-').gsub('@-','').downcase[0..-3]
467
- end
468
-
469
- # General Utility Method
470
- # used the get the team identifier from the URL
471
- def team_url_parser(url)
472
- /.+\/team\/(?<team_name>(\w|-)+)/.match(url)[:team_name]
473
- end
474
-
475
- # General Utility Method
476
- # used the remove all whitespace from the content of the element
477
- def remove_element_whitespace(element)
478
- string = element.content.gsub(/(\s| )+/, '')
479
- string.empty? ? '' : string
480
- end
481
-
482
- def matchdata_to_hash(matchdata)
483
- matchdata ? Hash[*matchdata.names.map{ |name| [name,(matchdata[name] ? matchdata[name].downcase.to_sym : nil)] }.flatten].compact : {}
484
- end
485
-
486
- # Regular Expressions Module
487
- module RegularExpressions
488
- RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)/
489
- NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
490
- NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/
491
-
492
- TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
493
- MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x
494
-
495
- ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
496
- ((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(.5)?)-\d\d\z)|
497
- ((?<away_line>-\d+(.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
498
- RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
499
- MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/
500
-
501
- DOUBLEHEADER = /DH Gm (?<id>\d)/
502
- RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/
503
-
504
- GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
505
- SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
506
- end
507
-
508
- class Game
509
- attr_reader :time, :away_team, :home_team, :vegas_info,
510
- :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes
511
-
512
- def initialize(args = {})
513
- Game.sanitize(args).map { |attribute, value| instance_variable_set("@#{attribute}", value) }
514
- end
515
-
516
- def update(args = {})
517
- Game.sanitize(args).map { |attribute, value|
518
- new_val = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
519
- instance_variable_set("@#{attribute}", new_val)
520
- }
521
- return self
522
- end
523
-
524
- def teams_found?
525
- home_team && away_team
526
- end
527
-
528
- def find_equal(games)
529
- games.detect { |g| g == self }
530
- end
531
-
532
- def ==(other_game)
533
- home_team == other_game.home_team && away_team == other_game.away_team && time.to_date == other_game.time.to_date && doubleheader == other_game.doubleheader
534
- end
535
-
536
- def home_or_away_team(team)
537
- case team
538
- when home_team then :home
539
- when away_team then :away
540
- else nil end
541
- end
542
-
543
- def as_json
544
- instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
545
- end
546
-
547
- private
548
- def self.sanitize(args)
549
- permitted_keys = [:time, :away_team, :home_team, :vegas_info,
550
- :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes]
551
- args.select { |key,_| permitted_keys.include? key }
552
- end
553
- end
6
+ attr_reader :sport_id
7
+ attr_reader :sport_name
8
+ attr_reader :moneyline_sport
9
+ attr_reader :teams
10
+
11
+ def initialize
12
+ @moneyline_sport = false
13
+ end
14
+
15
+ def teams
16
+ @teams ||= standings
17
+ end
18
+
19
+ def standings
20
+ @standings ||= scrape_standings
21
+ end
22
+
23
+ # Gets the upcoming/current games for the sport
24
+ def current_games
25
+ @current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/","http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"])
26
+ end
27
+
28
+ # Gets all of the schedule and results for each team
29
+ def team_schedules
30
+ @team_schedules ||= teams.map { |team|
31
+ puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
32
+ url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
33
+ scrape_team_page(url, team[:info][:identifier])
34
+ }
35
+ end
36
+
37
+ def live_scores
38
+ @live_scores = get_live_scores("https://web.archive.org/web/20170704205945/http://www.vegasinsider.com/mlb/scoreboard/")
39
+ nil
40
+ end
41
+
42
+ private
43
+
44
+ def scrape_teams
45
+ url = "http://www.vegasinsider.com/#{sport_name}/teams/"
46
+ doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
47
+
48
+ doc.css('a').map do |team_link|
49
+ team = {}
50
+ team[:info] = format_college_team(team_link, doc)
51
+
52
+ row = team_link.parent.parent.previous
53
+ while !(row.at_css('td') && row.at_css('td').attributes['class'].value.include?('viSubHeader1'))
54
+ row = row.previous
55
+ end
56
+ team[:grouping] = { conference: row.at_css('td').content }
57
+ team
58
+ end
59
+ end
60
+
61
######################################################
# Scrapes league standings; returns an array of team hashes (info, record,
# grouping). The teams page is fetched too so college names can be resolved.
def scrape_standings
  url = "http://www.vegasinsider.com/#{sport_name}/standings/"
  standings_doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
  teams_doc = Nokogiri::HTML(open(url.gsub('standings', 'teams'))).at_css('.main-content-cell')

  collected = []
  standings_doc.css(standings_table_class).each do |conference|
    title = conference.at_css('.viHeaderNorm')
    next if title.nil?

    tables = conference.css('.viBodyBorderNorm table')
    table = tables[standings_table_index]
    # Site quirk: Conference USA's college-football standings sit in a
    # different table slot.
    table = tables[2] if title.content == 'Conference USA' && sport_name == 'college-football'
    next unless table

    table.css('tr').each do |row|
      # Skip header and sub-header rows.
      next if row.at_css('.viSubHeader1') || row.at_css('.viSubHeader2')
      collected.push(scrape_standings_row(row, conference_division_parser(title.content), teams_doc))
    end
  end
  collected
end
85
+
86
# CSS selector for standings tables; college pages use a different layout.
def standings_table_class
  return '.SLTables1' if college_sport?
  'table'
end
91
+
92
# Index of the standings table within a conference block.
def standings_table_index
  return 1 if college_sport?
  0
end
97
+
98
# Splits a pro standings header like "AL - East" into conference/division.
# College headers are the conference alone (no division).
def conference_division_parser(title)
  return { conference: title, division: nil } if college_sport?

  parts = /(?<conference>.+) - (?<division>.+)/.match(title)
  { conference: parts[:conference], division: parts[:division] }
end
108
+
109
+
110
# True when the league's pages follow the college layout.
def college_sport?
  %w[college-football college-basketball].include?(sport_name)
end
115
+
116
# Dispatches a standings row to the league-specific parser (keyed by
# sport_id) and attaches the conference/division grouping.
def scrape_standings_row(row, grouping, teams_doc)
  blank = { info: {}, record: {} }
  parsed = case sport_id
           when 0, 1 then college_standings_row_parser(row, blank, teams_doc)
           when 2    then nfl_standings_row_parser(row, blank)
           when 3, 4 then pro_standings_row_parser(row, blank)
           when 5    then hockey_standings_row_parser(row, blank)
           end
  parsed[:grouping] = grouping
  parsed
end
129
+
130
# Parses one COLLEGE standings row; column positions are fixed by the site.
def college_standings_row_parser(row, team, teams_doc)
  row.css('td').each_with_index do |cell, i|
    text = remove_element_whitespace(cell)
    case i
    when 0  then team[:info] = format_college_team(cell.at_css('a'), teams_doc)
    when 5  then team[:record][:overall_wins]   = text.to_i
    when 6  then team[:record][:overall_losses] = text.to_i
    when 9  then team[:record][:home_wins]      = text.to_i
    when 10 then team[:record][:home_losses]    = text.to_i
    when 13 then team[:record][:away_wins]      = text.to_i
    when 14 then team[:record][:away_losses]    = text.to_i
    end
  end
  team
end
148
+
149
# Parses one NFL standings row into team[:info] / team[:record].
#
# Fix: MatchData named captures are Strings, so the home/away splits were
# stored as strings while the overall W/L/T columns were integers. Captures
# are now coerced with to_i for a consistent record hash.
def nfl_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]   = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 3 then team[:record][:overall_ties]   = content.to_i
    when 7
      record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
      team[:record][:home_wins]   = record[:wins].to_i
      team[:record][:home_losses] = record[:losses].to_i
      team[:record][:home_ties]   = record[:ties].to_i
    when 8
      record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
      team[:record][:away_wins]   = record[:wins].to_i
      team[:record][:away_losses] = record[:losses].to_i
      team[:record][:away_ties]   = record[:ties].to_i
    end
  end
  team
end
174
+
175
# Parses one PRO (MLB/NBA) standings row into team[:info] / team[:record].
#
# Fix: MatchData named captures are Strings, so home/away splits were stored
# as strings while overall W/L were integers. Captures are now coerced with
# to_i for a consistent record hash.
def pro_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]   = content.to_i
    when 2 then team[:record][:overall_losses] = content.to_i
    when 5
      record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
      team[:record][:home_wins]   = record[:wins].to_i
      team[:record][:home_losses] = record[:losses].to_i
    when 6
      record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
      team[:record][:away_wins]   = record[:wins].to_i
      team[:record][:away_losses] = record[:losses].to_i
    end
  end
  team
end
197
+
198
# Parses one NHL standings row into team[:info] / team[:record].
#
# Fix: MatchData named captures are Strings, so home/away splits were stored
# as strings while overall W/L/OTL/points were integers. Captures are now
# coerced with to_i for a consistent record hash.
def hockey_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then team[:record][:overall_wins]     = content.to_i
    when 2 then team[:record][:overall_losses]   = content.to_i
    when 3 then team[:record][:over_time_losses] = content.to_i
    when 4 then team[:record][:shootout_losses]  = content.to_i
    when 5 then team[:record][:points]           = content.to_i
    when 8
      record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
      team[:record][:home_wins]             = record[:wins].to_i
      team[:record][:home_losses]           = record[:losses].to_i
      team[:record][:home_over_time_losses] = record[:ot_losses].to_i
      team[:record][:home_shootout_losses]  = record[:shootout_losses].to_i
    when 9
      record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
      team[:record][:away_wins]             = record[:wins].to_i
      team[:record][:away_losses]           = record[:losses].to_i
      team[:record][:away_over_time_losses] = record[:ot_losses].to_i
      team[:record][:away_shootout_losses]  = record[:shootout_losses].to_i
    end
  end
  team
end
227
+
228
# Builds the team info hash from a standings link element: identifier comes
# from the href, nickname is the humanized identifier, location is the full
# name minus the nickname.
def format_team(url)
  href = url.attribute('href')
  full_name = url.content
  identifier = team_url_parser(href)
  nickname = humanize_identifier(identifier)

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: href.value
  }
end
243
+
244
# Builds the team info hash for a college team, deriving the nickname by
# stripping the location out of the full name, with two fallbacks for
# inconsistent site data.
def format_college_team(url, teams_doc)
  full_name = team_page_full_name(teams_doc, url)
  location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
  identifier = team_url_parser(url.attribute('href'))
  nickname = full_name.gsub("#{location} ", '')

  # Fallback 1: location didn't match — strip the humanized identifier.
  if nickname == full_name
    nickname = full_name.gsub('&', '').gsub(humanize_identifier(identifier), '').strip
  end

  # Fallback 2: still no nickname — take the back half of the name, then
  # apply the hard-coded exceptions.
  if nickname == full_name.gsub('&', '').strip
    words = nickname.split(' ')
    nickname = words.each_slice((words.size / 2.0).round).to_a[1].join(' ')
    nickname = nickname_exceptions(identifier, nickname)
  end

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: url.attribute('href').value
  }
end
270
+
271
# Turns a slug into a display name: "new-york-yankees" -> "New York Yankees".
def humanize_identifier(identifier)
  identifier.split('-').map(&:capitalize).join(' ')
end
274
+
275
# Hard-coded nickname overrides for schools whose derived nickname is wrong.
NICKNAME_OVERRIDES = {
  'california-state-long-beach'    => '49ers',
  'texas-am-corpus-christi'        => 'Islanders',
  'southern-am'                    => 'Jaguars',
  'saint-marys-college-california' => 'Gaels'
}.freeze

# Returns the override for the identifier, or the given nickname unchanged.
def nickname_exceptions(identifier, nickname)
  NICKNAME_OVERRIDES.fetch(identifier, nickname)
end
283
+
284
# Looks up a team's full display name on the teams page by matching the
# same href the standings page linked to.
def team_page_full_name(doc, url)
  href = url.attribute('href')
  doc.at_css("a[href='#{href}']").content
end
289
+
290
##########################################
# Scrapes the live scoreboard page into an array of game hashes with
# :away_team, :home_team, :status, :date and (when available) :scoring.
#
# Fixes: removed stray debug `puts` calls that polluted stdout on every
# scrape, and extracted the duplicated per-team score-cell loop into a
# helper. Logic (including pushing games whose status resolved to nil) is
# otherwise unchanged.
def get_live_scores(url)
  doc = Nokogiri::HTML(open(url))

  # Page-level date; overridden by per-section date rows below.
  date = doc.at_css('.ff_txt2 tr:nth-child(2) font')
  date = Date.strptime(date.content, '%a, %b %d') if date

  games = []

  doc.css('.SLTables4 table > tr').each do |row|
    if row.attribute('valign')
      # Date separator row.
      date = parse_score_date(row)
    else
      row.css('.yeallowBg .sportPicksBorder').each do |game|
        result = {}
        game.css('.tanBg a').each_with_index do |team, i|
          key = i == 0 ? :away_team : :home_team
          result[key] = team_url_parser(team.attribute('href'))
        end

        game_status = remove_element_whitespace(game.at_css('.sub_title_red'), true)
        game_status = case
                      when game_status == 'Final Score' then :ended
                      when game_status == 'PPD' then :Postponed
                      when game_status == '' then :Cancelled
                      when game_status.include?('Game Time') then nil
                      else game_status
                      end

        if game_status
          segment_titles = []
          game.css('.sportPicksBg td').each do |col|
            text = remove_element_whitespace(col)
            next if ['Teams', 'Odds', 'ATS', ''].include?(text)
            segment_titles.push text
          end

          away_values = live_score_values(game.css('.tanBg')[0])
          home_values = live_score_values(game.css('.tanBg')[1])
        end

        if segment_titles
          result[:scoring] = segment_titles.each_with_index.map { |title, i|
            { period: title, away: away_values[i], home: home_values[i] }
          }
        end

        result[:status] = game_status
        result[:date] = date
        games.push(result)
      end
    end
  end
  games
end

# Extracts per-period scores from a team row: skips the first three label
# cells and drops the trailing total column.
def live_score_values(team_row)
  values = []
  team_row.css('td').each_with_index do |col, i|
    next if i < 3
    values.push remove_element_whitespace(col).to_i
  end
  values.pop
  values
end
369
+
370
# Parses a date separator row like "Week 3 - Sunday September 24, 2017",
# dropping the optional "Week N - " prefix.
def parse_score_date(element)
  text = remove_element_whitespace(element, true).gsub(/Week\s+\d+\s+-\s/, '')
  Date.strptime(text, '%A %B %d, %Y')
end
374
+
375
##########################################
# Scrapes one or more odds pages into Game objects. The first URL creates
# the games; later URLs merge their lines into the matching existing games.
def get_lines(urls)
  games = []

  urls.each do |url|
    first_pass = games.empty?
    doc = Nokogiri::HTML(open(url))

    doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
      game_cell = game_row.at_css('td:first-child')
      away, home = game_cell_parser(game_cell)
      game = Game.new(home_team: home, away_team: away)

      if game.teams_found?
        game.update(time: get_game_time(game_cell))
        # The doubleheader tag lives two rows below the matchup row.
        game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))
        if first_pass
          games.push(game)
        else
          # NOTE(review): assumes the later page lists the same matchups;
          # find_equal returning nil would raise below — confirm.
          game = game.find_equal(games)
        end
        # Read lines from both the anchor's text and its raw inner HTML
        # (the HTML form keeps <br>-separated odds the text form loses).
        game.update(vegas_info: get_line(get_odds(game_row)))
        game.update(vegas_info: get_line(get_odds_inner_html(game_row)))
      elsif first_pass
        # Team-less rows carry notes; append them to the previous game.
        previous = games.last
        previous.update(notes: (previous.notes ? "#{previous.notes} / " : '') + game_cell.content) if previous
      end
    end
  end

  games
end
404
+
405
# Extracts the two team identifiers (away first) from a matchup cell.
def game_cell_parser(cell)
  cell.css('b a').map { |team_link| team_url_parser(team_link.attribute('href')) }
end
410
+
411
# Parses the game's date/time from the matchup cell, interpreted in US
# Eastern time. The year is inferred: a month earlier than today's is
# assumed to be next year, except when it is exactly last month.
#
# Fix: ENV['TZ'] is now restored in an ensure block so a parse failure
# cannot leave the process timezone permanently mutated.
def get_game_time(cell)
  time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
  year = ((Date.today.month > time[:mo].to_i) && (Date.today.month - 1 != time[:mo].to_i)) ? Date.today.year + 1 : Date.today.year

  begin
    ENV['TZ'] = 'US/Eastern'
    Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
  ensure
    ENV['TZ'] = nil
  end
end
422
+
423
# Odds anchor text with spacing stripped and the ½ glyph converted to .5.
# NOTE(review): the literal in the first gsub may have been a non-breaking
# space in the original source — confirm encoding survived.
def get_odds(odds_element)
  content = odds_element.at_css('td:nth-child(3) a')&.content || ''
  content.gsub(" ", "").gsub("½", ".5").strip
end
428
# Same as get_odds but reads the raw inner HTML (keeps <br> separators).
def get_odds_inner_html(odds_element)
  html = odds_element.at_css('td:nth-child(3) a')&.inner_html || ''
  html.encode('utf-8').gsub(" ", "").gsub("½", ".5").strip
end
431
+
432
# Parses an odds string into a vegas-info hash, merging the spread, runline,
# and moneyline capture groups, then mirroring home/away lines.
def get_line(odds_string)
  normalized = odds_string.gsub('PK', '-0') # "pick'em" == zero spread

  parsed = matchdata_to_hash(RegularExpressions::ODDS.match(normalized)) || {}
  parsed = parsed.merge(matchdata_to_hash(RegularExpressions::RUNLINE_ODDS.match(normalized)) || {})
  parsed = parsed.merge(matchdata_to_hash(RegularExpressions::MONEYLINE_ODDS.match(normalized)) || {})

  # Captures come back as symbols; convert every present value to a float.
  parsed.each_key { |key| parsed[key] = parsed[key].to_s.to_f if parsed[key] }
  get_home_and_away(parsed)
end
446
+
447
# Mirrors whichever spread is present onto the other side of the matchup
# (a home line implies the negated away line, and vice versa).
def get_home_and_away(result)
  result['away_line'] = -result['home_line'] if result['home_line']
  result['home_line'] = -result['away_line'] if result['away_line']
  result
end
454
+
455
# Converts a displayed odds cell to a number: blank/nil -> nil, 'PK' -> 0,
# anything else -> Float.
def odds_reader(odds)
  stripped = odds&.strip
  return nil if stripped.nil? || stripped == ''
  return 0 if stripped == 'PK'
  odds.to_f
end
460
+
461
# Returns the doubleheader game number ("1"/"2") from "DH Gm N", or nil.
def doubleheader_id(content)
  match = RegularExpressions::DOUBLEHEADER.match(content)
  match && match[:id]
end
467
+
468
################################################
# Scrapes a team's schedule page into { team:, games: }.
#
# Fix: dropped the no-op `.map { |game| game }` chained after compact.
def scrape_team_page(url, team)
  rows = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr')

  games = rows.each_with_index.map do |row, index|
    next if index == 0 # header row (compact below removes the nil)

    game = Game.new(vegas_info: {})
    opponent = nil

    row.css('td').each_with_index do |cell, m|
      case m
      when 0 then game.update(time: get_game_date(cell, row))
      when 1
        info = get_game_info(cell, team)
        opponent = info[:opponent]
        game.update(info[:game_info])
      end

      # Result columns only carry data once the game has been played.
      if game_finished?(row)
        case m
        when 2
          formatted = odds_reader(remove_element_whitespace(cell))
          home_team = (game.home_or_away_team(team) == :home)
          if moneyline_sport
            home_team ? game.update(vegas_info: { home_moneyline: formatted }) : game.update(vegas_info: { away_moneyline: formatted })
          else
            # Spread is listed from the primary team's perspective.
            home_line = (formatted && !home_team) ? -formatted : formatted
            game.update(vegas_info: { home_line: home_line, away_line: (home_line ? -home_line : nil) })
          end
        when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell) })
        when 4 then game.update(game_results(cell, team, opponent))
        when 5 then game.update(ats_results(cell, team, opponent))
        end
      end
    end
    game
  end

  { team: team, games: games.compact }
end
510
+
511
# Parses a schedule-cell date like "Sep 4" into a Time, choosing the year so
# finished games fall in the past and upcoming games in the future.
#
# Fix: the previous code used gsub!, which returns nil when nothing is
# replaced, making Date.strptime raise on content with no whitespace; plain
# gsub always returns the string.
def get_game_date(date_string, row)
  date = Date.strptime(date_string.content.gsub(/\s+/, ""), "%b%e")
  if game_finished?(row) && date.month > Date.today.month
    date = Date.new(Date.today.year - 1, date.month, date.day)
  elsif !game_finished?(row) && date.month < Date.today.month
    date = Date.new(Date.today.year + 1, date.month, date.day)
  end
  date.to_time
end
522
+
523
# A game is finished when the results column (5th td) matches GAME_RESULTS.
def game_finished?(row)
  match = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(row.at_css('td:nth-child(5)')))
  !match.to_s.empty?
end
528
+
529
# Determines the opponent and home/away orientation from the matchup cell;
# a leading "@" means the primary team is on the road.
def get_game_info(cell, primary_team)
  link = cell.at_css('a')
  away_game = remove_element_whitespace(cell)[0] == '@'
  opponent = link ? team_url_parser(link.attribute('href')) : custom_opponent_identifier(cell)

  {
    opponent: opponent,
    game_info: {
      doubleheader: matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader'],
      home_team: away_game ? opponent : primary_team,
      away_team: away_game ? primary_team : opponent,
    }
  }
end
545
+
546
# Builds the result hash (:ending, winner and scores) for a finished game.
# A non-W/L match (Postponed/Cancelled) leaves :ending as the matched text.
def game_results(cell, primary_team, opponent)
  results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  parsed = matchdata_to_hash(results)
  outcome = parsed['result'] # downcased symbol, e.g. :won / :lost, or nil

  {
    ending: (outcome ? :ended : results.to_s),
    winning_team: case outcome when :won then primary_team when :lost then opponent else nil end,
    winning_score: case outcome when :won then results['team_score'] when :lost then results['oppo_score'] else nil end,
    losing_score: case outcome when :won then results['oppo_score'] when :lost then results['team_score'] else nil end,
  }
end
558
+
559
# Builds the against-the-spread / over-under result hash for a game.
def ats_results(cell, primary_team, opponent)
  results = RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell))
  parsed = matchdata_to_hash(results)

  {
    ats_winner: case parsed['ats_result'] when :win then primary_team when :loss then opponent else nil end,
    over_under_result: parsed['ou_result']
  }
end
569
+
570
# Derives a slug identifier for opponents without a team link.
# NOTE(review): [0..-3] drops the final two characters — presumably a
# trailing suffix in the cell markup; confirm against live pages.
def custom_opponent_identifier(cell)
  slug = cell.content.strip.gsub(/(\s| )+/, '-').gsub('@-', '').downcase
  slug[0..-3]
end
575
+
576
# Pulls the team identifier out of a ".../team/<identifier>" URL.
# Raises NoMethodError (nil match) if the URL has no /team/ segment.
def team_url_parser(url)
  %r{.+/team/(?<team_name>(\w|-)+)}.match(url)[:team_name]
end
581
+
582
# Strips whitespace from an element's text. By default every whitespace run
# is removed; with only_end, only leading/trailing runs are trimmed.
# NOTE(review): the space alternative in the regexes looks like it was a
# non-breaking space in the original source — confirm encoding survived.
def remove_element_whitespace(element, only_end = false)
  pattern = only_end ? /^(\s| )+|(\s| )+\z/ : /(\s| )+/
  text = element.content.gsub(pattern, '')
  text.empty? ? '' : text # kept from the original; both arms are equivalent
end
588
+
589
# Converts a MatchData's named captures into a hash of name (String) =>
# downcased Symbol, dropping captures that did not participate. Returns {}
# for a nil MatchData.
def matchdata_to_hash(matchdata)
  return {} unless matchdata

  matchdata.names.each_with_object({}) do |name, hash|
    value = matchdata[name]
    hash[name] = value.downcase.to_sym if value
  end
end
592
+
593
# Regular expressions shared by the scrapers. Patterns are unchanged from
# the original; only layout and commentary differ.
module RegularExpressions
  # "W-L" record variants (NHL adds OT and shootout losses).
  RECORD_REGEX     = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # "MM/DD H:MM AM" game times.
  TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x

  # Spread/total odds parsed from the anchor's <br>-separated inner HTML.
  ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(.5)?)-\d\d\z)|
((?<away_line>-\d+(.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
  RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
  MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/

  # "DH Gm N" (odds pages) and "(DH N)" (results pages) doubleheader tags.
  DOUBLEHEADER = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  # "W 5-3" style results, or the literal Postponed/Cancelled states.
  GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
614
+
615
# Lightweight value object for a scraped game. Attributes are mass-assigned
# through a sanitizing whitelist; vegas_info merges across updates with the
# previously-set keys winning over incoming ones.
class Game
  attr_reader :time, :away_team, :home_team, :vegas_info,
    :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes

  def initialize(args = {})
    Game.sanitize(args).each { |attribute, value| instance_variable_set("@#{attribute}", value) }
  end

  # Applies sanitized attributes. vegas_info is special-cased: the existing
  # hash is merged over the incoming one, so earlier values are preserved.
  def update(args = {})
    Game.sanitize(args).each do |attribute, value|
      merged = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
      instance_variable_set("@#{attribute}", merged)
    end
    self
  end

  # True when both team identifiers were parsed out of the page.
  def teams_found?
    home_team && away_team
  end

  # Returns the already-collected game equal to this one (see #==), or nil.
  def find_equal(games)
    games.detect { |other| other == self }
  end

  # Games match on both teams, calendar date, and doubleheader slot.
  def ==(other_game)
    home_team == other_game.home_team &&
      away_team == other_game.away_team &&
      time.to_date == other_game.time.to_date &&
      doubleheader == other_game.doubleheader
  end

  # :home / :away for the given identifier, nil when it isn't in this game.
  def home_or_away_team(team)
    case team
    when home_team then :home
    when away_team then :away
    end
  end

  # Hash of every set attribute, keyed by symbol.
  def as_json
    instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
  end

  private

  # Whitelists the permitted attribute keys.
  # NOTE(review): `private` has no effect on `def self.` singleton methods,
  # so Game.sanitize stays externally callable — and the instance methods
  # above depend on calling it with an explicit receiver, so converting it
  # with private_class_method would break them.
  def self.sanitize(args)
    permitted_keys = [:time, :away_team, :home_team, :vegas_info,
      :ending, :winning_team, :winning_score, :losing_score, :ats_winner, :over_under_result, :doubleheader, :notes]
    args.select { |key, _| permitted_keys.include? key }
  end
end
554
661
 
555
662
  end
556
663
 
@@ -561,7 +668,7 @@ class Array
561
668
  end
562
669
 
563
670
  class Hash
564
- def compact
671
+ def compact
565
672
  self.select { |_, value| !value.nil? }
566
673
  end
567
674
  end