hoopscrape 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,258 @@
1
+ require_relative './Navigator'
2
+ # Access NBA team schedule data
3
class NbaSchedule
  include NbaUrls
  include PrintUtils

  # @note +game_list+ is only populated while the schedule is being built;
  #   #initialize resets it to nil once the Navigators have been created.
  attr_reader :game_list, :next_game, :year, :wins, :losses

  # Read Schedule data for a given Team
  # @param args [Hash] Options
  # @option args [String] :team_id Team ID (used to build the live-data URL)
  # @option args [String] :file HTML Test Data; when given it is read instead of live data
  # @option args [Integer] :season_type Season Type
  # @option args [Integer] :year Ending Year of Season
  # @option args [Symbol] :format Name of the conversion method sent to each processed row
  # @note
  #   Season Types: 1-Preseason; 2-Regular Season; 3-Playoffs
  # @example
  #   test     = NbaSchedule.new(file: 'test/data/testData.html')
  #   pre      = NbaSchedule.new(team_id: 'UTA', season_type: 1)
  #   playoffs = NbaSchedule.new(team_id: 'GSW', season_type: 3)
  def initialize(args)
    doc, seasontype = getNokoDoc(args)
    return if doc.nil?

    @game_list = [] # Processed Schedule Data
    @next_game = 0  # Cursor to start of Future Games

    schedule, @year, indicator, tid = collectNodeSets(doc)
    season_valid = verifySeasonType(seasontype, indicator)
    # No (or zero) season type requested: take it from the document itself
    seasontype = findSeasonType(indicator) if seasontype.to_i.eql?(0)

    @wins = @losses = 0
    processSeason(schedule, tid, @year, seasontype, args[:format]) if season_valid && !seasontype.eql?(0)
    @allGames    = Navigator.new(@game_list)
    @futureGames = Navigator.new(@game_list[@next_game, game_list.size])
    @pastGames   = Navigator.new(@game_list[0, @next_game])
    @game_list   = nil
    # Starting year (Integer) becomes a season label, e.g. 2015 => "2015-16"
    @year = "#{@year}-#{(@year + 1).to_s[2, 4]}"
  end

  # @return [Navigator] Navigator All Schedule data
  # @see GAME_F
  # @see GAME_P
  attr_reader :allGames

  # Returns Schedule info of next game
  # @return [Object, nil] Future Schedule Row (Array/Hash/Struct)
  # @note (see #futureGames)
  # @example
  #   nextGame #=> ['UTA', '13', 'Nov 23', 'true', 'OKC', '9:00 PM ET', 'false', '2015-11-23 21:00:00', '2']
  # @see GAME_F
  def nextGame
    allGames[][@next_game] unless allGames[].nil?
  end

  # Returns Schedule info of last completed game
  # @return [Object, nil] Past Schedule Row (Array/Hash/Struct);
  #   nil when no game has been completed yet
  # @note (see #pastGames)
  # @example
  #   lastGame #=> ['UTA', '12', 'Nov 20', 'false', 'DAL', 'false', '93', '102', '400828071', '6', '6', '2015-11-20 00:00:00', '2']
  # @see SymbolDefaults::GAME_P
  def lastGame
    games = allGames[]
    # With no past games the index would be -1, which wraps around to the
    # final (future) game of the schedule instead of signalling "none".
    return if games.nil? || @next_game.zero?
    games[@next_game - 1]
  end

  # @return [Integer] Game # of Next Game
  def nextGameId
    @next_game
  end

  # @return [String, nil] Team ID of next opponent
  # @example
  #   nextTeamId #=> "OKC"
  def nextTeamId
    upcoming = nextGame
    upcoming[4] if upcoming
  end

  # @return [Navigator] Navigator for Future Games
  # @note (see SymbolDefaults::GAME_F)
  # @see SymbolDefaults::GAME_F
  attr_reader :futureGames

  # Return Schedule info of Past Games
  # @return [Navigator] Navigator for Past Games
  # @note (see SymbolDefaults::GAME_P)
  # @see SymbolDefaults::GAME_P
  attr_reader :pastGames

  private

  # Return Nokogiri HTML Document together with the requested season type
  # @param args [Hash] Options passed to #initialize
  # @return [Array] [document, args[:season_type]]
  def getNokoDoc(args)
    # Test file wins over live data
    source = args[:file] || formatTeamUrl(args[:team_id], teamScheduleUrl(args[:season_type], args[:year]))
    # Block form closes the underlying handle as soon as parsing is done.
    # NOTE(review): opening a URL through Kernel#open depends on open-uri
    # patching Kernel#open, which Ruby 3.0 removed (URI.open is the
    # replacement) - confirm which Ruby versions must be supported.
    doc = open(source) { |io| Nokogiri::HTML(io) }
    [doc, args[:season_type]]
  end

  # Extract NodeSets
  # @param doc [Nokogiri::HTML::Document] Parsed schedule page
  # @return [Array] [schedule rows, season starting year, season indicator text, team ID]
  def collectNodeSets(doc)
    schedule = doc.xpath('//div/div/table/tr') # Schedule Rows
    year = doc.xpath('//div[@id=\'my-teams-table\']/div/div/div/h1').text.split('-')[1].strip.to_i # Season Starting Year
    season = doc.xpath("//tr[@class='stathead']").text.split[1].downcase # preseason/regular/postseason
    tid = getTid(doc.title.split(/\d{4}/)[0].strip).upcase
    [schedule, year, season, tid]
  end

  # Ensure requested season type is what is being processed
  # @param s_type [Integer, nil] Requested season type
  # @param indicator [String] Season indicator text taken from the document
  # @return [Boolean] false only when an explicit 1/2/3 request disagrees with the document
  def verifySeasonType(s_type, indicator)
    case s_type
    when 1, 2, 3
      s_type.eql?(findSeasonType(indicator))
    else
      true # Nothing requested, nothing to verify
    end
  end

  # Determine season type from document data
  # @param indicator [String] Season indicator text taken from the document
  # @return [Integer, nil] 1-Preseason; 2-Regular Season; 3-Playoffs; nil when unrecognised
  def findSeasonType(indicator)
    return 1 if indicator.include?('pre')
    return 2 if indicator.include?('regular')
    return 3 if indicator.include?('post')
    nil
  end

  # Process Table of Schedule Data
  # @param schedule [Enumerable] Schedule table rows
  # @param tid [String] Team ID
  # @param year1 [Integer] Season starting year
  # @param seasontype [Integer] Season Type
  # @param new_form [Symbol, nil] Row conversion method (nil keeps plain Arrays)
  def processSeason(schedule, tid, year1, seasontype, new_form)
    seasontype = seasontype.to_i
    game_id = 0 # 82-game counter

    schedule.each do |row|
      next unless ('a'..'z').cover?(row.text[1]) # Header rows carry no game
      next if row.children.size == 3             # Postponed game: not counted

      game_id += 1
      tmp = [tid, game_id.to_s] # TeamID, GameID

      if row.children[2].text.include?(':') # => Future Game (third cell is a time)
        game_date, game_time = futureGame(row, tmp)
        game_in_past = false
      else # => Past Game
        @next_game = game_id
        game_time = '00:00:00' # Game Time (Not shown for past games)
        game_date = pastGame(row, tmp, seasontype)
        game_in_past = true
      end

      saveProcessedScheduleRow(tmp, formatGameDate(game_date, year1, game_time), seasontype, new_form, game_in_past)
    end
  end

  # Process Past Game Row
  # @param row [Object] Schedule table row
  # @param result [Array] Row under construction; values are appended in place
  # @param season_type [Integer] Season Type
  # @return [String] Game Date
  def pastGame(row, result, season_type)
    row.children[0, 4].each_with_index do |cell, cnt|
      txt = cell.text.chomp
      case cnt
      when 0 then result << txt.split(',')[1].strip      # Game Date
      when 1 then saveHomeOpponent(cell, result, txt)    # Home Game? and Opponent ID
      when 2 then saveGameResult(cell, result, txt)      # Game Result
      else        saveTeamRecord(result, season_type, txt) # Team Record
      end
    end
    result[2] # Game Date
  end

  # Process Future Game Row
  # @param row [Object] Schedule table row
  # @param result [Array] Row under construction; values are appended in place
  # @return [Array<String>] [Game Date, Game Time]
  def futureGame(row, result)
    row.children[0, 4].each_with_index do |cell, cnt|
      txt = cell.text.strip
      case cnt
      when 0 then result << txt.split(',')[1].strip   # Game Date
      when 1 then saveHomeOpponent(cell, result, txt) # Home/Away, Opp tid
      when 2 then result << "#{txt} ET"               # Game Time
      when 3 then saveTV(cell, txt, result)           # TV
      end
    end
    [result[2], result[5]] # Game Date, Game Time
  end

  # Store Home? and Opponent ID
  # @param cell [Object] Opponent table cell
  # @param result [Array] Row under construction
  # @param txt [String] Cell text; a leading '@' marks an away game
  def saveHomeOpponent(cell, result, txt)
    result << (!txt[0, 1].include?('@')).to_s # Home Game?
    link = cell.children.children.children[1]
    href = link.attributes['href']
    result <<
      if href.nil? # Non-NBA Team: no team page, keep the displayed name
        link.text.strip
      else         # NBA Team: last URL segment is the hyphenated team name
        getTid(href.text.split('/')[-1].split('-').join(' ')) # Opponent ID
      end
  end

  # Store Game Result
  # Win?, Team Score, Opponent Score, Boxscore ID
  # @param cell [Object] Result table cell
  # @param result [Array] Row under construction
  # @param txt [String] Cell text, e.g. "W102-93" (optionally followed by an OT marker)
  def saveGameResult(cell, result, txt)
    win = txt[0, 1].include?('W')
    final_score = txt[1, txt.length].gsub(/\s?\d?OT/, '')
    # The page lists the winning score first
    if win
      team_score, opp_score = final_score.split('-')
    else
      opp_score, team_score = final_score.split('-')
    end
    box_id = extract_boxscore_id(cell)
    result << win.to_s << team_score.to_s << opp_score.to_s << box_id # Win?, Team Score, Opponent Score, Boxscore ID
  end

  # Pull the boxscore ID out of the result cell's link
  # @param cell [Object] Result table cell
  # @return [String, Integer] Boxscore ID, or 0 when the cell has no usable link
  def extract_boxscore_id(cell)
    return 0 if cell.text.include? 'TBA'
    href = cell.children.children.children[1].attributes['href']
    return 0 if href.nil?
    return href.text.split('=')[1] if href.text.include?('recap?id=')
    href.text.split('/').last
  end

  # Store Team Record
  # Wins, Losses
  # @param result [Array] Row under construction
  # @param season_type [Integer] Season Type
  # @param text [String] Record cell text ("wins-losses")
  def saveTeamRecord(result, season_type, text)
    if season_type == 3 # Team Record Playoffs: tallied from each game's Win? flag
      if result[5].eql?('true')
        @wins += 1
      else
        @losses += 1
      end
      result << @wins.to_s << @losses.to_s
    else # Team Record Pre/Regular: taken from the record cell
      wins, losses = text.split('-')
      @wins = wins.to_i
      @losses = losses.to_i
      result << wins << losses
    end
  end

  # Store TV?
  # @param cell [Object] TV table cell
  # @param txt [String] Cell text
  # @param result [Array] Row under construction
  def saveTV(cell, txt, result)
    # Network image, link or name?
    result << (%w(a img).include?(cell.children[0].node_name) || txt.size > 1).to_s
  end

  # Store Processed Schedule Row
  # @param tmp [Array] Processed row
  # @param game_date [String] Game DateTime string
  # @param season_type [Integer] Season Type
  # @param new_form [Symbol, nil] Row conversion method (nil keeps the plain Array)
  # @param game_in_past [Boolean] Selects the past or future field list
  def saveProcessedScheduleRow(tmp, game_date, season_type, new_form, game_in_past)
    tmp << game_date        # Game DateTime
    tmp << season_type.to_s # Season Type
    if new_form.nil?
      @game_list << tmp # Save processed Array
    else
      # NOTE(review): new_form comes straight from the caller's :format
      # option and is dispatched with send - make sure it is never fed
      # untrusted input.
      @game_list += tmp.send(new_form, game_in_past ? S_GAME_P : S_GAME_F) # Conversion
    end
  end

  # Adjust and format dates
  # @param month_day [String] Month and day, e.g. "Nov 23"
  # @param year [Integer] Season starting year
  # @param game_time [String] Game time
  # @return [String] Game DateTime String ("YYYY-MM-DD HH:MM:SS")
  def formatGameDate(month_day, year, game_time = '00:00:00')
    # Only Oct-Dec games fall in the season's starting year
    year += 1 unless %w(Oct Nov Dec).include?(month_day.split[0])
    d = DateTime.parse(game_time + ' , ' + month_day + ',' + year.to_s)
    d.strftime('%Y-%m-%d %H:%M:%S')
  end
end
@@ -0,0 +1,63 @@
1
+ # Access list of NBA teams
2
class NbaTeamList
  include NbaUrls
  include PrintUtils

  # @return [String] Table Title
  attr_accessor :header

  # @return [Navigator] Navigator over the table of NBA Teams
  # @note (see TEAM_L)
  attr_accessor :teamList

  # Scrape Team Data
  # @param args [Hash] Options
  # @option args [String] :file HTML Test Data; when given it is read instead of live data
  # @option args [Symbol] :format Name of the conversion method sent to the team table
  def initialize(args = {})
    # Block form closes the underlying handle as soon as parsing is done.
    # NOTE(review): opening a URL through Kernel#open depends on open-uri
    # patching Kernel#open, which Ruby 3.0 removed (URI.open is the
    # replacement) - confirm which Ruby versions must be supported.
    doc = open(args[:file] || teamListUrl) { |io| Nokogiri::HTML(io) }
    return if doc.nil?

    # Collect
    @header = doc.xpath('//h2')[0].text.strip # Table Header
    team_names = doc.xpath('//h5/a/text()')   # Team Names

    @teamList = []
    h = 0 # Head of team_names range
    west_conf = %w(Northwest Pacific Southwest) # Western Conference Divs
    # Process Teams by Division (the page is read as six groups of five teams)
    divs = %w(Atlantic Pacific Central Southwest Southeast Northwest)
    divs.each do |div|
      # A slice starting past the end of the list is nil; treat it as "no teams"
      @teamList += processTeams(div, team_names[h, 5] || [], west_conf) # Store Team Data
      h += 5
    end
    @teamList = @teamList.send(args[:format], S_TEAM) if args[:format]
    @teamList = Navigator.new(@teamList)
  end

  private

  # Derive TeamID, Division, Conference
  # @param division [String] Division Name
  # @param team_names [Enumerable] Team name nodes (each must respond to #text)
  # @param west_conf [[String]] List of Divisions in the Western Conference
  # @return [[[String]]] One [TeamID, TeamName, TeamDiv, TeamConf] row per team
  # @example
  #   rows = processTeams("Atlantic", team_name_nodes, %w(Northwest Pacific Southwest))
  #   rows[0] #=> ["BOS", "Boston Celtics", "Atlantic", "Eastern"]
  def processTeams(division, team_names, west_conf)
    # Derive Conference from Division (same for every team in the division)
    conference = west_conf.include?(division) ? 'Western' : 'Eastern'
    team_names.map do |tname|
      full = adjustTeamName(tname.text.strip) # Full Team Name
      [getTid(full), full.strip, division, conference]
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # Methods and Urls to access ESPN NBA data
2
module NbaUrls
  # @return [String] URL to access Boxscore
  def boxScoreUrl
    'http://scores.espn.go.com/nba/boxscore?gameId='
  end

  # @return [String] URL to access NBA Team List
  def teamListUrl
    'http://espn.go.com/nba/teams'
  end

  # @param seasontype [Integer] 1-Pre 2-Regular 3-Playoff
  # @param year [Integer, String] Ending Year of Season
  # @return [String] URL template to access Team Schedule; the %s placeholder
  #   is filled in by #formatTeamUrl
  def teamScheduleUrl(seasontype = nil, year = nil)
    year ||= seasonYearEnd # Default to the current season
    seasontype ||= 3       # Default to playoff data
    "http://espn.go.com/nba/team/schedule/_/name/%s/year/#{year}/seasontype/#{seasontype}"
  end

  # @return [String] URL template to access Team Roster (see #formatTeamUrl)
  def teamRosterUrl
    'http://espn.go.com/nba/team/roster/_/name/%s/'
  end

  # @return [String] URL to access Player profile
  def playerUrl
    'http://espn.go.com/nba/player/_/id/'
  end

  # @param date [#to_s, nil] Date to look up; defaults to today
  # @return [String] Season Years
  # @raise [ArgumentError] when the date cannot be parsed
  # @example
  #   seasonYears('2015-07-10') => '2015-2016'
  def seasonYears(date = nil)
    return seasonYears(Date.today) if date.nil?
    date = Date.parse(date.to_s)
    # Dates before July still belong to the season that started the year before
    return "#{date.year - 1}-#{date.year}" if date.month < 7
    "#{date.year}-#{date.year + 1}"
  end

  # @param date [#to_s, nil] Date to look up; defaults to today
  # @return [String, nil] Ending year of the season, or nil when it cannot be determined
  def seasonYearEnd(date = nil)
    seasonYears(date).split('-')[1]
  rescue StandardError
    # Deliberately best-effort: any failure is reported as "unknown"
    nil
  end

  # Generate team specific URL
  # @param team_id [String] Team ID
  # @param url [String] URL String containing a %s placeholder
  # @return [String] Formatted URL
  # @example
  #   formatTeamUrl('uta', teamRosterUrl) #=> "http://espn.go.com/nba/team/roster/_/name/utah/"
  def formatTeamUrl(team_id, url)
    team_id = team_id.downcase
    # Teams whose URL name differs from the lower-cased Team ID
    special = {
      'was' => 'wsh', 'nop' => 'no', 'sas' => 'sa', 'uta' => 'utah',
      'pho' => 'phx', 'gsw' => 'gs', 'nyk' => 'ny'
    }
    url % [special.fetch(team_id, team_id)]
  end

  # Derive three letter Team ID from Team Name
  # @param team_name [String] Full Team Name
  # @return [String] Team ID; empty String when the name is nil or blank
  # @example
  #   getTid("Oklahoma City Thunder") #=> "OKC"
  #
  def getTid(team_name)
    words = team_name.to_s.split
    return '' if words.empty? # Nothing to abbreviate

    # Three or more words: use the initials; otherwise the first three letters
    abbr = words.size > 2 ? words.map { |word| word[0] }.join : words[0][0, 3]
    checkSpecial(abbr)
  end

  # Adjust Outlier Abbreviations
  # @param abbr [String] Derived abbreviation (left untouched; a new String is returned)
  # @return [String] Upper-cased abbreviation with the known outliers corrected
  def checkSpecial(abbr)
    abbr = abbr.upcase # Non-destructive, so frozen/shared Strings are safe
    special = { 'OCT' => 'OKC', 'PTB' => 'POR', 'BRO' => 'BKN', 'LA' => 'LAC' }
    special.fetch(abbr, abbr)
  end

  # Adjust Team Names
  # @param team_name [String] Team Name as scraped
  # @return [String] Full Team Name
  def adjustTeamName(team_name)
    special = { 'LA Clippers' => 'Los Angeles Clippers' }
    special.fetch(team_name, team_name)
  end
end
@@ -0,0 +1,34 @@
1
+ # Print Utilities
2
module PrintUtils
  # Printable tabular String representation of args data
  # @param args [[[object]]] Table Data
  # @param col_width [Integer] Column Width
  # @param title [String] Table Title
  # @param show_idx [Bool] Show Index?
  # @return [String] Table String
  def asTable(args, col_width = 15, title = '', show_idx = true)
    result = "\n#{title}"
    idx_width = args.size.to_s.length # Width of the largest row number
    args.each_with_index do |row, idx|
      if show_idx
        label = idx + 1
        # Pad by the width of the number actually printed (1-based), so that
        # rows 10, 100, ... stay aligned with the shorter numbers above them.
        result << "\n#{label}." << ' ' * pad(idx_width, label) << ' '
      else
        result << "\n"
      end
      formatRow(row, col_width, result)
    end
    result << "\n"
  end

  private

  # Format Row: append every cell to target, left-justified to width.
  # Cells longer than width are appended unpadded (never truncated).
  def formatRow(row, width, target)
    row.each { |td| target << td.to_s.ljust(width) }
  end

  # Calculate required padding (never negative)
  def pad(width, content)
    [width - content.to_s.length, 0].max
  end
end
@@ -0,0 +1,16 @@
1
+ # Extend String
2
class String
  # Look up the roster of the team identified by this String
  # @param f_mat [Symbol] Format
  # @return [NbaRoster] Roster (whatever HoopScrape.roster returns)
  def roster(f_mat = nil)
    options = { format: f_mat }
    HoopScrape.roster(self, **options)
  end

  # Look up the schedule of the team identified by this String
  # @param (see #roster)
  # @return [NbaSchedule] Schedule (whatever HoopScrape.schedule returns)
  def schedule(f_mat = nil)
    options = { format: f_mat }
    HoopScrape.schedule(self, **options)
  end
end
+ end