hoopscrape 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
# Array extensions that convert scraped rows into Hashes / Structs
class Array
  # Convert the receiver into an Array of Hashes.
  #
  # A receiver whose first element is an Array is treated as a table (one
  # Hash per row); anything else is treated as a single row.
  #
  # @param keys [Array<Symbol>] field names; when empty, a default set is
  #   chosen by column count (see #checkKeys)
  # @return [Array<Hash>] one Hash per row, `[]` for an empty receiver
  # @raise [ArgumentError] when keys is empty and no default set matches
  # @example
  #   teams  = es.teamList # Array of Team data
  #   hash_a = teams.to_hashes
  #   # => [{:t_abbr=>"BOS", :t_name=>"Boston Celtics", :division=>"Atlantic", :conference=>"Eastern"} ... ]
  def to_hashes(keys = [])
    return [] if empty?
    two_d = first.is_a?(Array) # Only the first element is inspected
    keys = checkKeys(keys, two_d ? first.size : size)
    rows = two_d ? self : [self]
    rows.map { |row| keys.zip(row).to_h }
  end

  # Convert the receiver into an Array of Structs.
  #
  # Delegates row/column detection and default key selection to #to_hashes,
  # so a 1D receiver is sized by its own length (not by the length of its
  # first element). All returned rows share a single Struct class.
  #
  # @param keys [Array<Symbol>] field names (see #to_hashes)
  # @return [Array<Struct>] one Struct per row, `[]` for an empty receiver
  # @raise [ArgumentError] when keys is empty and no default set matches
  # @example
  #   teams   = es.teamList # Array of Team data
  #   structs = teams.to_structs
  #   # => [#<struct t_abbr="BOS", t_name="Boston Celtics", division="Atlantic", conference="Eastern"> ... ]
  def to_structs(keys = [])
    hashes = to_hashes(keys)
    return [] if hashes.empty?
    row_class = Struct.new(*hashes.first.keys) # every row has the same keys
    hashes.map { |hash| row_class.new(*hash.values) }
  end

  # Determine the field names to use.
  #
  # @param keys [Array<Symbol>] caller supplied field names
  # @param k_id [Integer] number of columns in a row
  # @return [Array<Symbol>] keys when non-empty, otherwise the first default
  #   list (S_BOX_P, S_BOX_T, S_GAME_F, S_GAME_P, S_ROSTER, S_TEAM) whose
  #   size equals k_id
  # @raise [ArgumentError] when keys is empty and no default list has k_id entries
  def checkKeys(keys, k_id)
    return keys unless keys.empty?
    defaults = [S_BOX_P, S_BOX_T, S_GAME_F, S_GAME_P, S_ROSTER, S_TEAM]
    match = defaults.find { |default| default.size == k_id }
    return match if match
    raise ArgumentError, "no default field names for rows with #{k_id} columns; pass keys explicitly"
  end

  # Replace every occurrence of a symbol, in place.
  # @param old_sym [Symbol] Symbol to remove
  # @param new_sym [Symbol] Symbol to add
  # @return [Array] the receiver
  def change_sym!(old_sym, new_sym)
    map! { |x| x.eql?(old_sym) ? new_sym : x }
  end

  # Look up the boxscore for a game row.
  #
  # Only rows with as many columns as S_GAME_P are accepted; the value at
  # index 8 is passed to HoopScrape.boxscore as the boxscore id.
  #
  # @param f_mat [Symbol, nil] format, forwarded unchanged
  # @return [Object, nil] result of HoopScrape.boxscore, or nil when the
  #   receiver is not S_GAME_P-sized
  def boxscore(f_mat = nil)
    return nil unless size == S_GAME_P.size
    HoopScrape.boxscore(self[8], f_mat)
  end
end
@@ -0,0 +1,9 @@
1
# Hash extension: boxscore lookup for game rows stored as Hashes
class Hash
  # Look up the boxscore referenced by this Hash's :boxscore_id entry.
  # @param f_mat [Symbol, nil] format; :to_hashes is used when none is given
  # @return [Object] whatever HoopScrape.boxscore returns for that id/format
  def boxscore(f_mat = nil)
    game_id = self[:boxscore_id]
    layout = f_mat || :to_hashes
    HoopScrape.boxscore(game_id, layout)
  end
end
@@ -0,0 +1,60 @@
1
# Cursor-based navigator over an Array.
#
# The cursor starts one position before the first element (-1) and is kept
# between -1 and the list size recorded at construction; both ends are
# sentinel positions that hold no element.
class Navigator
  # Array of data to Navigate
  attr_reader :list

  # Store array and initialize cursor
  # @param list [Array] data to navigate
  def initialize(list)
    @list = list
    @bounds = [-1, list.size] # sentinel positions before-first / after-last
    @cursor = -1
  end

  # Return the item located at the current navigation cursor
  # @return [Object, nil] Array Element, nil while the cursor is before the first item
  def curr
    self[@cursor] if @cursor >= 0
  end

  # Increments the navigation cursor and returns the item at that location
  # @return [Object, nil] Array Element, nil once past the last item
  def next
    self[(@cursor += 1)]
  end

  # Decrements the navigation cursor and returns the item at that location
  # @return [Object, nil] Array Element, nil once before the first item
  def prev
    self[(@cursor -= 1)]
  end

  # Pulls the navigation cursor back inside its bounds, then returns the item
  # at the given location. Returns the underlying array if no index is given.
  #
  # Only real element positions (0 up to, not including, the recorded size)
  # yield an element. The -1 sentinel must not reach Array#[], where it would
  # wrap around to the last element.
  #
  # @param idx [Integer, nil] position to read
  # @return [Object, Array, nil] Array Element, the Array itself, or nil
  def [](idx = nil)
    return @list if idx.nil?
    @cursor = @bounds[0] if @cursor < @bounds[0]
    @cursor = @bounds[1] if @cursor > @bounds[1]
    @list[idx] if (0...@bounds[1]).cover?(idx)
  end

  # Checks if the requested index is a valid cursor position, i.e. lies
  # between the two sentinel positions (inclusive)
  # @param idx [Integer] position to check
  # @return [Boolean]
  def inbounds?(idx)
    (@bounds[0]..@bounds[1]).cover?(idx)
  end

  # Updates the cursor and returns the first element of the array
  # @return [Object, nil]
  def first
    self[@cursor = 0]
  end

  # Updates the cursor and returns the last element of the array
  # @return [Object, nil]
  def last
    self[@cursor = @list.size - 1]
  end

  # Returns the size of the underlying array
  # @return [Integer]
  def size
    @list.size
  end
end
@@ -0,0 +1,174 @@
1
+ require 'date'
2
+ require_relative './PrintUtils.rb'
3
+
4
# Access NBA boxscore data
class NbaBoxScore
  include NbaUrls

  # @return [String] Game Date, formatted "YYYY-MM-DD HH:MM:SS"
  attr_reader :gameDate

  # @return [String] Away Team Name
  attr_reader :awayName

  # @return [Navigator] Away Team Stats Array
  # @note (see SymbolDefaults::BOX_P)
  # @see BOX_P
  attr_reader :awayPlayers

  # @return [[String]] Away Team Combined Stats
  # @note (see SymbolDefaults::BOX_T)
  # @see BOX_T
  attr_reader :awayTotals

  # @return [String] Home Team Name
  attr_reader :homeName

  # @return [Navigator] Home Team Stats Array
  # @note (see #awayPlayers)
  # @see BOX_P
  attr_reader :homePlayers

  # @return [[String]] Home Team Combined Stats
  # @note (see #awayTotals)
  # @see BOX_T
  attr_reader :homeTotals

  # Boxscore ID
  attr_reader :id
  # Last value of each team's totals row
  attr_reader :awayScore
  attr_reader :homeScore

  # Scrape Box Score Data
  #
  # Player and team stats are only read when the parsed game date carries the
  # placeholder time 00:00:00 (see #readGameDate).
  #
  # @param args [Hash] options
  # @option args [Integer, String] :game_id Boxscore ID, appended to the boxscore URL
  # @option args [String] :file path of a saved boxscore page, used when :game_id is nil
  # @option args [Symbol] :format Array conversion to apply to the stats
  #   (sent to the row Array, e.g. :to_hashes); nil keeps plain Arrays
  # @example
  #   bs = NbaBoxScore.new(game_id: 400828035)
  def initialize(args)
    doc = getNokoDoc(args[:game_id], args[:file])
    return if doc.nil?
    @id = args[:game_id].to_s
    @gameDate = readGameDate(doc)
    @awayName, @homeName = readTeamNames(doc)
    return unless @gameDate.include?('00:00:00') # Only past games have stats
    @awayPlayers, @awayTotals, @awayScore = readTeamStats(doc, 'away', args[:format])
    @homePlayers, @homeTotals, @homeScore = readTeamStats(doc, 'home', args[:format])
  end

  private

  # Parse the boxscore page from a local file (no game id) or from its URL.
  # Both sources are opened in block form so the handle is closed after parsing.
  # @param game_id [Integer, String, nil] Boxscore ID
  # @param file [String, nil] path of a saved page
  # @return [Nokogiri::HTML::Document]
  def getNokoDoc(game_id, file)
    # File.open (not Kernel#open) so a path can never be run as a "|command"
    return File.open(file) { |io| Nokogiri::HTML(io) } if game_id.nil? # Parse File
    # NOTE(review): Kernel#open only fetches URLs through open-uri, and no
    # longer does so on Ruby 3.0+ (URI.open is required there) - confirm the
    # supported Ruby versions before changing this call.
    open(boxScoreUrl + game_id.to_s) { |io| Nokogiri::HTML(io) } # Parse URL
  end

  # Reads the game date from a Nokogiri::Doc
  #
  # The date is the third '-' separated part of the page title; the time is
  # the text of the first "game-time" span. A missing span or the text
  # 'Final' yields the placeholder time 00:00:00.
  #
  # @param d [Nokogiri::HTML::Document]
  # @return [String] Game date formatted "%Y-%m-%d %H:%M:%S"
  # @note
  #   No time zone conversion is applied here; the time is formatted as parsed.
  def readGameDate(d)
    date = d.title.split('-')[2].delete(',')
    time_node = d.xpath('//span[contains(@class,"game-time")]')[0]
    time = time_node.nil? ? '' : time_node.text.strip
    time = '00:00:00' if time == 'Final' || time.empty?
    DateTime.parse(date + ' ' + time).strftime('%Y-%m-%d %H:%M:%S')
  end

  # Reads the team names from a Nokogiri::Doc
  #
  # Joins the first four long-name/short-name spans pairwise.
  #
  # @param d [Nokogiri::HTML::Document]
  # @return [String, String] Away team, Home team
  # @example
  #   away, home = readTeamNames(doc)
  def readTeamNames(d)
    names = d.xpath('//span[@class="long-name" or @class="short-name"]')
    away = names[0].text + ' ' + names[1].text
    home = names[2].text + ' ' + names[3].text
    [away, home]
  end

  # Extract Player Stats
  # @param rows [[Nokogiri::XML::NodeSet]] Player rows
  # @param tid [String] Team ID
  # @param new_form [Symbol, nil] Array conversion sent to the result with S_BOX_P
  # @return [[[String]]] Processed player rows (converted when new_form is given)
  def processPlayerRows(rows, tid, new_form)
    result = rows.each_with_index.map do |row, index|
      curr_row = [tid] # Team ID

      row.children.each do |cell| # Process Columns
        c_val = cell.text.strip
        case cell.attribute('class').text
        when 'name'
          link = cell.children[0]
          curr_row << link.attribute('href').text[%r{id/(\d+)}, 1] # Player ID
          curr_row << link.children[0].text.strip # Player Short Name (i.e. D. Wade)
          curr_row << cell.children[1].text.strip # Position
        when 'fg', '3pt', 'ft'
          curr_row.concat(c_val.split('-')) # Made-Attempts
        else
          curr_row << c_val
        end
      end

      curr_row << (index < 5).to_s # The first five rows are flagged as starters
    end
    return result.send(new_form, S_BOX_P) unless new_form.nil?
    result
  end

  # Extract Team Stats
  # @param row [Nokogiri::XML::Node] Cumulative Team Stats row
  # @param tid [String] Team ID
  # @param new_form [Symbol, nil] Array conversion sent to the result with S_BOX_T
  # @return [Array] [team totals, last totals value (stored as the team score)]
  def processTeamRow(row, tid, new_form)
    result = []
    row.children.each do |cell|
      c_val = cell.text.strip
      case cell.attribute('class').text
      when 'name'
        result << tid
      when 'fg', '3pt', 'ft'
        result.concat(c_val.split('-')) # Made-Attempts
      else
        result << c_val unless c_val.empty?
      end
    end
    return [result.send(new_form, S_BOX_T).first, result.last] unless new_form.nil?
    [result, result.last]
  end

  # Reads the team stats from a Nokogiri::Doc
  # @param d [Nokogiri::HTML::Document]
  # @param id [String] Team selector -> home/away
  # @param new_form [Symbol, nil] Array conversion for the stats
  # @return [Array] [Navigator of player stats, team totals, team score]
  # @example
  #   players, totals, score = readTeamStats(doc, 'away', nil)
  def readTeamStats(d, id, new_form)
    # Extract player tables
    p_tables = d.xpath('//div[@class="sub-module"]/*/table/tbody')

    # NodeSet#[] takes (start, length): each team owns two consecutive tables
    if id == 'away'
      p_tab = p_tables[0, 2]
      tid = getTid(@awayName)
    else
      p_tab = p_tables[2, 2]
      tid = getTid(@homeName)
    end

    player_rows = p_tab.xpath('tr[not(@class)]') # Ignore TEAM rows
    team_row = p_tab.xpath('tr[@class="highlight"]')[0] # Ignore Percentage row

    player_stats = processPlayerRows(player_rows, tid, new_form)
    team_totals, team_score = processTeamRow(team_row, tid, new_form)

    [Navigator.new(player_stats), team_totals, team_score]
  end
end
@@ -0,0 +1,82 @@
1
# Read basic bio info from ESPN Player page
class NbaPlayer
  include NbaUrls

  # @return [String] Name
  attr_accessor :name
  # @return [String] Position
  attr_accessor :position
  # @return [String] Age as a numeric String ("0" when it cannot be read)
  attr_accessor :age
  # @return [String] College
  attr_accessor :college
  # @return [String, Integer] Weight as read from the page, 0 when missing
  attr_accessor :weight
  # @return [String, Integer] Height (ft) as read from the page, 0 when missing
  attr_accessor :h_ft
  # @return [String, Integer] Height (in) as read from the page, 0 when missing
  attr_accessor :h_in

  # alias for h_ft
  alias_method :height_ft, :h_ft
  # alias for h_in
  alias_method :height_in, :h_in

  # Read Player Data
  #
  # A non-empty player id is fetched from the player URL; otherwise the given
  # file is parsed. Nothing is populated when the file cannot be read.
  #
  # @param espn_player_id [Integer, String] ESPN player id ('' or nil to use file)
  # @param file [String] path of a saved player page
  def initialize(espn_player_id, file = '')
    espn_player_id = espn_player_id.to_s
    doc = espn_player_id.empty? ? parseFile(file) : parseUrl(playerUrl + espn_player_id)
    return if doc.nil?

    readInfo(doc)
  end

  private

  # Parse a saved page; returns nil when the file cannot be opened or parsed.
  # File.open in block form closes the handle and never runs "|command" paths.
  def parseFile(file)
    File.open(file) { |io| Nokogiri::HTML(io) }
  rescue StandardError
    nil
  end

  # Fetch and parse a player page.
  # NOTE(review): Kernel#open only fetches URLs through open-uri, and no
  # longer does so on Ruby 3.0+ (URI.open is required there) - confirm the
  # supported Ruby versions before changing this call.
  def parseUrl(url)
    open(url) { |io| Nokogiri::HTML(io) }
  end

  # Extract basic bio info into class attributes
  # @param d [Nokogiri::HTML::Document]
  def readInfo(d)
    @name = d.xpath("//div[@class='mod-content']/*/h1 | //div[@class='mod-content']/h1")[0].text.strip
    @position = d.xpath("//ul[@class='general-info']/li")[0].text.gsub(/#\d*\s*/, '')
    # Strip only the first "College" (the label) so that school names
    # containing the word, e.g. "Boston College", stay intact
    @college = d.xpath('//span[text() = "College"]/parent::li').text.sub('College', '')

    height, weight = gatherHeightWeight(d)

    @weight = processWeight(weight)
    processHeight(height)
    processAge(d)
  end

  # Read the two digits following ": " in the "Born" list item; "0" when absent
  def processAge(d)
    born = d.xpath('//span[text() = "Born"]/parent::li').text
    @age = born[/:\s(\d\d)/, 1].to_i.to_s
  end

  # Split the second general-info item on ','
  # @return [Array<String>, nil] [height, weight], nil when the item is missing
  def gatherHeightWeight(d)
    h_w = d.xpath("//ul[@class='general-info']/li")[1]
    h_w.text.split(',') unless h_w.nil?
  end

  # First whitespace separated token of the weight text, 0 when there is none
  def processWeight(weight)
    weight.to_s.split(' ').first || 0
  end

  # Split the height text on the feet mark; each missing part defaults to 0
  def processHeight(height)
    feet, inches = height.to_s.strip.split("'")
    @h_ft = feet || 0
    @h_in = inches ? inches.delete('"').strip : 0
  end
end
@@ -0,0 +1,74 @@
1
+ require_relative './Navigator'
2
# Access NBA roster data
class NbaRoster
  include NbaUrls
  include PrintUtils

  # @return [String] Coach Name
  attr_reader :coach

  # Returns Team Roster
  # @return [Navigator] Player List Table ({ROSTER Row Description})
  # @see ROSTER
  attr_reader :players

  # Scrape Roster Data
  #
  # Nothing is populated when neither :team_id nor :file is given.
  #
  # @param args [Hash] options
  # @option args [String] :team_id Team ID used to build the roster URL
  # @option args [String] :file path of a saved roster page (used when :team_id is absent)
  # @option args [Symbol] :format Array conversion to apply to the player table
  #   (sent to the table Array, e.g. :to_hashes); nil keeps plain Arrays
  # @example
  #   r = NbaRoster.new(team_id: 'UTA')
  #   r = NbaRoster.new(file: 'test/data/rosterData.html')
  def initialize(args = {})
    if args[:team_id]
      url = formatTeamUrl(args[:team_id], teamRosterUrl) # Generate URL
      # NOTE(review): Kernel#open only fetches URLs through open-uri, and no
      # longer does so on Ruby 3.0+ (URI.open is required there) - confirm the
      # supported Ruby versions before changing this call.
      doc = open(url) { |io| Nokogiri::HTML(io) } # Get DOM
    elsif args[:file]
      # Block form closes the handle; File.open never runs "|command" paths
      doc = File.open(args[:file]) { |io| Nokogiri::HTML(io) }
    end
    return if doc.nil?

    # The team id stored in each row is always derived from the page title
    # (the text before the first four-digit number); args[:team_id] is only
    # used to build the URL.
    team_id = getTid(doc.title.split(/\d{4}/).first.strip)
    list = doc.xpath('//div/div/table/tr')
    p_list = list[2, list.length - 3] # Get Player Nodes (skip 2 leading rows and the trailing coach row)

    @coach = list[-1].children.first.text.split(':').last.strip # Read Coach Name
    @players = Navigator.new processPlayerTable(p_list, team_id, args[:format])
  end

  private

  # Collect Roster Data
  # @param table [[Nokogiri::XML::NodeSet]] Roster Table
  # @param team_id [String] Team ID
  # @param new_form [Symbol, nil] Array conversion sent to the result with S_ROSTER
  # @return [[[String]]] Processed Roster Data (converted when new_form is given)
  def processPlayerTable(table, team_id, new_form)
    result = table.map do |row|
      tmp = [team_id] # Start row with Team ID
      row.children.each_with_index { |cell, cnt| processCell(cell, tmp, cnt) }
      tmp
    end
    return result.send(new_form, S_ROSTER) unless new_form.nil?
    result
  end

  # Extract and Normalize Player Data
  # @param cell [Nokogiri::XML::Node] table cell
  # @param tmp [Array<String>] row being built (appended to in place)
  # @param cnt [Integer] column index of the cell
  def processCell(cell, tmp, cnt)
    txt = cell.text.strip
    case cnt
    when 0, 2, 3, 5, 6 # 0 Player No, 2 Position, 3 Age, 5 Weight, 6 College
      tmp << txt
    when 1 # Player Name
      tmp << txt
      tmp << cell.children.first.attribute('href').text[%r{id/(\d+)}, 1] # Player ID
    when 4 # Player Height
      tmp.concat(txt.split('-'))
    when 7 # Salary
      # Remove extraneous symbols & default to 0
      tmp << txt.delete('$,').strip.to_i.to_s
    end
  end
end