hoopscrape 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.yardopts +7 -0
- data/CHANGELOG.md +20 -0
- data/LICENSE +619 -0
- data/README.md +390 -0
- data/Rakefile +22 -0
- data/lib/hoopscrape.rb +113 -0
- data/lib/hoopscrape/ArrayConversions.rb +56 -0
- data/lib/hoopscrape/Hash.rb +9 -0
- data/lib/hoopscrape/Navigator.rb +60 -0
- data/lib/hoopscrape/NbaBoxScore.rb +174 -0
- data/lib/hoopscrape/NbaPlayer.rb +82 -0
- data/lib/hoopscrape/NbaRoster.rb +74 -0
- data/lib/hoopscrape/NbaSchedule.rb +258 -0
- data/lib/hoopscrape/NbaTeamList.rb +63 -0
- data/lib/hoopscrape/NbaUrls.rb +86 -0
- data/lib/hoopscrape/PrintUtils.rb +34 -0
- data/lib/hoopscrape/String.rb +16 -0
- data/lib/hoopscrape/Struct.rb +9 -0
- data/lib/hoopscrape/SymbolDefaults.rb +31 -0
- data/lib/hoopscrape/requires.rb +28 -0
- data/lib/tasks/build.rake +28 -0
- data/lib/tasks/rubo.rake +30 -0
- metadata +141 -0
@@ -0,0 +1,56 @@
|
|
1
|
+
# Array Extensions for Type Conversion
class Array
  # Create Hash Array
  #
  # A receiver whose first element is an Array is treated as a table (one Hash
  # per row); any other receiver is treated as a single row.
  #
  # @param keys [Array] Symbols to be used as field names. When empty, a
  #   default key set is selected by {#checkKeys} from the row width.
  # @return [[Hash]] Array<Hash>
  # @example
  #   teams  = es.teamList # Array of Team data
  #   hash_a = teams.to_hashes
  #   #=> [{:t_abbr=>"BOS", :t_name=>"Boston Celtics", :division=>"Atlantic", :conference=>"Eastern"} ... ]
  def to_hashes(keys = [])
    return [] if empty?
    two_d = first.is_a?(Array)                          # Check for 2D array
    keys  = checkKeys(keys, two_d ? first.size : size)  # Determine keys
    rows  = two_d ? self : [self]                       # A 1D array is one row
    rows.map { |row| Hash[keys.map.with_index { |key, idx| [key, row[idx]] }] }
  end

  # Create Struct Array
  #
  # Key resolution is delegated to {#to_hashes} so that 1D and 2D receivers
  # pick their default keys from the row width, not from the size of the
  # first element.
  #
  # @param keys [Array] Symbols to be used as field names
  # @return [[Struct]] Array<Struct>
  # @example
  #   teams   = es.teamList # Array of Team data
  #   structs = teams.to_structs
  #   #=> [#<struct t_abbr="BOS", t_name="Boston Celtics", division="Atlantic", conference="Eastern"> ... ]
  def to_structs(keys = [])
    return [] if empty?
    hashes   = to_hashes(keys)
    # Every hash is built from the same key list, so one Struct class serves all rows.
    row_type = Struct.new(*hashes.first.keys)
    hashes.map { |hash| row_type.new(*hash.values) }
  end

  # Determine default field names
  # @param keys [[Symbol]] Field Names; returned untouched when not empty
  # @param k_id [Int] Key Identifier (number of fields in a row)
  # @return [[Symbol]] the first default key set whose size equals +k_id+
  # @note NOTE(review): when no default matches, the value of +each+ (the list
  #   of candidate key sets itself) is returned. Callers should pass explicit
  #   keys for rows with a non-standard width.
  def checkKeys(keys, k_id)
    return keys unless keys.empty?
    [S_BOX_P, S_BOX_T, S_GAME_F, S_GAME_P, S_ROSTER, S_TEAM].each do |default|
      return default if default.size.eql?(k_id)
    end
  end

  # Replace old symbol with new symbol in Array (in place)
  # @param old_sym [Symbol] Symbol to remove
  # @param new_sym [Symbol] Symbol to add
  # @return [Array] self
  def change_sym!(old_sym, new_sym)
    map! { |x| x.eql?(old_sym) ? new_sym : x }
  end

  # Get an NbaBoxscore
  #
  # Only available on rows that are as wide as S_GAME_P.
  #
  # @param f_mat [Symbol] Format, forwarded to HoopScrape.boxscore
  # @return [NbaBoxscore, nil] Boxscore, or nil when the row width differs
  def boxscore(f_mat = nil)
    return nil unless size == S_GAME_P.size
    # presumably index 8 holds the boxscore id within S_GAME_P -- TODO confirm
    HoopScrape.boxscore(self[8], f_mat)
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Array Navigator
#
# Wraps an Array with a movable cursor. The cursor starts at -1 (before the
# first element) and is clamped to the range -1..list.size, where both end
# positions mean "no element".
class Navigator
  # Array of data to Navigate
  attr_reader :list

  # Store array and initialize cursor
  # @param list [Array] data to navigate; its size is captured once, here
  def initialize(list)
    @list   = list
    @bounds = [-1, list.size]
    @cursor = -1
  end

  # Return the element located at the current navigation cursor
  # @return [[Object]] Array Element, or nil while the cursor is before the start
  def curr
    self[@cursor] if @cursor >= 0
  end

  # Increments the navigation cursor and return the item at that location
  # @return (see #curr)
  def next
    self[(@cursor += 1)]
  end

  # Decrements the navigation cursor and return the item at that location
  #
  # Stepping back from the first element parks the cursor at -1 and returns
  # nil. The lookup is skipped in that case because Array#[] with -1 would
  # otherwise wrap around and return the last element.
  # @return (see #curr)
  def prev
    @cursor -= 1
    return self[@cursor] if @cursor >= 0
    @cursor = @bounds[0]
    nil
  end

  # Updates the navigation cursor if out of bounds. Returns the item at the given location.
  # Returns the underlying array if no index is given.
  # @param idx [Integer, nil] index to read; does not move the cursor itself
  # @return [Element] Array Element or Array
  def [](idx = nil)
    return @list if idx.nil?
    @cursor = @bounds[0] if @cursor < @bounds[0]
    @cursor = @bounds[1] if @cursor > @bounds[1]
    @list[idx] if inbounds?(idx) && !@list.nil?
  end

  # Checks if the requested index is within the array boundaries
  # (inclusive of the two sentinel positions -1 and list.size)
  def inbounds?(idx)
    (@bounds[0]..@bounds[1]).cover?(idx)
  end

  # Updates the cursor and returns the first element of the array
  def first
    self[@cursor = 0]
  end

  # Updates the cursor and returns the last element of the array
  def last
    self[@cursor = @list.size - 1]
  end

  # Returns the size of the underlying array
  def size
    @list.size
  end
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
require 'date'
|
2
|
+
require_relative './PrintUtils.rb'
|
3
|
+
|
4
|
+
# Access NBA boxscore data
class NbaBoxScore
  include NbaUrls

  # @return [String] Game Date
  attr_reader :gameDate

  # @return [String] Away Team Name
  attr_reader :awayName

  # @return [Navigator] Away Team Stats Array
  # @note (see SymbolDefaults::BOX_P)
  # @see BOX_P
  attr_reader :awayPlayers

  # @return [[String]] Away Team Combined Stats
  # @note (see SymbolDefaults::BOX_T)
  # @see BOX_T
  attr_reader :awayTotals

  # @return [String] Home Team Name
  attr_reader :homeName

  # @return [Navigator] Home Team Stats Array
  # @note (see #awayPlayers)
  # @see BOX_P
  attr_reader :homePlayers

  # @return [[String]] Home Team Combined Stats
  # @note (see #awayTotals)
  # @see BOX_T
  attr_reader :homeTotals

  # Boxscore ID
  attr_reader :id
  # Last value of the away team's totals row
  attr_reader :awayScore
  # Last value of the home team's totals row
  attr_reader :homeScore

  # Scrape Box Score Data
  #
  # Player and team stats are only read when the parsed game date carries a
  # 00:00:00 time component (see #readGameDate).
  #
  # @param args [Hash] options
  # @option args [Integer, String] :game_id Boxscore ID, appended to boxScoreUrl
  # @option args [String] :file source to parse when :game_id is nil
  # @option args [Symbol] :format optional Array conversion applied to the
  #   scraped rows (e.g. :to_hashes, :to_structs)
  # @example
  #   bs = NbaBoxScore.new(game_id: 400828035)
  def initialize(args)
    doc = getNokoDoc(args[:game_id], args[:file])
    return if doc.nil?
    @id = args[:game_id].to_s
    @gameDate = readGameDate(doc)
    @awayName, @homeName = readTeamNames(doc)
    return unless @gameDate.index('00:00:00') # Only past games have stats
    @awayPlayers, @awayTotals, @awayScore = readTeamStats(doc, 'away', args[:format])
    @homePlayers, @homeTotals, @homeScore = readTeamStats(doc, 'home', args[:format])
  end

  private

  # Open and parse the boxscore source.
  # The block form of open closes the handle once parsing is done.
  # @param game_id [Integer, String, nil] Boxscore ID; nil selects +file+
  # @param file [String] source used when +game_id+ is nil
  # @return [Nokogiri::HTML::Document]
  # @note NOTE(review): opening a URL through Kernel#open relies on open-uri
  #   and no longer works on Ruby 3.0+ (URI.open is the replacement) -- confirm
  #   the supported Ruby versions before switching.
  def getNokoDoc(game_id, file)
    source = game_id.nil? ? file : boxScoreUrl + game_id.to_s # File or URL
    open(source) { |io| Nokogiri::HTML(io) }
  end

  # Reads the game date from a Nokogiri::Doc
  #
  # The date is the third '-' separated segment of the page title; the time is
  # taken from the first "game-time" span. A missing, empty or 'Final' time is
  # replaced by 00:00:00.
  #
  # @param d [Nokogiri::HTML::Document]
  # @return [String] Game date formatted as '%Y-%m-%d %H:%M:%S'
  # @example
  #   bs.readGameDate(doc)
  #
  def readGameDate(d)
    date  = d.title.split('-')[2].delete(',')
    clock = d.xpath('//span[contains(@class,"game-time")]')[0]
    time  = clock.nil? ? '' : clock.text.strip
    time  = '00:00:00' if time == 'Final' || time.empty?
    DateTime.parse(date + ' ' + time).strftime('%Y-%m-%d %H:%M:%S')
  end

  # Reads the team names from a Nokogiri::Doc
  #
  # Joins the first four long-name / short-name spans pairwise.
  #
  # @param d [Nokogiri::HTML::Document]
  # @return [String, String] Team 1 (away), Team 2 (home)
  # @example
  #   bs.readTeamNames(doc)
  #
  def readTeamNames(d)
    names = d.xpath('//span[@class="long-name" or @class="short-name"]')
    away = names[0].text + ' ' + names[1].text
    home = names[2].text + ' ' + names[3].text
    [away, home]
  end

  # Extract Player Stats
  # @param rows [[Nokogiri::XML::NodeSet]] Player rows
  # @param tid [String] Team ID
  # @param new_form [Symbol, nil] optional Array conversion, called with S_BOX_P
  # @return [[String]] Processed Player Stats (converted when new_form is given)
  def processPlayerRows(rows, tid, new_form)
    result = rows.each_with_index.map do |row, index|
      curr_row = [tid] # Team ID

      row.children.each do |cell| # Process Columns
        c_val = cell.text.strip
        case cell.attribute('class').text
        when 'name'
          link = cell.children[0]
          curr_row << link.attribute('href').text[%r{id/(\d+)}, 1] # Player ID
          curr_row << link.children[0].text.strip                  # Player Short Name (i.e. D. Wade)
          curr_row << cell.children[1].text.strip                  # Position
        when 'fg', '3pt', 'ft'
          # Made-Attempts
          curr_row.concat(c_val.split('-'))
        else
          curr_row << c_val
        end
      end

      curr_row << (index < 5).to_s # Check if Starter (first five rows)
    end
    return result.public_send(new_form, S_BOX_P) unless new_form.nil?
    result
  end

  # Extract Team Stats
  # @param row [[Nokogiri::XML::NodeSet]] Cumulative Team Stats
  # @param tid [String] Team ID
  # @param new_form [Symbol, nil] optional Array conversion, called with S_BOX_T
  # @return [Array] Processed Team Stats, and the last value of the row
  def processTeamRow(row, tid, new_form)
    result = []
    row.children.each do |cell|
      c_val = cell.text.strip
      case cell.attribute('class').text
      when 'name'
        result << tid
      when 'fg', '3pt', 'ft'
        # Made-Attempts
        result.concat(c_val.split('-'))
      else
        next if c_val.empty?
        result << c_val
      end
    end
    return [result.public_send(new_form, S_BOX_T).first, result.last] unless new_form.nil?
    [result, result.last]
  end

  # Reads the team stats from a Nokogiri::Doc
  # @param d [Nokogiri::HTML::Document]
  # @param id [String] Team selector -> home/away
  # @param new_form [Symbol, nil] optional Array conversion for the rows
  # @return [Array] [Navigator of player stats, team totals, team score]
  # @example
  #   bs.readTeamStats(doc, 'away', nil)
  #
  def readTeamStats(d, id, new_form)
    # Extract player tables
    p_tables = d.xpath('//div[@class="sub-module"]/*/table/tbody')

    # NodeSet#[] takes (start, length): each team owns two consecutive tables.
    if id == 'away'
      p_tab = p_tables[0, 2]
      tid   = getTid(@awayName)
    else
      p_tab = p_tables[2, 2]
      tid   = getTid(@homeName)
    end

    player_rows = p_tab.xpath('tr[not(@class)]')            # Ignore TEAM rows
    team_row    = p_tab.xpath('tr[@class="highlight"]')[0]  # Ignore Percentage row

    player_stats = processPlayerRows(player_rows, tid, new_form)
    team_totals, team_score = processTeamRow(team_row, tid, new_form)

    [Navigator.new(player_stats), team_totals, team_score]
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Read basic bio info from ESPN Player page
class NbaPlayer
  include NbaUrls

  # @return [String] Name
  attr_accessor :name
  # @return [String] Position
  attr_accessor :position
  # @return [String] Age as a numeric String ("0" when no age was found)
  attr_accessor :age
  # @return [String] College
  attr_accessor :college
  # @return [String, Integer] Weight as scraped, or 0 when missing
  attr_accessor :weight
  # @return [String, Integer] Height (ft) as scraped, or 0 when missing
  attr_accessor :h_ft
  # @return [String, Integer] Height (in) as scraped, or 0 when missing
  attr_accessor :h_in

  # Read Player Data
  #
  # A non-empty player id is fetched from playerUrl; otherwise +file+ is
  # parsed. A file that cannot be opened or parsed leaves the object empty.
  #
  # @param espn_player_id [Integer, String] ESPN player id
  # @param file [String] source to parse when no player id is given
  # @note NOTE(review): opening a URL through Kernel#open relies on open-uri
  #   and no longer works on Ruby 3.0+ (URI.open is the replacement) -- confirm
  #   the supported Ruby versions before switching.
  def initialize(espn_player_id, file = '')
    espn_player_id = espn_player_id.to_s
    doc = if espn_player_id.empty?
            parseFile(file)
          else
            # Block form closes the handle once the page has been parsed
            open(playerUrl + espn_player_id) { |io| Nokogiri::HTML(io) }
          end
    return if doc.nil?

    readInfo(doc)
  end

  # alias for h_ft
  def height_ft
    @h_ft
  end

  # alias for h_in
  def height_in
    @h_in
  end

  private

  # Parse a local source, best effort.
  # @param file [String]
  # @return [Nokogiri::HTML::Document, nil] nil when the source cannot be read
  def parseFile(file)
    open(file) { |io| Nokogiri::HTML(io) }
  rescue StandardError
    nil
  end

  # Extract basic bio info into class attributes
  # @param d [Nokogiri::HTML::Document]
  def readInfo(d)
    @name = d.xpath("//div[@class='mod-content']/*/h1 | //div[@class='mod-content']/h1")[0].text.strip
    @position = d.xpath("//ul[@class='general-info']/li")[0].text.gsub(/#\d*\s*/, '')
    @college = d.xpath('//span[text() = "College"]/parent::li').text.gsub('College', '')

    height, weight = gatherHeightWeight(d)

    @weight = processWeight(weight)
    processHeight(height)
    processAge(d)
  end

  # Read the first two digits following ': ' in the "Born" list item.
  # Stores "0" when the pattern is not found.
  def processAge(d)
    # The regexp literal must stay on the left so the named capture becomes a local
    /:\s(?<age_num>\d\d)/ =~ d.xpath('//span[text() = "Born"]/parent::li').text
    @age = age_num.to_i.to_s
  end

  # Split the second general-info item on ',' into height and weight parts.
  # @return [[String], nil] nil when the item is missing
  def gatherHeightWeight(d)
    h_w = d.xpath("//ul[@class='general-info']/li")[1]
    h_w.text.split(',') unless h_w.nil?
  end

  # @param weight [String, nil] raw weight text
  # @return [String, Integer] first whitespace separated token, or 0 when missing
  def processWeight(weight)
    return 0 if weight.nil? || weight.empty?
    weight.strip.split(' ')[0]
  end

  # Split a feet'inches" string into @h_ft and @h_in.
  # Missing or blank input yields 0 for both; a value without an inches part
  # yields an empty inches String instead of raising on nil.
  # @param height [String, nil] raw height text
  def processHeight(height)
    height = height.to_s.strip
    if height.empty?
      @h_ft = @h_in = 0
    else
      feet, inches = height.split("'")
      @h_ft = feet
      @h_in = inches.to_s.delete('"').strip
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require_relative './Navigator'
|
2
|
+
# Access NBA roster data
class NbaRoster
  include NbaUrls
  include PrintUtils

  # @return [String] Coach Name
  attr_reader :coach

  # Returns Team Roster
  # @return [Navigator] Player List Table ({ROSTER Row Description})
  # @see ROSTER
  attr_reader :players

  # Scrape Roster Data
  #
  # Nothing is read when neither :team_id nor :file is supplied.
  #
  # @param args [Hash] options
  # @option args [String] :team_id Team ID used to build the roster URL
  # @option args [String] :file source to parse when :team_id is absent
  # @option args [Symbol] :format optional Array conversion applied to the
  #   player rows (e.g. :to_hashes, :to_structs)
  # @example
  #   r = NbaRoster.new(team_id: 'UTA')
  #   r = NbaRoster.new(file: 'test/data/rosterData.html')
  def initialize(args = {})
    doc = fetchRosterDoc(args)
    return if doc.nil?

    # NOTE(review): the row Team ID is always derived from the page title;
    # args[:team_id] is only used to build the URL -- confirm this is intended.
    team_id = getTid(doc.title.split(/\d{4}/).first.strip)
    list    = doc.xpath('//div/div/table/tr')
    p_list  = list[2, list.length - 3] # Get Player Nodes

    @coach   = list[-1].children.first.text.split(':').last.strip # Read Coach Name
    @players = Navigator.new processPlayerTable(p_list, team_id, args[:format])
  end

  private

  # Open and parse the roster source selected by +args+.
  # The block form of open closes the handle once parsing is done.
  # @param args [Hash] constructor options
  # @return [Nokogiri::HTML::Document, nil] nil when no source was given
  # @note NOTE(review): opening a URL through Kernel#open relies on open-uri
  #   and no longer works on Ruby 3.0+ (URI.open is the replacement) -- confirm
  #   the supported Ruby versions before switching.
  def fetchRosterDoc(args)
    if args[:team_id]
      source = formatTeamUrl(args[:team_id], teamRosterUrl) # Generate URL
    elsif args[:file]
      source = args[:file]
    end
    return if source.nil?

    open(source) { |io| Nokogiri::HTML(io) } # Get DOM
  end

  # Collect Roster Data
  # @param table [[Nokogiri::XML::NodeSet]] Roster Table
  # @param team_id [String] Team ID
  # @param new_form [Symbol, nil] optional Array conversion, called with S_ROSTER
  # @return [[[String]]] Processed Roster Data (converted when new_form is given)
  def processPlayerTable(table, team_id, new_form)
    result = table.map do |row|
      tmp = [team_id] # Start row with Team ID
      row.children.each_with_index { |cell, cnt| processCell(cell, tmp, cnt) }
      tmp
    end
    return result.public_send(new_form, S_ROSTER) unless new_form.nil?
    result
  end

  # Extract and Normalize Player Data
  # @param cell [Nokogiri::XML::Node] table cell
  # @param tmp [Array] row being built; values are appended in place
  # @param cnt [Integer] column index of the cell
  def processCell(cell, tmp, cnt)
    txt = cell.text.strip
    case cnt
    when 0, 2, 3, 5, 6 # 0 Player No, 2 Position, 3 Age, 5 Weight, 6 College
      tmp << txt
    when 1 # Player Name
      tmp << txt
      tmp << cell.children.first.attribute('href').text[%r{id/(\d+)}, 1] # Player ID
    when 4 # Player Height
      tmp.concat(txt.split('-'))
    when 7 # Salary
      # Remove extraneous symbols & default to 0
      tmp << txt.delete('$,').to_i.to_s
    end
  end
end
|