espn_scraper 1.3.1 → 1.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e71f14160cc29d944f5860b4eaa3f983d353890
4
- data.tar.gz: 75353026bf2aa879d8151a4c1455314e69af8c78
3
+ metadata.gz: 92cd55c7052685e30d2dbc52d24a058cd7e7ef46
4
+ data.tar.gz: d4751ef0577a07dd87e2721af60dfd5041129656
5
5
  SHA512:
6
- metadata.gz: 5f06578dd6ca1eeefa13910a35a03e84a4781499de5d478c924707db90a476780fb25a9276a6ede4d3b66e1135c31dd1b0c105ad3bd37cc213cd15ae050720f6
7
- data.tar.gz: d2b1eda63a460fc734f7280ae171ce0e9260612d1513bc6507dda595d8d484d038a7dc3df213c5066fcfb51151799156cd81b51a07a75bd204cbec1d99c267e8
6
+ metadata.gz: ac9fcbc028381b407c01ecde9ea3a47d75658c6ca582bdb6024c03e7333e9bd0ec41e5d27a48664ce331a48ea6d541e150918eb13c74b8d9d7d2f201c3d4ef25
7
+ data.tar.gz: bac030d2c801f9749a85be6d58986062dc79f94d27d7110c1cd006f1bc23d43f64c7320881329780dca7c9f674815e81bc1b9f54762ab459f12b089ac39083f9
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # ESPN Scraper
2
2
 
3
- ESPN Scraper is a simple gem for scraping teams and scores from `ESPN`'s website. Please note that `ESPN` is not involved with this gem or me in any way. I chose `ESPN` because it is a leader in sports statistics and has a robust website.
3
+ ESPN Scraper is a simple gem for scraping teams and scores from `ESPN`'s website. Please note that `ESPN` is not involved with this gem or me in any way. I chose `ESPN` because it is a leader in sports statistics and has a robust website.
4
4
 
5
5
  ```ruby
6
6
  ESPN.responding?
@@ -55,11 +55,11 @@ You can get the teams in each league by acronym. It returns a hash of each divis
55
55
  ```ruby
56
56
  ESPN.get_teams_in('nba')
57
57
  # => {
58
- # "atlantic"=> [
59
- # { :name => "Boston Celtics", :data_name => "bos" },
60
- # { :name => "Brooklyn Nets", :data_name => "bkn" },
61
- # { :name => "New York Knicks", :data_name => "ny" },
62
- # { :name => "Philadelphia 76ers", :data_name => "phi" },
58
+ # "atlantic"=> [
59
+ # { :name => "Boston Celtics", :data_name => "bos" },
60
+ # { :name => "Brooklyn Nets", :data_name => "bkn" },
61
+ # { :name => "New York Knicks", :data_name => "ny" },
62
+ # { :name => "Philadelphia 76ers", :data_name => "phi" },
63
63
  # { :name => "Toronto Raptors", :data_name => "tor" }
64
64
  # ]
65
65
  # "pacific" => ...
@@ -121,7 +121,7 @@ require 'espn_scraper'
121
121
 
122
122
  ## Contributing
123
123
 
124
- Please report back if something breaks on you!
124
+ Please report back if something breaks on you!
125
125
 
126
126
  Also, please let me know if any of the data names become outdated. For instance, a bunch of NFL data names were recently changed. You can make temporary fixes with the following:
127
127
 
@@ -129,7 +129,19 @@ Also please let me know if any of the data names get outdated. For instance a bu
129
129
  ESPN::DATA_NAME_FIXES['nfl']['gnb'] = 'gb'
130
130
  ```
131
131
 
132
- Future plans:
132
+ Running tests:
133
+ ```
134
+ rake test
135
+ ```
136
+
137
+ Re-building the gem and installing locally:
138
+ ```
139
+ gem build espn_scraper.gemspec
140
+ gem uninstall espn_scraper
141
+ gem install espn_scraper-x.x.x.gem
142
+ ```
143
+
144
+ Future plans:
133
145
  - Get start and end dates of a season
134
146
 
135
147
  ### Thank You
data/Rakefile CHANGED
@@ -1,4 +1,3 @@
1
- require "bundler/gem_tasks"
2
1
  require 'rake/testtask'
3
2
 
4
3
  Rake::TestTask.new do |t|
@@ -3,11 +3,11 @@ require 'nokogiri'
3
3
 
4
4
  module ESPN
5
5
  class << self
6
-
6
+
7
7
  def responding?
8
- HTTParty.get('http://espn.go.com/').code == 200
8
+ HTTParty.get('http://www.espn.com').code == 200
9
9
  end
10
-
10
+
11
11
  def down?
12
12
  !responding?
13
13
  end
@@ -15,9 +15,8 @@ module ESPN
15
15
  # Ex: ESPN.url('scores')
16
16
  # ESPN.url('teams', 'nba')
17
17
  def url(*path)
18
- subdomain = (path.first == 'scores') ? path.shift : nil
19
- domain = [subdomain, 'espn', 'go', 'com'].compact.join('.')
20
- ['http:/', domain, *path].join('/')
18
+ path.shift if path.first == 'scores'
19
+ ['http://www.espn.com', *path].join('/')
21
20
  end
22
21
 
23
22
  # Returns Nokogiri HTML document
@@ -29,22 +28,22 @@ module ESPN
29
28
  Nokogiri::HTML(response.body)
30
29
  else
31
30
  raise ArgumentError, error_message(url, path)
32
- end
31
+ end
33
32
  end
34
-
33
+
35
34
  def dasherize(str)
36
35
  str.strip.downcase.gsub(/\s+/, '-')
37
36
  end
38
-
39
-
37
+
38
+
40
39
  private
41
-
42
-
43
-
40
+
41
+
42
+
44
43
  def error_message(url, path)
45
44
  "The url #{url} from the path #{path} did not return a valid page."
46
45
  end
47
-
46
+
48
47
  end
49
48
  end
50
49
 
@@ -4,14 +4,14 @@ require 'json'
4
4
 
5
5
  module ESPN
6
6
  SEASONS = {
7
- preseason: 1,
8
- regular_season: 2,
9
- postseason: 3
7
+ preseason: 1,
8
+ regular_season: 2,
9
+ postseason: 3
10
10
  }
11
11
 
12
- mlb_ignores = %w(
13
- florida-state u-of-south-florida georgetown fla.-southern northeastern boston-college
14
- miami-florida florida-intl canada hanshin yomiuri sacramento springfield corpus-christi
12
+ mlb_ignores = %w(
13
+ florida-state u-of-south-florida georgetown fla.-southern northeastern boston-college
14
+ miami-florida florida-intl canada hanshin yomiuri sacramento springfield corpus-christi
15
15
  round-rock carolina manatee-cc mexico cincinnati-(f) atlanta-(f) frisco toledo norfolk
16
16
  fort-myers tampa-bay-(f) nl-all-stars al-all-stars
17
17
  )
@@ -23,7 +23,7 @@ module ESPN
23
23
  team-chara team-alfredsson
24
24
  )
25
25
 
26
- ncf_ignores = %w( paul-quinn san-diego-christian ferris-st notre-dame-college chaminade
26
+ ncf_ignores = %w( paul-quinn san-diego-christian ferris-st notre-dame-college chaminade
27
27
  w-new-mexico n-new-mexico tx-a&m-commerce nw-oklahoma-st )
28
28
 
29
29
  IGNORED_TEAMS = (mlb_ignores + nhl_ignores + nba_ignores + ncf_ignores).inject({}) do |h, team|
@@ -31,29 +31,29 @@ module ESPN
31
31
  end
32
32
 
33
33
  DATA_NAME_EXCEPTIONS = {
34
- 'nets' => 'bkn',
35
- 'supersonics' => 'okc',
36
- 'hornets' => 'no',
34
+ 'nets' => 'bkn',
35
+ 'supersonics' => 'okc',
36
+ 'hornets' => 'no',
37
37
 
38
- 'marlins' => 'mia'
38
+ 'marlins' => 'mia'
39
39
  }.merge(IGNORED_TEAMS)
40
40
 
41
41
  DATA_NAME_FIXES = {
42
- 'nfl' => {
43
- 'nwe' => 'ne',
44
- 'kan' => 'kc',
45
- 'was' => 'wsh',
46
- 'nor' => 'no',
47
- 'gnb' => 'gb',
48
- 'sfo' => 'sf',
49
- 'tam' => 'tb',
50
- 'sdg' => 'sd'
51
- },
52
- 'mlb' => {},
53
- 'nba' => {},
54
- 'nhl' => {},
55
- 'ncf' => {},
56
- 'ncb' => {}
42
+ 'nfl' => {
43
+ 'nwe' => 'ne',
44
+ 'kan' => 'kc',
45
+ 'was' => 'wsh',
46
+ 'nor' => 'no',
47
+ 'gnb' => 'gb',
48
+ 'sfo' => 'sf',
49
+ 'tam' => 'tb',
50
+ 'sdg' => 'sd'
51
+ },
52
+ 'mlb' => {},
53
+ 'nba' => {},
54
+ 'nhl' => {},
55
+ 'ncf' => {},
56
+ 'ncb' => {}
57
57
  }
58
58
 
59
59
  # Example output:
@@ -68,13 +68,6 @@ module ESPN
68
68
 
69
69
  class << self
70
70
 
71
- def get_superbowl_score
72
- markup = Scores.markup_from_superbowl
73
- scores = Scores.home_away_parse(markup, false)
74
- add_league_and_fixes(scores, 'nfl')
75
- scores
76
- end
77
-
78
71
  def get_nfl_scores(year, week)
79
72
  markup = Scores.markup_from_year_and_week('nfl', year, week)
80
73
  scores = Scores.home_away_parse(markup)
@@ -84,7 +77,7 @@ module ESPN
84
77
 
85
78
  def get_mlb_scores(date)
86
79
  markup = Scores.markup_from_date('mlb', date)
87
- scores = Scores.home_away_parse(markup)
80
+ scores = Scores.home_away_parse(markup, date)
88
81
  scores.each { |report| report[:league] = 'mlb' }
89
82
  scores
90
83
  end
@@ -104,7 +97,7 @@ module ESPN
104
97
  end
105
98
 
106
99
  def get_ncf_scores(year, week)
107
- markup = Scores.markup_from_year_and_week('college-football', year, week)
100
+ markup = Scores.markup_from_year_and_week('college-football', year, week, 80)
108
101
  scores = Scores.ncf_parse(markup)
109
102
  scores.each { |report| report[:league] = 'college-football' }
110
103
  scores
@@ -112,32 +105,15 @@ module ESPN
112
105
 
113
106
  alias_method :get_college_football_scores, :get_ncf_scores
114
107
 
115
- def get_ncb_scores(date, conference_id=nil, final_only=true)
116
- if conference_id
117
- markup = Scores.markup_from_date_and_conference('ncb', date, conference_id)
118
- else
119
- markup = Scores.markup_from_date('ncb', date)
120
- end
121
-
122
- scores = Scores.home_away_parse(markup, final_only)
123
-
124
- scores.each do |report|
125
- report[:league] ||= 'mens-college-basketball'
126
- report[:game_date] ||= date
127
- end
128
-
108
+ def get_ncb_scores(date, conference_id)
109
+ markup = Scores.markup_from_date_and_conference('ncb', date, conference_id)
110
+ scores = Scores.home_away_parse(markup, date)
111
+ scores.each { |report| report.merge! league: 'mens-college-basketball', game_date: date }
129
112
  scores
130
113
  end
131
114
 
132
115
  alias_method :get_college_basketball_scores, :get_ncb_scores
133
116
 
134
- def get_ncb_abbreviations(date)
135
- markup = Scores.markup_from_date('ncb', date)
136
-
137
- teams = Scores.team_abbreviation_parse(markup)
138
- teams
139
- end
140
-
141
117
  def add_league_and_fixes(scores, league)
142
118
  scores.each do |report|
143
119
  report[:league] = league
@@ -156,12 +132,12 @@ module ESPN
156
132
 
157
133
  # Get Markup
158
134
 
159
- def markup_from_superbowl
160
- ESPN.get 'scores', 'nfl', "scoreboard/_/group/80/year/#{Time.now.year - 1}/seasontype/3/week/5"
161
- end
162
-
163
- def markup_from_year_and_week(league, year, week)
164
- ESPN.get 'scores', league, "scoreboard/_/group/80/year/#{year}/seasontype/2/week/#{week}"
135
+ def markup_from_year_and_week(league, year, week, group=nil)
136
+ if group
137
+ ESPN.get 'scores', league, "scoreboard/_/group/#{group}/year/#{year}/seasontype/2/week/#{week}"
138
+ else
139
+ ESPN.get 'scores', league, "scoreboard/_/year/#{year}/seasontype/2/week/#{week}"
140
+ end
165
141
  end
166
142
 
167
143
  def markup_from_date(league, date)
@@ -176,7 +152,7 @@ module ESPN
176
152
 
177
153
  # parsing strategies
178
154
 
179
- def home_away_parse(doc, final=true)
155
+ def home_away_parse(doc, date=nil)
180
156
  scores = []
181
157
  games = []
182
158
  espn_regex = /window\.espn\.scoreboardData \t= (\{.*?\});/
@@ -190,11 +166,16 @@ module ESPN
190
166
  games.each do |game|
191
167
  # Game must be regular or postseason
192
168
  next unless game['season']['type'] == SEASONS[:regular_season] || game['season']['type'] == SEASONS[:postseason]
169
+
170
+ # Game must not be suspended if it was supposed to start on the query date.
171
+ # This prevents fetching scores for suspended games which are not yet completed.
172
+ game_start = DateTime.parse(game['date']).to_time.utc + Time.zone_offset('EDT')
173
+ next if date && game['competitions'][0]['wasSuspended'] && game_start.to_date == date
174
+
193
175
  score = {}
194
176
  competition = game['competitions'].first
195
-
196
177
  # Score must be final
197
- if !final || competition['status']['type']['detail'] =~ /^Final/
178
+ if competition['status']['type']['detail'] =~ /^Final/
198
179
  competition['competitors'].each do |competitor|
199
180
  if competitor['homeAway'] == 'home'
200
181
  score[:home_team] = competitor['team']['abbreviation'].downcase
@@ -205,34 +186,12 @@ module ESPN
205
186
  end
206
187
  end
207
188
  score[:game_date] = DateTime.parse(game['date'])
208
- score[:status] = competition['status']['type']['detail']
209
189
  scores << score
210
190
  end
211
191
  end
212
192
  scores
213
193
  end
214
194
 
215
- def team_abbreviation_parse(doc)
216
- games = []
217
- teams = {}
218
- espn_regex = /window\.espn\.scoreboardData \t= (\{.*?\});/
219
- doc.xpath("//script").each do |script_section|
220
- if script_section.content =~ espn_regex
221
- espn_data = JSON.parse(espn_regex.match(script_section.content)[1])
222
- games = espn_data['events']
223
- break
224
- end
225
- end
226
- games.each do |game|
227
- competition = game['competitions'].first
228
- competition['competitors'].each do |competitor|
229
- teams[competitor['team']['displayName']] = competitor['team']['abbreviation'].downcase
230
- end
231
- end
232
-
233
- teams
234
- end
235
-
236
195
  def ncf_parse(doc)
237
196
  scores = []
238
197
  games = []
@@ -256,7 +215,7 @@ module ESPN
256
215
  if competitor['homeAway'] == 'home'
257
216
  score[:home_team] = competitor['team']['id'].downcase
258
217
  score[:home_score] = competitor['score'].to_i
259
- else
218
+ else
260
219
  score[:away_team] = competitor['team']['id'].downcase
261
220
  score[:away_score] = competitor['score'].to_i
262
221
  end
@@ -308,4 +267,4 @@ module ESPN
308
267
 
309
268
  end
310
269
  end
311
- end
270
+ end
@@ -1,3 +1,3 @@
1
1
  module ESPN
2
- VERSION = '1.3.1'
2
+ VERSION = '1.5.0'
3
3
  end
@@ -1,34 +1,34 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class BoilerplateTest < EspnTest
4
-
4
+
5
5
  test 'espn is up' do
6
6
  assert ESPN.responding?
7
7
  assert !ESPN.down?
8
8
  end
9
-
9
+
10
10
  test 'paths are working' do
11
- assert_equal 'http://scores.espn.go.com', ESPN.url('scores')
12
- assert_equal 'http://espn.go.com/nba/teams', ESPN.url('nba', 'teams')
11
+ assert_equal 'http://www.espn.com', ESPN.url('scores')
12
+ assert_equal 'http://www.espn.com/nba/teams', ESPN.url('nba', 'teams')
13
13
  end
14
-
14
+
15
15
  test 'error message works' do
16
16
  assert_raises(ArgumentError) do
17
17
  ESPN.get('bad-api-keyword')
18
18
  end
19
19
  end
20
-
20
+
21
21
  test 'get pages is working' do
22
22
  assert ESPN.get('scores')
23
23
  end
24
-
24
+
25
25
  test 'dasherize strings' do
26
26
  assert_equal 'string-is-dashed', ESPN.send(:dasherize, 'String is dashed')
27
27
  end
28
-
28
+
29
29
  test 'leagues' do
30
30
  leagues = 'nfl mlb nba nhl ncf ncb'.split
31
31
  assert_equal leagues, ESPN.leagues
32
32
  end
33
33
 
34
- end
34
+ end
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class MlbTest < EspnTest
4
-
4
+
5
5
  test 'mlb august 13th 2012 yankees beat rangers' do
6
6
  starts_at = DateTime.parse('2012-08-13T23:00:00+00:00')
7
7
  expected = {
@@ -29,12 +29,18 @@ class MlbTest < EspnTest
29
29
  scores = ESPN.get_mlb_scores(starts_at.to_date)
30
30
  assert scores.include?(expected), 'A known MLB final score cannot be found'
31
31
  end
32
-
32
+
33
+ test 'mlb sept 5th 2014 suspended game pittsburgh pirates at chicago cubs' do
34
+ scores = ESPN.get_mlb_scores(Date.parse('2014-09-05'))
35
+ home_teams = scores.each { |s| s['home_team'] }
36
+ assert !home_teams.include?('chc'), 'A known suspended game was unexpectedly found'
37
+ end
38
+
33
39
  test 'random mlb days' do
34
40
  random_days.each do |day|
35
41
  scores = ESPN.get_mlb_scores(day)
36
42
  assert all_names_present?(scores), "Error on #{day} for mlb"
37
43
  end
38
44
  end
39
-
40
- end
45
+
46
+ end
@@ -1,7 +1,7 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class TeamsTest < EspnTest
4
-
4
+
5
5
  test 'scrape nfl teams' do
6
6
  divisions = ESPN.get_teams_in('nfl')
7
7
  assert_equal 8, divisions.count
@@ -13,7 +13,7 @@ class TeamsTest < EspnTest
13
13
  assert_equal 32, teams.map{ |h| h[:data_name] }.uniq.count
14
14
  assert divisions['nfc-west'].include?({ name: 'Seattle Seahawks', data_name: 'sea' })
15
15
  end
16
-
16
+
17
17
  test 'scrape mlb teams' do
18
18
  divisions = ESPN.get_teams_in('mlb')
19
19
  assert_equal 6, divisions.count
@@ -25,7 +25,7 @@ class TeamsTest < EspnTest
25
25
  assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
26
26
  assert divisions['al-west'].include?({ name: 'Seattle Mariners', data_name: 'sea' })
27
27
  end
28
-
28
+
29
29
  test 'scrape nba teams' do
30
30
  divisions = ESPN.get_teams_in('nba')
31
31
  assert_equal 6, divisions.count
@@ -37,18 +37,18 @@ class TeamsTest < EspnTest
37
37
  assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
38
38
  assert divisions['atlantic'].include?({ name: 'Toronto Raptors', data_name: 'tor' })
39
39
  end
40
-
40
+
41
41
  test 'scrape nhl teams' do
42
42
  divisions = ESPN.get_teams_in('nhl')
43
43
  assert_equal 4, divisions.count
44
44
  assert_equal 7, divisions['central'].count
45
45
  assert_equal 8, divisions['atlantic'].count
46
46
  teams = divisions.values.flatten
47
- assert_equal 30, teams.map{ |h| h[:name] }.uniq.count
48
- assert_equal 30, teams.map{ |h| h[:data_name] }.uniq.count
47
+ assert_equal 31, teams.map{ |h| h[:name] }.uniq.count
48
+ assert_equal 31, teams.map{ |h| h[:data_name] }.uniq.count
49
49
  assert divisions['atlantic'].include?({ name: 'Montreal Canadiens', data_name: 'mtl' })
50
50
  end
51
-
51
+
52
52
  test 'scrape ncaa football teams' do
53
53
  divisions = ESPN.get_teams_in('college-football')
54
54
  assert_equal 25, divisions.count
@@ -57,23 +57,23 @@ class TeamsTest < EspnTest
57
57
  assert divisions['conference-usa'].include?({ name: 'Rice Owls', data_name: '242' })
58
58
  assert divisions['meac'].include?({ name: 'Bethune-Cookman Wildcats', data_name: '2065' })
59
59
  assert divisions['northeast'].include?({ name: 'St Francis Red Flash', data_name: '2598' })
60
- assert divisions['swac'].include?({ name: 'Alabama A&M Bulldogs', data_name: '2010' })
60
+ assert divisions['swac'].include?({ name: 'Alabama State Hornets', data_name: '2011' })
61
61
  end
62
-
62
+
63
63
  test 'scrape ncaa basketball teams' do
64
64
  divisions = ESPN.get_teams_in('mens-college-basketball')
65
65
  assert_equal 32, divisions.count
66
66
  assert_equal 15, divisions['acc'].count
67
67
  assert_equal 10, divisions['patriot-league'].count
68
-
69
- assert divisions['southland'].include?({ name: 'Texas A&M-CC Islanders', data_name: '357' })
68
+
69
+ assert divisions['southland'].include?({ name: 'Incarnate Word Cardinals', data_name: '2916' })
70
70
  assert divisions['atlantic-10'].include?({ name: "Saint Joe's Saint Joseph's Hawks", data_name: '2603' })
71
71
  end
72
72
 
73
73
  test 'scrape ncaa basketball conferences' do
74
74
  conferences = ESPN.get_conferences_in_ncb
75
- assert_equal 32, conferences.count
75
+ assert_equal 33, conferences.count
76
76
  assert conferences.include?({ name: 'Mountain West', data_name: '44' })
77
77
  end
78
-
79
- end
78
+
79
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: espn_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - aj0strow
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.5.2
87
+ rubygems_version: 2.6.11
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: ESPN Scraper